diff --git a/doc/benchmark.md b/doc/benchmark.md index 0f624c3..455c12e 100644 --- a/doc/benchmark.md +++ b/doc/benchmark.md @@ -1,8 +1,11 @@ # Benchmarks -Hardware: Intel(R) Xeon(R) CPU E3-1225 V2 @ 3.20GHz -Software: Windows 10, MSVC 2017, MinGW GCC 7.2.0 -Time unit: milliseconds (unless explicitly specified) +Hardware: HP laptop, Intel(R) Core(TM) i5-8300H CPU @ 2.30GHz, 16 GB RAM +Software: Windows 10, MinGW GCC 11.3.0, MSVC 2022 +Time unit: milliseconds (unless explicitly specified) + +Unless otherwise specified, the compiler is GCC. +The hardware used for the benchmarks was mid- to low-end at the time of benchmarking (December 2023). ## EventQueue enqueue and process -- single threading @@ -22,8 +25,8 @@ Time unit: milliseconds (unless explicitly specified) 10M 100 100 - 401 - 1146 + 289 + 939 100k @@ -31,8 +34,8 @@ Time unit: milliseconds (unless explicitly specified) 100M 100 100 - 4012 - 11467 + 2822 + 9328 100k @@ -40,8 +43,8 @@ Time unit: milliseconds (unless explicitly specified) 100M 1000 1000 - 4102 - 11600 + 2923 + 9502 @@ -68,7 +71,7 @@ The EventQueue is processed in one thread. The Single/Multi threading in the tab - + @@ -77,7 +80,7 @@ The EventQueue is processed in one thread. The Single/Multi threading in the tab - + @@ -87,7 +90,7 @@ The EventQueue is processed in one thread. The Single/Multi threading in the tab - + @@ -96,7 +99,7 @@ The EventQueue is processed in one thread. The Single/Multi threading in the tab - + @@ -106,7 +109,7 @@ The EventQueue is processed in one thread. The Single/Multi threading in the tab - + @@ -115,7 +118,7 @@ The EventQueue is processed in one thread. The Single/Multi threading in the tab - + @@ -125,7 +128,7 @@ The EventQueue is processed in one thread. The Single/Multi threading in the tab - + @@ -134,7 +137,7 @@ The EventQueue is processed in one thread. The Single/Multi threading in the tab - + @@ -144,7 +147,7 @@ The EventQueue is processed in one thread. 
The Single/Multi threading in the tab - + @@ -153,7 +156,7 @@ The EventQueue is processed in one thread. The Single/Multi threading in the tab - +
10M 100 10022831824
SpinLock10M 100 10016921303
10M 100 10034462989
SpinLock10M 100 10030253186
10M 100 10040003151
SpinLock10M 100 10030763049
10M 100 10019711657
SpinLock10M 100 10017551659
10M 100 100928708
SpinLock10M 100 10020821891
@@ -164,7 +167,7 @@ When there are fewer threads (about around the number of CPU cores which is 4 he ## CallbackList append/remove callbacks The benchmark loops 100K times, in each loop it appends 1000 empty callbacks to a CallbackList, then remove all that 1000 callbacks. So there are totally 100M append/remove operations. -The total benchmarked time is about 21000 milliseconds. That's to say in 1 milliseconds there can be 5000 append/remove operations. +The total benchmarked time is about 16000 milliseconds. That is to say, in 1 millisecond there can be about 6000 append/remove operations. ## CallbackList invoking VS native function invoking @@ -181,114 +184,114 @@ Iterations: 100,000,000 Inline global function - MSVC 2017 - 217 - 1501 - 6921 + MSVC + 139 + 1267 + 3058 - GCC 7.2 - 187 - 1489 - 4463 + GCC + 141 + 1149 + 2563 Non-inline global function - MSVC 2017 - 241 - 1526 - 6544 + MSVC + 143 + 1273 + 3047 - GCC 7.2 - 233 - 1488 - 4787 + GCC + 132 + 1218 + 2583 Function object - MSVC 2017 - 194 - 1498 - 6433 + MSVC + 139 + 1198 + 2993 - GCC 7.2 - 212 - 1485 - 4951 + GCC + 141 + 1107 + 2633 Member virtual function - MSVC 2017 - 207 - 1533 - 6558 + MSVC + 159 + 1221 + 3076 - GCC 7.2 - 212 - 1485 - 4489 + GCC + 140 + 1231 + 2691 Member non-virtual function - MSVC 2017 - 214 - 1533 - 6390 + MSVC + 140 + 1266 + 3054 - GCC 7.2 - 211 - 1486 - 4872 + GCC + 140 + 1193 + 2701 Member non-inline virtual function - MSVC 2017 - 206 - 1522 - 6578 + MSVC + 158 + 1223 + 3103 - GCC 7.2 - 182 - 1666 - 4593 + GCC + 133 + 1231 + 2676 Member non-inline non-virtual function - MSVC 2017 - 206 - 1491 - 6992 + MSVC + 134 + 1266 + 3028 - GCC 7.2 - 205 - 1486 - 4490 + GCC + 134 + 1205 + 2652 All functions - MSVC 2017 - 1374 - 10951 - 29973 + MSVC + 91 + 903 + 2214 - GCC 7.2 - 1223 - 9770 - 22958 + GCC + 89 + 858 + 1852 diff --git a/tests/benchmark/b1_callbacklist_invoking_vs_cpp.cpp b/tests/benchmark/b1_callbacklist_invoking_vs_cpp.cpp index e25f673..ef88838 100644 --- 
a/tests/benchmark/b1_callbacklist_invoking_vs_cpp.cpp +++ b/tests/benchmark/b1_callbacklist_invoking_vs_cpp.cpp @@ -65,6 +65,8 @@ struct FunctionObject TEST_CASE("b1, CallbackList invoking vs C++ invoking") { + std::cout << std::endl << "b1, CallbackList invoking vs C++ invoking" << std::endl; + constexpr int iterateCount = 1000 * 1000 * 10; constexpr int callbackCount = 10; diff --git a/tests/benchmark/b2_map_vs_unordered_map.cpp b/tests/benchmark/b2_map_vs_unordered_map.cpp index 8b82ffc..7abd661 100644 --- a/tests/benchmark/b2_map_vs_unordered_map.cpp +++ b/tests/benchmark/b2_map_vs_unordered_map.cpp @@ -54,6 +54,8 @@ std::string generateRandomString(const int length){ TEST_CASE("b2, std::map vs std::unordered_map") { + std::cout << std::endl << "b2, std::map vs std::unordered_map" << std::endl; + constexpr int stringCount = 1000 * 1000; std::vector stringList(stringCount); for(auto & s : stringList) { @@ -99,7 +101,7 @@ TEST_CASE("b2, std::map vs std::unordered_map") } }); } - std::cout << mapInsertTime << " " << mapLookupTime << std::endl; - std::cout << unorderedMapInsertTime << " " << unorderedMapLookupTime << std::endl; + std::cout << "Map: insert " << mapInsertTime << " lookup " << mapLookupTime << std::endl; + std::cout << "UnordereMap: insert " << unorderedMapInsertTime << " lookup " << unorderedMapLookupTime << std::endl; } diff --git a/tests/benchmark/b3_b5_eventqueue.cpp b/tests/benchmark/b3_b5_eventqueue.cpp index 12e15bf..ff6e425 100644 --- a/tests/benchmark/b3_b5_eventqueue.cpp +++ b/tests/benchmark/b3_b5_eventqueue.cpp @@ -166,6 +166,8 @@ struct B3PoliciesSingleThreading { TEST_CASE("b3, EventQueue, one thread") { + std::cout << std::endl << "b3, EventQueue, one thread" << std::endl; + doExecuteEventQueue("Multi threading", 100, 1000 * 100, 100); doExecuteEventQueue("Multi threading", 1000, 1000 * 100, 100); doExecuteEventQueue("Multi threading", 1000, 1000 * 100, 1000); @@ -181,6 +183,8 @@ struct B4PoliciesMultiThreading { TEST_CASE("b4, 
EventQueue, multi threads, mutex") { + std::cout << std::endl << "b4, EventQueue, multi threads, mutex" << std::endl; + doMultiThreadingExecuteEventQueue("Mutex", 1, 1, 1000 * 1000 * 10, 100); doMultiThreadingExecuteEventQueue("Mutex", 1, 3, 1000 * 1000 * 10, 100); doMultiThreadingExecuteEventQueue("Mutex", 2, 2, 1000 * 1000 * 10, 100); @@ -194,6 +198,8 @@ struct B5PoliciesMultiThreading { TEST_CASE("b5, EventQueue, multi threads, spinlock") { + std::cout << std::endl << "b5, EventQueue, multi threads, spinlock" << std::endl; + doMultiThreadingExecuteEventQueue("Spinlock", 1, 1, 1000 * 1000 * 10, 100); doMultiThreadingExecuteEventQueue("Spinlock", 1, 3, 1000 * 1000 * 10, 100); doMultiThreadingExecuteEventQueue("Spinlock", 2, 2, 1000 * 1000 * 10, 100); diff --git a/tests/benchmark/b6_callbacklist_add_remove_callbacks.cpp b/tests/benchmark/b6_callbacklist_add_remove_callbacks.cpp index 9680754..fa527eb 100644 --- a/tests/benchmark/b6_callbacklist_add_remove_callbacks.cpp +++ b/tests/benchmark/b6_callbacklist_add_remove_callbacks.cpp @@ -16,6 +16,8 @@ TEST_CASE("b6, CallbackList add/remove callbacks") { + std::cout << std::endl << "b6, CallbackList add/remove callbacks" << std::endl; + using CL = eventpp::CallbackList; constexpr size_t callbackCount = 1000; constexpr size_t iterateCount = 1000 * 100; diff --git a/tests/benchmark/b7_callbacklist_vs_function_list.cpp b/tests/benchmark/b7_callbacklist_vs_function_list.cpp index 37833ab..a16d588 100644 --- a/tests/benchmark/b7_callbacklist_vs_function_list.cpp +++ b/tests/benchmark/b7_callbacklist_vs_function_list.cpp @@ -100,14 +100,16 @@ void doCallbackListVsFunctionList(const std::string & message, AddCL && addCl, A } ); - std::cout << message << " timeCallbackList " << timeCallbackList << std::endl; - std::cout << message << " timeFunctionList " << timeFunctionList << std::endl; + std::cout << message << " CallbackList " << timeCallbackList << std::endl; + std::cout << message << " FunctionList " << timeFunctionList 
<< std::endl; } } //unnamed namespace TEST_CASE("b7, CallbackList vs vector of functions") { + std::cout << std::endl << "b7, CallbackList vs vector of functions" << std::endl; + struct PoliciesMultiThreading { using Threading = eventpp::MultipleThreading; }; @@ -163,6 +165,9 @@ TEST_CASE("b7, CallbackList vs vector of functions") }; + for(BenchmarkItem & item : itemList) { + doCallbackListVsFunctionList("Single thread, " + item.message, item.addClSingle, item.addFl); + } for(BenchmarkItem & item : itemList) { doCallbackListVsFunctionList("Multi thread, " + item.message, item.addClMulti, item.addFl); } diff --git a/tests/benchmark/b8_eventqueue_anydata.cpp b/tests/benchmark/b8_eventqueue_anydata.cpp index 7c3295a..1cbec44 100644 --- a/tests/benchmark/b8_eventqueue_anydata.cpp +++ b/tests/benchmark/b8_eventqueue_anydata.cpp @@ -146,6 +146,8 @@ void doExecuteEventQueueWithAnyData( TEST_CASE("b8, EventQueue, AnyData") { + std::cout << std::endl << "b8, EventQueue, AnyData" << std::endl; + doExecuteEventQueue("Without AnyData, small data", 100, 1000 * 100, 100); doExecuteEventQueueWithAnyData("With AnyData, small data", 100, 1000 * 100, 100); doExecuteEventQueue("Without AnyData, large data", 100, 1000 * 100, 100);