diff --git a/easy_profiler_core/include/easy/reader.h b/easy_profiler_core/include/easy/reader.h index fe4545d..139db50 100644 --- a/easy_profiler_core/include/easy/reader.h +++ b/easy_profiler_core/include/easy/reader.h @@ -68,6 +68,7 @@ namespace profiler { struct BlockStatistics EASY_FINAL { profiler::timestamp_t total_duration; ///< Total duration of all block calls + profiler::timestamp_t median_duration; ///< Median duration of all block calls profiler::timestamp_t total_children_duration; ///< Total duration of all children of all block calls profiler::block_index_t min_duration_block; ///< Will be used in GUI to jump to the block with min duration profiler::block_index_t max_duration_block; ///< Will be used in GUI to jump to the block with max duration @@ -76,6 +77,7 @@ namespace profiler { explicit BlockStatistics(profiler::timestamp_t _duration, profiler::block_index_t _block_index, profiler::block_index_t _parent_index) : total_duration(_duration) + , median_duration(0) , total_children_duration(0) , min_duration_block(_block_index) , max_duration_block(_block_index) @@ -298,7 +300,6 @@ namespace profiler { using blocks_t = profiler::BlocksTree::blocks_t; using thread_blocks_tree_t = std::unordered_map >; using block_getter_fn = std::function; - using stats_map_t = std::unordered_map >; ////////////////////////////////////////////////////////////////////////// diff --git a/easy_profiler_core/reader.cpp b/easy_profiler_core/reader.cpp index caafce7..436e234 100644 --- a/easy_profiler_core/reader.cpp +++ b/easy_profiler_core/reader.cpp @@ -50,9 +50,15 @@ ************************************************************************/ #include +#include +#include +#include #include +#include #include #include +#include +#include #include #include @@ -115,8 +121,117 @@ static bool isCompatibleVersion(uint32_t _version) ////////////////////////////////////////////////////////////////////////// +namespace { + +template +struct Counter +{ + T value = 0; +}; + +struct Stats +{ + profiler::BlockStatistics* stats; + std::map > durations; + + Stats(profiler::BlockStatistics* stats_ptr, profiler::timestamp_t duration) EASY_NOEXCEPT + : stats(stats_ptr) + { + durations[duration].value = 1; + } + + Stats(Stats&& another) EASY_NOEXCEPT + : stats(another.stats) + , durations(std::move(another.durations)) + { + another.stats = nullptr; + } + + Stats(const Stats&) = delete; +}; + +class ReaderThreadPool +{ + std::vector m_threads; + std::deque > m_tasks; + std::mutex m_mutex; + std::condition_variable m_cv; + std::atomic_bool m_stopFlag; + +public: + + ReaderThreadPool() : m_stopFlag {false} + { + m_threads.reserve(std::thread::hardware_concurrency()); + } + + ~ReaderThreadPool() + { + m_stopFlag.store(true, std::memory_order_release); + m_cv.notify_all(); + for (auto& t : m_threads) + { + if (t.joinable()) + { + t.join(); + } + } + } + + std::future async(std::function func) + { + std::unique_lock lock(m_mutex); + m_tasks.emplace_back(std::move(func)); + auto future = m_tasks.back().get_future(); + startNewThreadIfNeeded(); + lock.unlock(); + m_cv.notify_one(); + return future; + } + +private: + + void startNewThreadIfNeeded() + { + if (m_threads.size() < std::thread::hardware_concurrency() && m_threads.size() < m_tasks.size()) + { + m_threads.emplace_back(std::thread(&ReaderThreadPool::worker, this)); + } + } + + void worker() + { + while (true) + { + std::unique_lock lock(m_mutex); + + if (m_tasks.empty()) + { + m_cv.wait(lock, [this] { + return !m_tasks.empty() || m_stopFlag.load(std::memory_order_acquire); + }); + } + + if (m_tasks.empty() && m_stopFlag.load(std::memory_order_acquire)) + { + break; + } + + auto task = std::move(m_tasks.front()); + m_tasks.pop_front(); + lock.unlock(); + + task(); + } + } +}; + +} // end of namespace . + namespace profiler { + using stats_map_t = std::unordered_map >; + SerializedData::SerializedData() : m_size(0), m_data(nullptr) { } @@ -232,7 +347,7 @@ namespace profiler { ////////////////////////////////////////////////////////////////////////// using IdMap = std::unordered_map; -using CsStatsMap = std::unordered_map; +using CsStatsMap = std::unordered_map; ////////////////////////////////////////////////////////////////////////// @@ -264,7 +379,10 @@ static profiler::BlockStatistics* update_statistics( { // Update already existing statistics - auto stats = it->second; // write pointer to statistics into output (this is BlocksTree:: per_thread_stats or per_parent_stats or per_frame_stats) + // write pointer to statistics into output (this is BlocksTree:: per_thread_stats or per_parent_stats or per_frame_stats) + auto stats = it->second.stats; + auto& durations = it->second.durations; + ++durations[duration].value; ++stats->calls_number; // update calls number of this block stats->total_duration += duration; // update summary duration of all block calls @@ -298,7 +416,7 @@ static profiler::BlockStatistics* update_statistics( // Create new statistics. auto stats = new profiler::BlockStatistics(duration, _current_index, _parent_index); //_stats_map.emplace(key, stats); - _stats_map.emplace(_current.node->id(), stats); + _stats_map.emplace(_current.node->id(), Stats {stats, duration}); if (_calculate_children) { @@ -309,8 +427,14 @@ static profiler::BlockStatistics* update_statistics( return stats; } -static profiler::BlockStatistics* update_statistics(CsStatsMap& _stats_map, const profiler::BlocksTree& _current, profiler::block_index_t _current_index, profiler::block_index_t _parent_index, const profiler::blocks_t& _blocks, bool _calculate_children = true) -{ +static profiler::BlockStatistics* update_statistics( + CsStatsMap& _stats_map, + const profiler::BlocksTree& _current, + profiler::block_index_t _current_index, + profiler::block_index_t _parent_index, + const profiler::blocks_t& _blocks, + bool _calculate_children = true +) { auto duration = _current.node->duration(); CsStatsMap::key_type key(_current.node->name()); auto it = _stats_map.find(key); @@ -318,7 +442,11 @@ static profiler::BlockStatistics* update_statistics(CsStatsMap& _stats_map, cons { // Update already existing statistics - auto stats = it->second; // write pointer to statistics into output (this is BlocksTree:: per_thread_stats or per_parent_stats or per_frame_stats) + // write pointer to statistics into output (this is BlocksTree:: per_thread_stats or per_parent_stats or per_frame_stats) + auto stats = it->second.stats; + auto& durations = it->second.durations; + + ++durations[duration].value; ++stats->calls_number; // update calls number of this block stats->total_duration += duration; // update summary duration of all block calls @@ -351,7 +479,7 @@ static profiler::BlockStatistics* update_statistics(CsStatsMap& _stats_map, cons // This is first time the block appear in the file. // Create new statistics. auto stats = new profiler::BlockStatistics(duration, _current_index, _parent_index); - _stats_map.emplace(key, stats); + _stats_map.emplace(key, Stats {stats, duration}); if (_calculate_children) { @@ -362,6 +490,133 @@ static profiler::BlockStatistics* update_statistics(CsStatsMap& _stats_map, cons return stats; } +template +static void calculate_medians(TStatsMapIterator begin, TStatsMapIterator end) +{ + for (auto it = begin; it != end; ++it) + { + auto& durations = it->second.durations; + if (durations.empty()) + { + continue; + } + + size_t total_count = 0; + for (auto& kv : durations) + { + total_count += kv.second.value; + } + + auto stats = it->second.stats; + if (total_count & 1) + { + const auto index = total_count >> 1; + size_t i = 0; + for (auto& kv : durations) + { + const auto count = kv.second.value; + + i += count; + if (i < index) + { + continue; + } + + stats->median_duration = kv.first; + break; + } + } + else + { + const auto index2 = total_count >> 1; + const auto index1 = index2 - 1; + + size_t i = 0; + bool i1 = false; + for (auto& kv : durations) + { + const auto count = kv.second.value; + + i += count; + if (i < index1) + { + continue; + } + + if (!i1) + { + i1 = true; + stats->median_duration = kv.first; + } + + if (i < index2) + { + continue; + } + + stats->median_duration += kv.first; + stats->median_duration >>= 1; + + break; + } + } + + decltype(it->second.durations) dummy; + dummy.swap(durations); + } +} + +template +static void calculate_medians_async(ReaderThreadPool& pool, TStatsMap& stats_map) +{ + if (stats_map.empty()) + { + return; + } + + if (stats_map.size() < 1000) + { + calculate_medians(stats_map.begin(), stats_map.end()); + return; + } + + const auto threads_count = std::min(static_cast(std::thread::hardware_concurrency()), stats_map.size()); + const auto count_per_thread = stats_map.size() / threads_count; + + std::vector> results; + results.reserve(threads_count); + + for (size_t i = 1; i <= threads_count; ++i) + { + results.emplace_back(pool.async([i, threads_count, count_per_thread, &stats_map] + { + auto begin = stats_map.begin(); + const auto advance_count = (i - 1) * count_per_thread; + std::advance(begin, advance_count); + + auto end = begin; + if (i == threads_count) + { + end = stats_map.end(); + } + else + { + std::advance(end, count_per_thread); + } + + calculate_medians(begin, end); + })); + } + + for (auto& result : results) + { + if (result.valid()) + { + result.get(); + } + } +} + ////////////////////////////////////////////////////////////////////////// static void update_statistics_recursive(profiler::stats_map_t& _stats_map, profiler::BlocksTree& _current, profiler::block_index_t _current_index, profiler::block_index_t _parent_index, profiler::blocks_t& _blocks) @@ -715,6 +970,8 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic< profiler::block_index_t blocks_counter = 0; std::vector name; + ReaderThreadPool pool; + while (!inStream.eof() && threads_read_number++ < header.threads_count) { EASY_BLOCK("Read thread data", profiler::colors::DarkGreen); @@ -804,6 +1061,9 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic< } } + // calculate medians for each block + calculate_medians_async(pool, per_thread_statistics_cs); + if (!update_progress(progress, 20 + static_cast(67 * i / memory_size), _log)) { return 0; // Loading interrupted @@ -933,6 +1193,9 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic< if (tree.depth < child.depth) tree.depth = child.depth; } + + // calculate medians for each block + calculate_medians_async(pool, per_parent_statistics); } else { @@ -977,6 +1240,9 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic< if (!update_progress(progress, 20 + static_cast(67 * i / memory_size), _log)) return 0; // Loading interrupted } + + // calculate medians for each block + calculate_medians_async(pool, per_thread_statistics); } if (total_blocks_count != blocks_counter) @@ -1068,8 +1334,8 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic< EASY_BLOCK("Gather statistics for roots", profiler::colors::Purple); if (gather_statistics) { - std::vector statistics_threads; - statistics_threads.reserve(threaded_trees.size()); + std::vector> results; + results.reserve(threaded_trees.size()); for (auto& it : threaded_trees) { @@ -1081,7 +1347,7 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic< auto& per_parent_statistics = parent_statistics[it.first]; per_parent_statistics.clear(); - statistics_threads.emplace_back(std::thread([&] (profiler::BlocksTreeRoot& _root) + results.emplace_back(pool.async([&] { //std::sort(root.sync.begin(), root.sync.end(), [&blocks](profiler::block_index_t left, profiler::block_index_t right) //{ @@ -1089,24 +1355,27 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic< //}); profiler::block_index_t cs_index = 0; - for (auto child_index : _root.children) + for (auto child_index : root.children) { auto& frame = blocks[child_index]; if (descriptors[frame.node->id()]->type() == profiler::BlockType::Block) - ++_root.frames_number; + ++root.frames_number; frame.per_parent_stats = update_statistics(per_parent_statistics, frame, child_index, ~0U, blocks);//, root.thread_id, blocks); per_frame_statistics.clear(); update_statistics_recursive(per_frame_statistics, frame, child_index, child_index, blocks); - if (cs_index < _root.sync.size()) + calculate_medians(per_parent_statistics.begin(), per_parent_statistics.end()); + calculate_medians(per_frame_statistics.begin(), per_frame_statistics.end()); + + if (cs_index < root.sync.size()) { CsStatsMap frame_stats_cs; do { - auto j = _root.sync[cs_index]; + auto j = root.sync[cs_index]; auto& cs = blocks[j]; if (cs.node->end() < frame.node->begin()) continue; @@ -1114,23 +1383,28 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic< break; cs.per_frame_stats = update_statistics(frame_stats_cs, cs, cs_index, child_index, blocks); - } while (++cs_index < _root.sync.size()); + } while (++cs_index < root.sync.size()); + + calculate_medians(frame_stats_cs.begin(), frame_stats_cs.end()); } - if (_root.depth < frame.depth) - _root.depth = frame.depth; + if (root.depth < frame.depth) + root.depth = frame.depth; - _root.profiled_time += frame.node->duration(); + root.profiled_time += frame.node->duration(); } - ++_root.depth; - }, std::ref(root))); + ++root.depth; + })); } - int j = 0, n = static_cast(statistics_threads.size()); - for (auto& thread : statistics_threads) + int j = 0, n = static_cast(results.size()); + for (auto& result : results) { - thread.join(); + if (result.valid()) + { + result.get(); + } progress.store(90 + (10 * ++j) / n, std::memory_order_release); } } diff --git a/profiler_gui/blocks_graphics_view.cpp b/profiler_gui/blocks_graphics_view.cpp index dc8df2d..b33c065 100644 --- a/profiler_gui/blocks_graphics_view.cpp +++ b/profiler_gui/blocks_graphics_view.cpp @@ -2348,20 +2348,20 @@ void BlocksGraphicsView::onIdleTimeout() ++row; const auto duration = itemBlock.node->duration(); - lay->addWidget(new QLabel("Duration:", widget), row, 0, Qt::AlignRight); + lay->addWidget(new QLabel("Time:", widget), row, 0, Qt::AlignRight); lay->addWidget(new QLabel(profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, duration, 3), widget), row, 1, 1, 2, Qt::AlignLeft); ++row; if (itemBlock.per_thread_stats) { - lay->addWidget(new QLabel("Sum:", widget), row, 0, Qt::AlignRight); + lay->addWidget(new QLabel("Total:", widget), row, 0, Qt::AlignRight); lay->addWidget(new QLabel(profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, itemBlock.per_thread_stats->total_duration, 3), widget), row, 1, 1, 2, Qt::AlignLeft); ++row; lay->addWidget(new BoldLabel("-------- Statistics --------", widget), row, 0, 1, 3, Qt::AlignHCenter); lay->addWidget(new QLabel("per ", widget), row + 1, 0, Qt::AlignRight); lay->addWidget(new QLabel("This %:", widget), row + 2, 0, Qt::AlignRight); - lay->addWidget(new QLabel("Sum %:", widget), row + 3, 0, Qt::AlignRight); + lay->addWidget(new QLabel("Total %:", widget), row + 3, 0, Qt::AlignRight); lay->addWidget(new QLabel("N Calls:", widget), row + 4, 0, Qt::AlignRight); lay->addWidget(new QLabel("Thread", widget), row + 1, 1, Qt::AlignHCenter); @@ -2436,7 +2436,7 @@ void BlocksGraphicsView::onIdleTimeout() ++row; const auto duration = itemBlock.node->duration(); - lay->addWidget(new QLabel("Duration:", widget), row, 0, Qt::AlignRight); + lay->addWidget(new QLabel("Time:", widget), row, 0, Qt::AlignRight); lay->addWidget(new QLabel(profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, duration, 3), widget), row, 1, 1, 3, Qt::AlignLeft); ++row; @@ -2506,10 +2506,14 @@ void BlocksGraphicsView::onIdleTimeout() { const auto duration = itemBlock.node->duration(); - lay->addWidget(new QLabel("Average:", widget), row, 0, Qt::AlignRight); + lay->addWidget(new QLabel("Avg:", widget), row, 0, Qt::AlignRight); lay->addWidget(new QLabel(profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, itemBlock.per_thread_stats->average_duration(), 3), widget), row, 1, 1, 3, Qt::AlignLeft); ++row; + lay->addWidget(new QLabel("Median:", widget), row, 0, Qt::AlignRight); + lay->addWidget(new QLabel(profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, itemBlock.per_thread_stats->median_duration, 3), widget), row, 1, 1, 3, Qt::AlignLeft); + ++row; + // Calculate idle/active time { const auto& threadRoot = item->root(); @@ -2555,8 +2559,8 @@ void BlocksGraphicsView::onIdleTimeout() lay->addWidget(new BoldLabel("-------- Statistics --------", widget), row, 0, 1, 5, Qt::AlignHCenter); lay->addWidget(new QLabel("per ", widget), row + 1, 0, Qt::AlignRight); lay->addWidget(new QLabel("This %:", widget), row + 2, 0, Qt::AlignRight); - lay->addWidget(new QLabel("Sum %:", widget), row + 3, 0, Qt::AlignRight); - lay->addWidget(new QLabel("Sum self %:", widget), row + 4, 0, Qt::AlignRight); + lay->addWidget(new QLabel("Total %:", widget), row + 3, 0, Qt::AlignRight); + lay->addWidget(new QLabel("Total self %:", widget), row + 4, 0, Qt::AlignRight); lay->addWidget(new QLabel("N Calls:", widget), row + 5, 0, Qt::AlignRight); lay->addWidget(new QLabel("Thread", widget), row + 1, 1, Qt::AlignHCenter); @@ -3174,7 +3178,7 @@ void ThreadNamesWidget::onIdleTimeout() if (!root.children.empty()) duration = easyBlock(root.children.back()).tree.node->end() - easyBlock(root.children.front()).tree.node->begin(); - lay->addWidget(new QLabel("Duration:", widget), row, 0, Qt::AlignRight); + lay->addWidget(new QLabel("Time:", widget), row, 0, Qt::AlignRight); lay->addWidget(new QLabel(profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, duration, 3), widget), row, 1, Qt::AlignLeft); ++row;