From d85f9864d62ca68dfdbb2dcaa134847d978c2899 Mon Sep 17 00:00:00 2001 From: Victor Zarubkin Date: Wed, 14 Dec 2016 23:17:02 +0300 Subject: [PATCH] (GUI) Added statistics gathering for context switches (calls number, total duration, % per thread or frame). Known issue: if context switch events occur out of profiled blocks then percent/thread may be calculated wrong as it is calculating relative to thread profiled time (which may be a very low value). --- easy_profiler_core/reader.cpp | 77 +++++++++++++++++++++++++-- profiler_gui/blocks_graphics_view.cpp | 50 +++++++++++++++-- 2 files changed, 119 insertions(+), 8 deletions(-) diff --git a/easy_profiler_core/reader.cpp b/easy_profiler_core/reader.cpp index ba4000b..a0bf702 100644 --- a/easy_profiler_core/reader.cpp +++ b/easy_profiler_core/reader.cpp @@ -188,11 +188,14 @@ typedef ::std::unordered_map<::profiler::block_id_t, ::profiler::BlockStatistics which uses it as a key, exists only inside fillTreesFromFile function. */ typedef ::std::unordered_map<::profiler::hashed_cstr, ::profiler::block_id_t> IdMap; +typedef ::std::unordered_map<::profiler::hashed_cstr, ::profiler::BlockStatistics*> CsStatsMap; + #else // TODO: Create optimized version of profiler::hashed_cstr for Linux too. typedef ::std::unordered_map<::profiler::block_id_t, ::profiler::BlockStatistics*, ::profiler::passthrough_hash> StatsMap; typedef ::std::unordered_map<::profiler::hashed_stdstring, ::profiler::block_id_t> IdMap; +typedef ::std::unordered_map<::profiler::hashed_stdstring, ::profiler::BlockStatistics*> CsStatsMap; #endif @@ -253,6 +256,47 @@ automatically receive statistics update. return stats; } +::profiler::BlockStatistics* update_statistics(CsStatsMap& _stats_map, const ::profiler::BlocksTree& _current, ::profiler::block_index_t _current_index, ::profiler::block_index_t _parent_index) +{ + auto duration = _current.node->duration(); + CsStatsMap::key_type key(_current.node->name()); + auto it = _stats_map.find(key); + if (it != _stats_map.end()) + { + // Update already existing statistics + + auto stats = it->second; // write pointer to statistics into output (this is BlocksTree:: per_thread_stats or per_parent_stats or per_frame_stats) + + ++stats->calls_number; // update calls number of this block + stats->total_duration += duration; // update summary duration of all block calls + + if (duration > stats->max_duration) + { + // update max duration + stats->max_duration_block = _current_index; + stats->max_duration = duration; + } + + if (duration < stats->min_duration) + { + // update min duraton + stats->min_duration_block = _current_index; + stats->min_duration = duration; + } + + // average duration is calculated inside average_duration() method by dividing total_duration to the calls_number + + return stats; + } + + // This is first time the block appear in the file. + // Create new statistics. + auto stats = new ::profiler::BlockStatistics(duration, _current_index, _parent_index); + _stats_map.emplace(key, stats); + + return stats; +} + ////////////////////////////////////////////////////////////////////////// void update_statistics_recursive(StatsMap& _stats_map, ::profiler::BlocksTree& _current, ::profiler::block_index_t _current_index, ::profiler::block_index_t _parent_index, ::profiler::blocks_t& _blocks) @@ -462,7 +506,7 @@ extern "C" { } typedef ::std::unordered_map<::profiler::thread_id_t, StatsMap, ::profiler::passthrough_hash> PerThreadStats; - PerThreadStats thread_statistics, parent_statistics, frame_statistics; + PerThreadStats parent_statistics, frame_statistics; IdMap identification_table; blocks.reserve(total_blocks_number); @@ -492,6 +536,8 @@ extern "C" { root.thread_name = name.data(); } + CsStatsMap per_thread_statistics_cs; + uint32_t blocks_number_in_thread = 0; inFile.read((char*)&blocks_number_in_thread, sizeof(decltype(blocks_number_in_thread))); auto threshold = read_number + blocks_number_in_thread; @@ -534,6 +580,12 @@ extern "C" { root.wait_time += baseData->duration(); root.sync.emplace_back(block_index); + + if (gather_statistics) + { + EASY_BLOCK("Gather per thread statistics", ::profiler::colors::Coral); + tree.per_thread_stats = update_statistics(per_thread_statistics_cs, tree, block_index, thread_id); + } } auto oldprogress = progress.exchange(20 + static_cast(70 * i / memory_size), ::std::memory_order_release); @@ -547,6 +599,8 @@ extern "C" { if (inFile.eof()) break; + StatsMap per_thread_statistics; + blocks_number_in_thread = 0; inFile.read((char*)&blocks_number_in_thread, sizeof(decltype(blocks_number_in_thread))); threshold = read_number + blocks_number_in_thread; @@ -600,9 +654,6 @@ extern "C" { tree.node = baseData; const auto block_index = blocks_counter++; - auto& per_parent_statistics = parent_statistics[thread_id]; - auto& per_thread_statistics = thread_statistics[thread_id]; - if (*tree.node->name() != 0) { // If block has runtime name then generate new id for such block. @@ -649,6 +700,7 @@ extern "C" { if (gather_statistics) { EASY_BLOCK("Gather statistic within parent", ::profiler::colors::Magenta); + auto& per_parent_statistics = parent_statistics[thread_id]; per_parent_statistics.clear(); //per_parent_statistics.reserve(tree.children.size()); // this gives slow-down on Windows @@ -731,6 +783,7 @@ extern "C" { // return blocks[left].node->begin() < blocks[right].node->begin(); //}); + ::profiler::block_index_t cs_index = 0; for (auto i : root.children) { auto& frame = blocks[i]; @@ -739,6 +792,22 @@ extern "C" { per_frame_statistics.clear(); update_statistics_recursive(per_frame_statistics, frame, i, i, blocks); + if (cs_index < root.sync.size()) + { + CsStatsMap frame_stats_cs; + do { + + auto j = root.sync[cs_index]; + auto& cs = blocks[j]; + if (cs.node->end() < frame.node->begin()) + continue; + if (cs.node->begin() > frame.node->end()) + break; + cs.per_frame_stats = update_statistics(frame_stats_cs, cs, cs_index, i); + + } while (++cs_index < root.sync.size()); + } + if (root.depth < frame.depth) root.depth = frame.depth; diff --git a/profiler_gui/blocks_graphics_view.cpp b/profiler_gui/blocks_graphics_view.cpp index 78789db..3bcfc02 100644 --- a/profiler_gui/blocks_graphics_view.cpp +++ b/profiler_gui/blocks_graphics_view.cpp @@ -1501,27 +1501,69 @@ void EasyGraphicsView::onIdleTimeout() auto cse = item->intersectEvent(pos); if (cse) { + const auto& itemBlock = cse->tree; + auto widget = new QWidget(); auto lay = new QGridLayout(widget); int row = 0; lay->addWidget(new EasyBoldLabel("Context switch event", widget), row, 0, 1, 2, Qt::AlignHCenter); - ++row; + lay->addWidget(new QLabel("Thread:", widget), row, 0, Qt::AlignRight); auto it = EASY_GLOBALS.profiler_blocks.find(cse->tree.node->id()); if (it != EASY_GLOBALS.profiler_blocks.end()) lay->addWidget(new QLabel(QString("%1 %2").arg(cse->tree.node->id()).arg(it->second.name()), widget), row, 1, Qt::AlignLeft); else lay->addWidget(new QLabel(QString::number(cse->tree.node->id()), widget), row, 1, Qt::AlignLeft); - ++row; + lay->addWidget(new QLabel("Process:", widget), row, 0, Qt::AlignRight); lay->addWidget(new QLabel(cse->tree.node->name(), widget), row, 1, Qt::AlignLeft); - ++row; + + const auto duration = itemBlock.node->duration(); lay->addWidget(new QLabel("Duration:", widget), row, 0, Qt::AlignRight); - lay->addWidget(new QLabel(::profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, cse->tree.node->duration(), 3), widget), row, 1, Qt::AlignLeft); + lay->addWidget(new QLabel(::profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, duration, 3), widget), row, 1, Qt::AlignLeft); + ++row; + + if (itemBlock.per_thread_stats) + { + lay->addWidget(new QLabel("Sum:", widget), row, 0, Qt::AlignRight); + lay->addWidget(new QLabel(::profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, itemBlock.per_thread_stats->total_duration, 3), widget), row, 1, 1, 3, Qt::AlignLeft); + ++row; + + lay->addWidget(new EasyBoldLabel("-------- Statistics --------", widget), row, 0, 1, 5, Qt::AlignHCenter); + lay->addWidget(new QLabel("per ", widget), row + 1, 0, Qt::AlignRight); + lay->addWidget(new QLabel("This %:", widget), row + 2, 0, Qt::AlignRight); + lay->addWidget(new QLabel("Sum %:", widget), row + 3, 0, Qt::AlignRight); + lay->addWidget(new QLabel("N Calls:", widget), row + 4, 0, Qt::AlignRight); + + lay->addWidget(new QLabel("Thread", widget), row + 1, 1, Qt::AlignHCenter); + + auto percent = ::profiler_gui::percentReal(duration, item->root()->profiled_time); + lay->addWidget(new QLabel(0.005 < percent && percent < 0.5001 ? QString::number(percent, 'f', 2) : QString::number(static_cast(0.5 + percent)), widget), row + 2, 1, Qt::AlignHCenter); + + lay->addWidget(new QLabel(QString::number(::profiler_gui::percent(itemBlock.per_thread_stats->total_duration, item->root()->profiled_time)), widget), row + 3, 1, Qt::AlignHCenter); + + lay->addWidget(new QLabel(QString::number(itemBlock.per_thread_stats->calls_number), widget), row + 4, 1, Qt::AlignHCenter); + + if (itemBlock.per_frame_stats) + { + int col = 2; + auto frame_duration = blocksTree(itemBlock.per_frame_stats->parent_block).node->duration(); + + lay->addWidget(new QLabel("Frame", widget), row + 1, col, Qt::AlignHCenter); + + percent = ::profiler_gui::percentReal(duration, frame_duration); + lay->addWidget(new QLabel(0.005 < percent && percent < 0.5001 ? QString::number(percent, 'f', 2) : QString::number(static_cast(0.5 + percent)), widget), row + 2, col, Qt::AlignHCenter); + + percent = ::profiler_gui::percentReal(itemBlock.per_frame_stats->total_duration, frame_duration); + lay->addWidget(new QLabel(0.005 < percent && percent < 0.5001 ? QString::number(percent, 'f', 2) : QString::number(static_cast(0.5 + percent)), widget), row + 3, col, Qt::AlignHCenter); + + lay->addWidget(new QLabel(QString::number(itemBlock.per_frame_stats->calls_number), widget), row + 4, col, Qt::AlignHCenter); + } + } m_csInfoWidget = new QGraphicsProxyWidget(); m_csInfoWidget->setWidget(widget);