0
0
mirror of https://github.com/yse/easy_profiler.git synced 2025-01-14 00:27:55 +08:00

[core] added median duration calculation

This commit is contained in:
Victor Zarubkin 2019-10-23 01:55:47 +03:00
parent 701992a63d
commit b23d21ffc8
3 changed files with 311 additions and 32 deletions

View File

@ -68,6 +68,7 @@ namespace profiler {
struct BlockStatistics EASY_FINAL
{
profiler::timestamp_t total_duration; ///< Total duration of all block calls
profiler::timestamp_t median_duration; ///< Median duration of all block calls
profiler::timestamp_t total_children_duration; ///< Total duration of all children of all block calls
profiler::block_index_t min_duration_block; ///< Will be used in GUI to jump to the block with min duration
profiler::block_index_t max_duration_block; ///< Will be used in GUI to jump to the block with max duration
@ -76,6 +77,7 @@ namespace profiler {
explicit BlockStatistics(profiler::timestamp_t _duration, profiler::block_index_t _block_index, profiler::block_index_t _parent_index)
: total_duration(_duration)
, median_duration(0)
, total_children_duration(0)
, min_duration_block(_block_index)
, max_duration_block(_block_index)
@ -298,7 +300,6 @@ namespace profiler {
using blocks_t = profiler::BlocksTree::blocks_t;
using thread_blocks_tree_t = std::unordered_map<profiler::thread_id_t, profiler::BlocksTreeRoot, ::estd::hash<profiler::thread_id_t> >;
using block_getter_fn = std::function<const profiler::BlocksTree&(profiler::block_index_t)>;
using stats_map_t = std::unordered_map<profiler::block_id_t, profiler::BlockStatistics*, estd::hash<profiler::block_id_t> >;
//////////////////////////////////////////////////////////////////////////

View File

@ -50,9 +50,15 @@
************************************************************************/
#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <deque>
#include <fstream>
#include <future>
#include <iterator>
#include <limits>
#include <map>
#include <mutex>
#include <unordered_map>
#include <thread>
@ -115,8 +121,117 @@ static bool isCompatibleVersion(uint32_t _version)
//////////////////////////////////////////////////////////////////////////
namespace {
template <class T>
struct Counter
{
T value = 0;
};
struct Stats
{
profiler::BlockStatistics* stats;
std::map<profiler::timestamp_t, Counter<size_t> > durations;
Stats(profiler::BlockStatistics* stats_ptr, profiler::timestamp_t duration) EASY_NOEXCEPT
: stats(stats_ptr)
{
durations[duration].value = 1;
}
Stats(Stats&& another) EASY_NOEXCEPT
: stats(another.stats)
, durations(std::move(another.durations))
{
another.stats = nullptr;
}
Stats(const Stats&) = delete;
};
class ReaderThreadPool
{
std::vector<std::thread> m_threads;
std::deque<std::packaged_task<void()> > m_tasks;
std::mutex m_mutex;
std::condition_variable m_cv;
std::atomic_bool m_stopFlag;
public:
ReaderThreadPool() : m_stopFlag {false}
{
m_threads.reserve(std::thread::hardware_concurrency());
}
~ReaderThreadPool()
{
m_stopFlag.store(true, std::memory_order_release);
m_cv.notify_all();
for (auto& t : m_threads)
{
if (t.joinable())
{
t.join();
}
}
}
std::future<void> async(std::function<void()> func)
{
std::unique_lock<std::mutex> lock(m_mutex);
m_tasks.emplace_back(std::move(func));
auto future = m_tasks.back().get_future();
startNewThreadIfNeeded();
lock.unlock();
m_cv.notify_one();
return future;
}
private:
void startNewThreadIfNeeded()
{
if (m_threads.size() < std::thread::hardware_concurrency() && m_threads.size() < m_tasks.size())
{
m_threads.emplace_back(std::thread(&ReaderThreadPool::worker, this));
}
}
void worker()
{
while (true)
{
std::unique_lock<std::mutex> lock(m_mutex);
if (m_tasks.empty())
{
m_cv.wait(lock, [this] {
return !m_tasks.empty() || m_stopFlag.load(std::memory_order_acquire);
});
}
if (m_tasks.empty() && m_stopFlag.load(std::memory_order_acquire))
{
break;
}
auto task = std::move(m_tasks.front());
m_tasks.pop_front();
lock.unlock();
task();
}
}
};
} // end of namespace <noname>.
namespace profiler {
using stats_map_t = std::unordered_map<profiler::block_id_t, Stats, estd::hash<profiler::block_id_t> >;
SerializedData::SerializedData() : m_size(0), m_data(nullptr)
{
}
@ -232,7 +347,7 @@ namespace profiler {
//////////////////////////////////////////////////////////////////////////
using IdMap = std::unordered_map<profiler::hashed_stdstring, profiler::block_id_t>;
using CsStatsMap = std::unordered_map<profiler::string_with_hash, profiler::BlockStatistics*>;
using CsStatsMap = std::unordered_map<profiler::string_with_hash, Stats>;
//////////////////////////////////////////////////////////////////////////
@ -264,7 +379,10 @@ static profiler::BlockStatistics* update_statistics(
{
// Update already existing statistics
auto stats = it->second; // write pointer to statistics into output (this is BlocksTree:: per_thread_stats or per_parent_stats or per_frame_stats)
// write pointer to statistics into output (this is BlocksTree:: per_thread_stats or per_parent_stats or per_frame_stats)
auto stats = it->second.stats;
auto& durations = it->second.durations;
++durations[duration].value;
++stats->calls_number; // update calls number of this block
stats->total_duration += duration; // update summary duration of all block calls
@ -298,7 +416,7 @@ static profiler::BlockStatistics* update_statistics(
// Create new statistics.
auto stats = new profiler::BlockStatistics(duration, _current_index, _parent_index);
//_stats_map.emplace(key, stats);
_stats_map.emplace(_current.node->id(), stats);
_stats_map.emplace(_current.node->id(), Stats {stats, duration});
if (_calculate_children)
{
@ -309,8 +427,14 @@ static profiler::BlockStatistics* update_statistics(
return stats;
}
static profiler::BlockStatistics* update_statistics(CsStatsMap& _stats_map, const profiler::BlocksTree& _current, profiler::block_index_t _current_index, profiler::block_index_t _parent_index, const profiler::blocks_t& _blocks, bool _calculate_children = true)
{
static profiler::BlockStatistics* update_statistics(
CsStatsMap& _stats_map,
const profiler::BlocksTree& _current,
profiler::block_index_t _current_index,
profiler::block_index_t _parent_index,
const profiler::blocks_t& _blocks,
bool _calculate_children = true
) {
auto duration = _current.node->duration();
CsStatsMap::key_type key(_current.node->name());
auto it = _stats_map.find(key);
@ -318,7 +442,11 @@ static profiler::BlockStatistics* update_statistics(CsStatsMap& _stats_map, cons
{
// Update already existing statistics
auto stats = it->second; // write pointer to statistics into output (this is BlocksTree:: per_thread_stats or per_parent_stats or per_frame_stats)
// write pointer to statistics into output (this is BlocksTree:: per_thread_stats or per_parent_stats or per_frame_stats)
auto stats = it->second.stats;
auto& durations = it->second.durations;
++durations[duration].value;
++stats->calls_number; // update calls number of this block
stats->total_duration += duration; // update summary duration of all block calls
@ -351,7 +479,7 @@ static profiler::BlockStatistics* update_statistics(CsStatsMap& _stats_map, cons
// This is first time the block appear in the file.
// Create new statistics.
auto stats = new profiler::BlockStatistics(duration, _current_index, _parent_index);
_stats_map.emplace(key, stats);
_stats_map.emplace(key, Stats {stats, duration});
if (_calculate_children)
{
@ -362,6 +490,133 @@ static profiler::BlockStatistics* update_statistics(CsStatsMap& _stats_map, cons
return stats;
}
template <class TStatsMapIterator>
static void calculate_medians(TStatsMapIterator begin, TStatsMapIterator end)
{
for (auto it = begin; it != end; ++it)
{
auto& durations = it->second.durations;
if (durations.empty())
{
continue;
}
size_t total_count = 0;
for (auto& kv : durations)
{
total_count += kv.second.value;
}
auto stats = it->second.stats;
if (total_count & 1)
{
const auto index = total_count >> 1;
size_t i = 0;
for (auto& kv : durations)
{
const auto count = kv.second.value;
i += count;
if (i < index)
{
continue;
}
stats->median_duration = kv.first;
break;
}
}
else
{
const auto index2 = total_count >> 1;
const auto index1 = index2 - 1;
size_t i = 0;
bool i1 = false;
for (auto& kv : durations)
{
const auto count = kv.second.value;
i += count;
if (i < index1)
{
continue;
}
if (!i1)
{
i1 = true;
stats->median_duration = kv.first;
}
if (i < index2)
{
continue;
}
stats->median_duration += kv.first;
stats->median_duration >>= 1;
break;
}
}
decltype(it->second.durations) dummy;
dummy.swap(durations);
}
}
template <class TStatsMap>
static void calculate_medians_async(ReaderThreadPool& pool, TStatsMap& stats_map)
{
if (stats_map.empty())
{
return;
}
if (stats_map.size() < 1000)
{
calculate_medians(stats_map.begin(), stats_map.end());
return;
}
const auto threads_count = std::min(static_cast<size_t>(std::thread::hardware_concurrency()), stats_map.size());
const auto count_per_thread = stats_map.size() / threads_count;
std::vector<std::future<void>> results;
results.reserve(threads_count);
for (size_t i = 1; i <= threads_count; ++i)
{
results.emplace_back(pool.async([i, threads_count, count_per_thread, &stats_map]
{
auto begin = stats_map.begin();
const auto advance_count = (i - 1) * count_per_thread;
std::advance(begin, advance_count);
auto end = begin;
if (i == threads_count)
{
end = stats_map.end();
}
else
{
std::advance(end, count_per_thread);
}
calculate_medians(begin, end);
}));
}
for (auto& result : results)
{
if (result.valid())
{
result.get();
}
}
}
//////////////////////////////////////////////////////////////////////////
static void update_statistics_recursive(profiler::stats_map_t& _stats_map, profiler::BlocksTree& _current, profiler::block_index_t _current_index, profiler::block_index_t _parent_index, profiler::blocks_t& _blocks)
@ -715,6 +970,8 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic<
profiler::block_index_t blocks_counter = 0;
std::vector<char> name;
ReaderThreadPool pool;
while (!inStream.eof() && threads_read_number++ < header.threads_count)
{
EASY_BLOCK("Read thread data", profiler::colors::DarkGreen);
@ -804,6 +1061,9 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic<
}
}
// calculate medians for each block
calculate_medians_async(pool, per_thread_statistics_cs);
if (!update_progress(progress, 20 + static_cast<int>(67 * i / memory_size), _log))
{
return 0; // Loading interrupted
@ -933,6 +1193,9 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic<
if (tree.depth < child.depth)
tree.depth = child.depth;
}
// calculate medians for each block
calculate_medians_async(pool, per_parent_statistics);
}
else
{
@ -977,6 +1240,9 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic<
if (!update_progress(progress, 20 + static_cast<int>(67 * i / memory_size), _log))
return 0; // Loading interrupted
}
// calculate medians for each block
calculate_medians_async(pool, per_thread_statistics);
}
if (total_blocks_count != blocks_counter)
@ -1068,8 +1334,8 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic<
EASY_BLOCK("Gather statistics for roots", profiler::colors::Purple);
if (gather_statistics)
{
std::vector<std::thread> statistics_threads;
statistics_threads.reserve(threaded_trees.size());
std::vector<std::future<void>> results;
results.reserve(threaded_trees.size());
for (auto& it : threaded_trees)
{
@ -1081,7 +1347,7 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic<
auto& per_parent_statistics = parent_statistics[it.first];
per_parent_statistics.clear();
statistics_threads.emplace_back(std::thread([&] (profiler::BlocksTreeRoot& _root)
results.emplace_back(pool.async([&]
{
//std::sort(root.sync.begin(), root.sync.end(), [&blocks](profiler::block_index_t left, profiler::block_index_t right)
//{
@ -1089,24 +1355,27 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic<
//});
profiler::block_index_t cs_index = 0;
for (auto child_index : _root.children)
for (auto child_index : root.children)
{
auto& frame = blocks[child_index];
if (descriptors[frame.node->id()]->type() == profiler::BlockType::Block)
++_root.frames_number;
++root.frames_number;
frame.per_parent_stats = update_statistics(per_parent_statistics, frame, child_index, ~0U, blocks);//, root.thread_id, blocks);
per_frame_statistics.clear();
update_statistics_recursive(per_frame_statistics, frame, child_index, child_index, blocks);
if (cs_index < _root.sync.size())
calculate_medians(per_parent_statistics.begin(), per_parent_statistics.end());
calculate_medians(per_frame_statistics.begin(), per_frame_statistics.end());
if (cs_index < root.sync.size())
{
CsStatsMap frame_stats_cs;
do {
auto j = _root.sync[cs_index];
auto j = root.sync[cs_index];
auto& cs = blocks[j];
if (cs.node->end() < frame.node->begin())
continue;
@ -1114,23 +1383,28 @@ extern "C" PROFILER_API profiler::block_index_t fillTreesFromStream(std::atomic<
break;
cs.per_frame_stats = update_statistics(frame_stats_cs, cs, cs_index, child_index, blocks);
} while (++cs_index < _root.sync.size());
} while (++cs_index < root.sync.size());
calculate_medians(frame_stats_cs.begin(), frame_stats_cs.end());
}
if (_root.depth < frame.depth)
_root.depth = frame.depth;
if (root.depth < frame.depth)
root.depth = frame.depth;
_root.profiled_time += frame.node->duration();
root.profiled_time += frame.node->duration();
}
++_root.depth;
}, std::ref(root)));
++root.depth;
}));
}
int j = 0, n = static_cast<int>(statistics_threads.size());
for (auto& thread : statistics_threads)
int j = 0, n = static_cast<int>(results.size());
for (auto& result : results)
{
thread.join();
if (result.valid())
{
result.get();
}
progress.store(90 + (10 * ++j) / n, std::memory_order_release);
}
}

View File

@ -2348,20 +2348,20 @@ void BlocksGraphicsView::onIdleTimeout()
++row;
const auto duration = itemBlock.node->duration();
lay->addWidget(new QLabel("Duration:", widget), row, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Time:", widget), row, 0, Qt::AlignRight);
lay->addWidget(new QLabel(profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, duration, 3), widget), row, 1, 1, 2, Qt::AlignLeft);
++row;
if (itemBlock.per_thread_stats)
{
lay->addWidget(new QLabel("Sum:", widget), row, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Total:", widget), row, 0, Qt::AlignRight);
lay->addWidget(new QLabel(profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, itemBlock.per_thread_stats->total_duration, 3), widget), row, 1, 1, 2, Qt::AlignLeft);
++row;
lay->addWidget(new BoldLabel("-------- Statistics --------", widget), row, 0, 1, 3, Qt::AlignHCenter);
lay->addWidget(new QLabel("per ", widget), row + 1, 0, Qt::AlignRight);
lay->addWidget(new QLabel("This %:", widget), row + 2, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Sum %:", widget), row + 3, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Total %:", widget), row + 3, 0, Qt::AlignRight);
lay->addWidget(new QLabel("N Calls:", widget), row + 4, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Thread", widget), row + 1, 1, Qt::AlignHCenter);
@ -2436,7 +2436,7 @@ void BlocksGraphicsView::onIdleTimeout()
++row;
const auto duration = itemBlock.node->duration();
lay->addWidget(new QLabel("Duration:", widget), row, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Time:", widget), row, 0, Qt::AlignRight);
lay->addWidget(new QLabel(profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, duration, 3),
widget), row, 1, 1, 3, Qt::AlignLeft);
++row;
@ -2506,10 +2506,14 @@ void BlocksGraphicsView::onIdleTimeout()
{
const auto duration = itemBlock.node->duration();
lay->addWidget(new QLabel("Average:", widget), row, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Avg:", widget), row, 0, Qt::AlignRight);
lay->addWidget(new QLabel(profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, itemBlock.per_thread_stats->average_duration(), 3), widget), row, 1, 1, 3, Qt::AlignLeft);
++row;
lay->addWidget(new QLabel("Median:", widget), row, 0, Qt::AlignRight);
lay->addWidget(new QLabel(profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, itemBlock.per_thread_stats->median_duration, 3), widget), row, 1, 1, 3, Qt::AlignLeft);
++row;
// Calculate idle/active time
{
const auto& threadRoot = item->root();
@ -2555,8 +2559,8 @@ void BlocksGraphicsView::onIdleTimeout()
lay->addWidget(new BoldLabel("-------- Statistics --------", widget), row, 0, 1, 5, Qt::AlignHCenter);
lay->addWidget(new QLabel("per ", widget), row + 1, 0, Qt::AlignRight);
lay->addWidget(new QLabel("This %:", widget), row + 2, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Sum %:", widget), row + 3, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Sum self %:", widget), row + 4, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Total %:", widget), row + 3, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Total self %:", widget), row + 4, 0, Qt::AlignRight);
lay->addWidget(new QLabel("N Calls:", widget), row + 5, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Thread", widget), row + 1, 1, Qt::AlignHCenter);
@ -3174,7 +3178,7 @@ void ThreadNamesWidget::onIdleTimeout()
if (!root.children.empty())
duration = easyBlock(root.children.back()).tree.node->end() - easyBlock(root.children.front()).tree.node->begin();
lay->addWidget(new QLabel("Duration:", widget), row, 0, Qt::AlignRight);
lay->addWidget(new QLabel("Time:", widget), row, 0, Qt::AlignRight);
lay->addWidget(new QLabel(profiler_gui::timeStringRealNs(EASY_GLOBALS.time_units, duration, 3), widget), row, 1, Qt::AlignLeft);
++row;