feat(third_party): add oatpp, googletest, benchmark
All checks were successful
sm-rpc / build (Debug, aarch64-linux-gnu) (push) Successful in 1m7s
sm-rpc / build (Debug, arm-linux-gnueabihf) (push) Successful in 1m15s
sm-rpc / build (Debug, host.gcc) (push) Successful in 1m4s
sm-rpc / build (Debug, mipsel-linux-gnu) (push) Successful in 1m16s
sm-rpc / build (Release, aarch64-linux-gnu) (push) Successful in 1m34s
sm-rpc / build (Release, arm-linux-gnueabihf) (push) Successful in 1m33s
sm-rpc / build (Release, host.gcc) (push) Successful in 1m23s
sm-rpc / build (Release, mipsel-linux-gnu) (push) Successful in 1m30s
third_party/benchmark/test/perf_counters_gtest.cc (vendored, new file) | 307
@@ -0,0 +1,307 @@
#include <random>
#include <thread>

#include "../src/perf_counters.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

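// Fallback for older GoogleTest releases that predate GTEST_SKIP: the shim
// swallows the skip message and returns from the test body instead.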
#ifndef GTEST_SKIP
struct MsgHandler {
  void operator=(std::ostream&) {}
};
#define GTEST_SKIP() return MsgHandler() = std::cout
#endif

using benchmark::internal::PerfCounters;
using benchmark::internal::PerfCountersMeasurement;
using benchmark::internal::PerfCounterValues;
using ::testing::AllOf;
using ::testing::Gt;
using ::testing::Lt;

namespace {
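// Generic libpfm event names for CPU cycles and retired instructions.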
const char kGenericPerfEvent1[] = "CYCLES";
const char kGenericPerfEvent2[] = "INSTRUCTIONS";

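// Initialize() must agree with the compile-time kSupported flag.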
TEST(PerfCountersTest, Init) {
  EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported);
}

TEST(PerfCountersTest, OneCounter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Performance counters not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1}).num_counters(), 1);
}

TEST(PerfCountersTest, NegativeTest) {
  if (!PerfCounters::kSupported) {
    EXPECT_FALSE(PerfCounters::Initialize());
    return;
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  // Safety checks.
  // Create() will always create a valid object, even if passed no or
  // wrong arguments, as the new behavior is to warn about and drop
  // unsupported counters.
  EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0);
  EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0);
  EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0);
  {
    // Try sneaking in a bad egg to see if it is filtered out. The
    // number of counters has to be two, not zero.
    auto counter =
        PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), std::vector<std::string>(
                                   {kGenericPerfEvent2, kGenericPerfEvent1}));
  }
  {
    // Try sneaking in an outrageous counter, like a fat-finger mistake.
    auto counter = PerfCounters::Create(
        {kGenericPerfEvent2, "not a counter name", kGenericPerfEvent1});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), std::vector<std::string>(
                                   {kGenericPerfEvent2, kGenericPerfEvent1}));
  }
  {
    // Finally try a golden input - it should accept both of them.
    EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2})
                  .num_counters(),
              2);
  }
  {
    // Add a bad apple at the end of the chain to check the edges.
    auto counter = PerfCounters::Create(
        {kGenericPerfEvent1, kGenericPerfEvent2, "bad event name"});
    EXPECT_EQ(counter.num_counters(), 2);
    EXPECT_EQ(counter.names(), std::vector<std::string>(
                                   {kGenericPerfEvent1, kGenericPerfEvent2}));
  }
}

TEST(PerfCountersTest, Read1Counter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_EQ(counters.num_counters(), 1);
  PerfCounterValues values1(1);
  EXPECT_TRUE(counters.Snapshot(&values1));
  EXPECT_GT(values1[0], 0);
  PerfCounterValues values2(1);
  EXPECT_TRUE(counters.Snapshot(&values2));
  EXPECT_GT(values2[0], 0);
  EXPECT_GT(values2[0], values1[0]);
}

TEST(PerfCountersTest, Read2Counters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  EXPECT_EQ(counters.num_counters(), 2);
  PerfCounterValues values1(2);
  EXPECT_TRUE(counters.Snapshot(&values1));
  EXPECT_GT(values1[0], 0);
  EXPECT_GT(values1[1], 0);
  PerfCounterValues values2(2);
  EXPECT_TRUE(counters.Snapshot(&values2));
  EXPECT_GT(values2[0], 0);
  EXPECT_GT(values2[1], 0);
}

TEST(PerfCountersTest, ReopenExistingCounters) {
  // This test works on recent and old Intel hardware, Pixel 3, and Pixel 6.
  // However, we cannot make assumptions beyond 2 HW counters due to Pixel 6.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
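  // Create two counter objects over the same event to check that reopening
  // an already-opened counter still yields readable values.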
  std::vector<std::string> kMetrics({kGenericPerfEvent1});
  std::vector<PerfCounters> counters(2);
  for (auto& counter : counters) {
    counter = PerfCounters::Create(kMetrics);
  }
  PerfCounterValues values(1);
  EXPECT_TRUE(counters[0].Snapshot(&values));
  EXPECT_TRUE(counters[1].Snapshot(&values));
}

TEST(PerfCountersTest, CreateExistingMeasurements) {
  // Like the ReopenExistingCounters test above, this test works (i.e. causes
  // reads to fail) under today's assumptions about hardware capabilities,
  // i.e. a small number (2) of hardware counters.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // This means we will try 10 counters, though at this time we can only
  // guarantee that the first few will work. Perhaps in the future we could
  // use libpfm to query the hardware limits on this particular platform.
  const int kMaxCounters = 10;
  const int kMinValidCounters = 2;

  // Use a ubiquitous counter that is guaranteed to work on all platforms.
  const std::vector<std::string> kMetrics{"cycles"};

  // Cannot create a vector of actual objects because the copy constructor
  // of PerfCounters is deleted - and so it is implicitly deleted on
  // PerfCountersMeasurement too.
  std::vector<std::unique_ptr<PerfCountersMeasurement>>
      perf_counter_measurements;

  perf_counter_measurements.reserve(kMaxCounters);
  for (int j = 0; j < kMaxCounters; ++j) {
    perf_counter_measurements.emplace_back(
        new PerfCountersMeasurement(kMetrics));
  }

  std::vector<std::pair<std::string, double>> measurements;

  // Start all counters together to see if they hold
  size_t max_counters = kMaxCounters;
  for (size_t i = 0; i < kMaxCounters; ++i) {
    auto& counter(*perf_counter_measurements[i]);
    EXPECT_EQ(counter.num_counters(), 1);
    if (!counter.Start()) {
      max_counters = i;
      break;
    }
  }

  ASSERT_GE(max_counters, kMinValidCounters);

  // Stop all counters together
  for (size_t i = 0; i < max_counters; ++i) {
    auto& counter(*perf_counter_measurements[i]);
    EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
  }

  // Start/stop individually
  for (size_t i = 0; i < max_counters; ++i) {
    auto& counter(*perf_counter_measurements[i]);
    measurements.clear();
    counter.Start();
    EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
  }
}

// We try to do some meaningful work here, but the compiler insists on
// optimizing away our loop, so we had to add a no-optimize macro. In case
// it fails, we added some entropy to this pool as well.

BENCHMARK_DONT_OPTIMIZE size_t do_work() {
  static std::mt19937 rd{std::random_device{}()};
  static std::uniform_int_distribution<size_t> mrand(0, 10);
  const size_t kNumLoops = 1000000;
  size_t sum = 0;
  for (size_t j = 0; j < kNumLoops; ++j) {
    sum += mrand(rd);
  }
  benchmark::DoNotOptimize(sum);
  return sum;
}

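// Spawns `threadcount` threads running do_work(), snapshotting the shared
// counters right after the threads start and again after they all join.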
void measure(size_t threadcount, PerfCounterValues* before,
             PerfCounterValues* after) {
  BM_CHECK_NE(before, nullptr);
  BM_CHECK_NE(after, nullptr);
  std::vector<std::thread> threads(threadcount);
  auto work = [&]() { BM_CHECK(do_work() > 1000); };

  // We need to first set up the counters, then start the threads, so the
  // threads would inherit the counters. But later, we need to first destroy
  // the thread pool (so all the work finishes), then measure the counters. So
  // the scopes overlap, and we need to explicitly control the scope of the
  // threadpool.
  auto counters =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  for (auto& t : threads) t = std::thread(work);
  counters.Snapshot(before);
  for (auto& t : threads) t.join();
  counters.Snapshot(after);
}

TEST(PerfCountersTest, MultiThreaded) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  PerfCounterValues before(2);
  PerfCounterValues after(2);

  // Note that this test will work even if we taskset it to a single CPU;
  // in this case the threads will run sequentially.
  // Start two threads and measure the number of combined cycles and
  // instructions.
  measure(2, &before, &after);
  std::vector<double> Elapsed2Threads{
      static_cast<double>(after[0] - before[0]),
      static_cast<double>(after[1] - before[1])};

  // Start four threads and measure the number of combined cycles and
  // instructions.
  measure(4, &before, &after);
  std::vector<double> Elapsed4Threads{
      static_cast<double>(after[0] - before[0]),
      static_cast<double>(after[1] - before[1])};

  // The following expectations fail (at least on a beefy workstation with
  // lots of cpus) - it seems that in some circumstances the runtime of 4
  // threads can even be better than with 2.
  // So instead of expecting 4 threads to be slower, let's just make sure they
  // do not differ too much in general (one is not more than 10x the other).
  EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10)));
  EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10)));
}

TEST(PerfCountersTest, HardwareLimits) {
  // Like the ReopenExistingCounters test above, this test works (i.e. causes
  // reads to fail) under today's assumptions about hardware capabilities,
  // i.e. a small number (3-4) of hardware counters.
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // Taken from `perf list`, but focuses only on those HW events that actually
  // were reported when running `sudo perf stat -a sleep 10`, intersected over
  // several platforms. All HW events listed by the first command but not
  // reported by the second seem not to work. This is sad, as we don't really
  // get to test the grouping here (groups can contain up to 6 members)...
  std::vector<std::string> counter_names{
      "cycles",         // leader
      "instructions",   //
      "branch-misses",  //
  };

  // On the off chance that some of these values are not supported,
  // we filter them out so the test will complete without failure,
  // although it might not actually test the grouping on that platform.
  std::vector<std::string> valid_names;
  for (const std::string& name : counter_names) {
    if (PerfCounters::IsCounterSupported(name)) {
      valid_names.push_back(name);
    }
  }
  PerfCountersMeasurement counter(valid_names);

  std::vector<std::pair<std::string, double>> measurements;

  counter.Start();
  EXPECT_TRUE(counter.Stop(measurements));
}

}  // namespace