0
0
mirror of https://github.com/yse/easy_profiler.git synced 2024-12-26 16:11:02 +08:00

Merge pull-request

This commit is contained in:
Sergey Yagovtsev 2016-12-12 02:24:36 +03:00
commit 98b3ac3a8f
2 changed files with 86 additions and 13 deletions

View File

@ -1,5 +1,9 @@
project(easy_profiler)
IF (NOT DEFINED LIB_NAME)
SET(LIB_NAME ${PROJECT_NAME})
ENDIF()
set(CPP_FILES
block.cpp
profile_manager.cpp
@ -85,10 +89,10 @@ if(WIN32)
)
endif(WIN32)
add_library(${PROJECT_NAME} SHARED ${SOURCES} resources.rc)
add_library(${LIB_NAME} SHARED ${SOURCES} resources.rc)
if(UNIX)
set(PLATFORM_LIBS pthread)
endif(UNIX)
target_link_libraries(${PROJECT_NAME} ${PLATFORM_LIBS})
target_link_libraries(${LIB_NAME} ${PLATFORM_LIBS})

View File

@ -40,6 +40,9 @@ along with this program.If not, see <http://www.gnu.org/licenses/>.
#else
#include <chrono>
#include <time.h>
#ifdef __ARM_ARCH
#include <sys/time.h>
#endif//__ARM_ARCH
#endif
static inline profiler::timestamp_t getCurrentTime()
@ -50,23 +53,89 @@ static inline profiler::timestamp_t getCurrentTime()
if (!QueryPerformanceCounter(&elapsedMicroseconds))
return 0;
return (profiler::timestamp_t)elapsedMicroseconds.QuadPart;
#else
#else// not _WIN32
#if (defined(__GNUC__) || defined(__ICC))
// part of code from google/benchmark library (Licensed under the Apache License, Version 2.0)
// see https://github.com/google/benchmark/blob/master/src/cycleclock.h#L111
#if defined(__i386__)
unsigned long long t;
__asm__ __volatile__("rdtsc" : "=A"(t));
return t;
#elif defined(__x86_64__)
unsigned int hi, lo;
__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
return ((uint64_t)hi << 32) | lo;
#endif
int64_t ret;
__asm__ volatile("rdtsc" : "=A"(ret));
return ret;
#elif defined(__x86_64__) || defined(__amd64__)
uint64_t low, high;
__asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
return (high << 32) | low;
#elif defined(__powerpc__) || defined(__ppc__)
// This returns a time-base, which is not always precisely a cycle-count.
int64_t tbl, tbu0, tbu1;
asm("mftbu %0" : "=r"(tbu0));
asm("mftb %0" : "=r"(tbl));
asm("mftbu %0" : "=r"(tbu1));
tbl &= -static_cast<int64>(tbu0 == tbu1);
// high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage)
return (tbu1 << 32) | tbl;
#elif defined(__sparc__)
int64_t tick;
asm(".byte 0x83, 0x41, 0x00, 0x00");
asm("mov %%g1, %0" : "=r"(tick));
return tick;
#elif defined(__ia64__)
int64_t itc;
asm("mov %0 = ar.itc" : "=r"(itc));
return itc;
#elif defined(COMPILER_MSVC) && defined(_M_IX86)
// Older MSVC compilers (like 7.x) don't seem to support the
// __rdtsc intrinsic properly, so I prefer to use _asm instead
// when I know it will work. Otherwise, I'll use __rdtsc and hope
// the code is being compiled with a non-ancient compiler.
_asm rdtsc
#elif defined(COMPILER_MSVC)
return __rdtsc();
#elif defined(__aarch64__)
// System timer of ARMv8 runs at a different frequency than the CPU's.
// The frequency is fixed, typically in the range 1-50MHz. It can be
// read at CNTFRQ special register. We assume the OS has set up
// the virtual timer properly.
int64_t virtual_timer_value;
asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
return virtual_timer_value;
#elif defined(__ARM_ARCH)
#if (__ARM_ARCH >= 6) // V6 is the earliest arch that has a standard cyclecount
uint32_t pmccntr;
uint32_t pmuseren;
uint32_t pmcntenset;
// Read the user mode perf monitor counter access permissions.
asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
if (pmcntenset & 0x80000000ul) { // Is it counting?
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
// The counter is set up to count every 64th cycle
return static_cast<int64_t>(pmccntr) * 64; // Should optimize to << 6
}
}
#endif
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__mips__)
// mips apparently only allows rdtsc for superusers, so we fall
// back to gettimeofday. It's possible clock_gettime would be better.
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#else
#warning You need to define fast getCurrentTime() for your OS and CPU
return std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
#define USE_STD_CHRONO
#endif
#else
#else // not _WIN32, __GNUC__, __ICC
#warning You need to define fast getCurrentTime() for your OS and CPU
return std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::system_clock::now()).time_since_epoch().count();
#define USE_STD_CHRONO
#define USE_STD_CHRONO
#endif
#endif