diff --git a/easy_profiler_core/CMakeLists.txt b/easy_profiler_core/CMakeLists.txt index 0252192..b49ff27 100644 --- a/easy_profiler_core/CMakeLists.txt +++ b/easy_profiler_core/CMakeLists.txt @@ -1,5 +1,9 @@ project(easy_profiler) +IF (NOT DEFINED LIB_NAME) + SET(LIB_NAME ${PROJECT_NAME}) +ENDIF() + set(CPP_FILES block.cpp profile_manager.cpp @@ -85,10 +89,10 @@ if(WIN32) ) endif(WIN32) -add_library(${PROJECT_NAME} SHARED ${SOURCES} resources.rc) +add_library(${LIB_NAME} SHARED ${SOURCES} resources.rc) if(UNIX) set(PLATFORM_LIBS pthread) endif(UNIX) -target_link_libraries(${PROJECT_NAME} ${PLATFORM_LIBS}) +target_link_libraries(${LIB_NAME} ${PLATFORM_LIBS}) diff --git a/easy_profiler_core/current_time.h b/easy_profiler_core/current_time.h index 3579323..d7492ae 100644 --- a/easy_profiler_core/current_time.h +++ b/easy_profiler_core/current_time.h @@ -40,6 +40,9 @@ along with this program.If not, see . #else #include #include +#ifdef __ARM_ARCH +#include +#endif//__ARM_ARCH #endif static inline profiler::timestamp_t getCurrentTime() @@ -50,23 +53,89 @@ static inline profiler::timestamp_t getCurrentTime() if (!QueryPerformanceCounter(&elapsedMicroseconds)) return 0; return (profiler::timestamp_t)elapsedMicroseconds.QuadPart; -#else +#else// not _WIN32 #if (defined(__GNUC__) || defined(__ICC)) + // part of code from google/benchmark library (Licensed under the Apache License, Version 2.0) + // see https://github.com/google/benchmark/blob/master/src/cycleclock.h#L111 #if defined(__i386__) - unsigned long long t; - __asm__ __volatile__("rdtsc" : "=A"(t)); - return t; - #elif defined(__x86_64__) - unsigned int hi, lo; - __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi)); - return ((uint64_t)hi << 32) | lo; - #endif + int64_t ret; + __asm__ volatile("rdtsc" : "=A"(ret)); + return ret; + #elif defined(__x86_64__) || defined(__amd64__) + uint64_t low, high; + __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); + return (high << 32) | low; + #elif defined(__powerpc__) || defined(__ppc__) + // This returns a time-base, which is not always precisely a cycle-count. + int64_t tbl, tbu0, tbu1; + asm("mftbu %0" : "=r"(tbu0)); + asm("mftb %0" : "=r"(tbl)); + asm("mftbu %0" : "=r"(tbu1)); + tbl &= -static_cast(tbu0 == tbu1); + // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) + return (tbu1 << 32) | tbl; + #elif defined(__sparc__) + int64_t tick; + asm(".byte 0x83, 0x41, 0x00, 0x00"); + asm("mov %%g1, %0" : "=r"(tick)); + return tick; + #elif defined(__ia64__) + int64_t itc; + asm("mov %0 = ar.itc" : "=r"(itc)); + return itc; + #elif defined(COMPILER_MSVC) && defined(_M_IX86) + // Older MSVC compilers (like 7.x) don't seem to support the + // __rdtsc intrinsic properly, so I prefer to use _asm instead + // when I know it will work. Otherwise, I'll use __rdtsc and hope + // the code is being compiled with a non-ancient compiler. + _asm rdtsc + #elif defined(COMPILER_MSVC) + return __rdtsc(); + #elif defined(__aarch64__) + // System timer of ARMv8 runs at a different frequency than the CPU's. + // The frequency is fixed, typically in the range 1-50MHz. It can be + // read at CNTFRQ special register. We assume the OS has set up + // the virtual timer properly. + int64_t virtual_timer_value; + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); + return virtual_timer_value; + #elif defined(__ARM_ARCH) + #if (__ARM_ARCH >= 6) // V6 is the earliest arch that has a standard cyclecount + uint32_t pmccntr; + uint32_t pmuseren; + uint32_t pmcntenset; + // Read the user mode perf monitor counter access permissions. + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); + if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); + if (pmcntenset & 0x80000000ul) { // Is it counting? + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); + // The counter is set up to count every 64th cycle + return static_cast(pmccntr) * 64; // Should optimize to << 6 + } + } + #endif + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; + #elif defined(__mips__) + // mips apparently only allows rdtsc for superusers, so we fall + // back to gettimeofday. It's possible clock_gettime would be better. + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; + #else + #warning You need to define fast getCurrentTime() for your OS and CPU + return std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); + #define USE_STD_CHRONO + #endif -#else +#else // not _WIN32, __GNUC__, __ICC + #warning You need to define fast getCurrentTime() for your OS and CPU return std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); -#define USE_STD_CHRONO + #define USE_STD_CHRONO #endif #endif