mirror of
https://github.com/yse/easy_profiler.git
synced 2024-12-28 01:04:41 +08:00
Scrapped the alignment macros and replaced them with force-inline template functions; still need to test thoroughly.
This commit is contained in:
parent
75e4f35abe
commit
283b835dd8
@ -56,6 +56,7 @@ The Apache License, Version 2.0 (the "License");
|
|||||||
#include <thread>
|
#include <thread>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <list>
|
#include <list>
|
||||||
|
#include <type_traits>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
|
||||||
@ -145,50 +146,227 @@ namespace profiler {
|
|||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Useful on architectures that don't allow unaligned access (e.g. ARMv5).
|
|
||||||
// If this isn't defined, expressions like: *(uint16_t*)data = 0, will be used
|
|
||||||
// as usual, which is what you want on something like x86.
|
|
||||||
#ifdef EASY_ENABLE_STRICT_ALIGNMENT
|
|
||||||
# define EASY_UA_ZERO16(dst)\
|
|
||||||
do {\
|
|
||||||
((char*)dst)[0] = 0;\
|
|
||||||
((char*)dst)[1] = 0;\
|
|
||||||
} while(false)
|
|
||||||
|
|
||||||
# define EASY_UA_ZERO32(dst)\
|
//! Always-false trait: False<T>::value is false for every T (it inherits std::false_type).
//! NOTE(review): presumably intended for static_assert in templates that should only fail
//! when instantiated - no use is visible in this view; confirm against the callers.
template <typename T>
|
||||||
do {\
|
struct False : std::false_type {};
|
||||||
((char*)dst)[0] = 0;\
|
|
||||||
((char*)dst)[1] = 0;\
|
|
||||||
((char*)dst)[2] = 0;\
|
|
||||||
((char*)dst)[3] = 0;\
|
|
||||||
} while(false)
|
|
||||||
|
|
||||||
//! Checks if a pointer is aligned.
//! \tparam ALIGNMENT The required alignment in bytes (must be a non-zero power of 2).
//! \param ptr The pointer to check. It is only inspected, never dereferenced.
//! \returns true if the address held by \p ptr is a multiple of ALIGNMENT.
//!
template <uint32_t ALIGNMENT>
EASY_FORCE_INLINE bool is_aligned(const void* ptr)
{
    // x & (x - 1) clears the lowest set bit, so it is zero only when exactly one bit is set.
    static_assert(ALIGNMENT != 0 && (ALIGNMENT & (ALIGNMENT - 1)) == 0, "Alignment must be a power of two.");

    // The parentheses are required: == binds tighter than &, so without them
    // the mask would be compared with zero before being applied to the address.
    return (reinterpret_cast<uintptr_t>(ptr) & (ALIGNMENT - 1)) == 0;
}
|
|
||||||
|
|
||||||
# define EASY_UA_SET16(dst, val, type)
|
//! Sets the two bytes starting at \p ptr to zero.
//! \param ptr Destination of the 16-bit zero.
EASY_FORCE_INLINE void unaligned_zero16(void* ptr)
|
||||||
# define EASY_UA_SET32(dst, val, type)
|
{
|
||||||
# define EASY_UA_SET64(dst, val, type)
|
#ifndef EASY_ENABLE_STRICT_ALIGNMENT
|
||||||
|
// Default build: a single store through a uint16_t lvalue.
*(uint16_t*)ptr = 0;
|
||||||
# define EASY_UA_LOAD16(dst)
|
#else
|
||||||
# define EASY_UA_LOAD32(dst)
|
// Strict-alignment build: clear each byte separately through a char pointer.
((char*)ptr)[0] = 0;
|
||||||
# define EASY_UA_LOAD64(dst)
|
((char*)ptr)[1] = 0;
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
//! Sets the four bytes starting at \p ptr to zero.
//! \param ptr Destination of the 32-bit zero.
EASY_FORCE_INLINE void unaligned_zero32(void* ptr)
{
#ifdef EASY_ENABLE_STRICT_ALIGNMENT
    // Strict-alignment build: clear each byte separately through a char pointer.
    char* const bytes = static_cast<char*>(ptr);
    for (int i = 0; i < 4; ++i)
        bytes[i] = 0;
#else
    // Default build: a single store through a uint32_t lvalue.
    *static_cast<uint32_t*>(ptr) = 0;
#endif
}
|
||||||
|
|
||||||
|
//! Sets the eight bytes starting at \p ptr to zero.
//! \param ptr Destination of the 64-bit zero.
EASY_FORCE_INLINE void unaligned_zero64(void* ptr)
{
#ifdef EASY_ENABLE_STRICT_ALIGNMENT
    // An aligned destination takes one 64-bit store; anything else is cleared byte by byte.
    if (is_aligned<alignof(uint64_t)>(ptr)) {
        *static_cast<uint64_t*>(ptr) = 0;
        return;
    }

    char* const bytes = static_cast<char*>(ptr);
    for (int i = 0; i < 8; ++i)
        bytes[i] = 0;
#else
    // Default build: a single store through a uint64_t lvalue.
    *static_cast<uint64_t*>(ptr) = 0;
#endif
}
|
||||||
|
|
||||||
|
//! Stores a 16-bit value at \p ptr.
//! With EASY_ENABLE_STRICT_ALIGNMENT the value is copied byte by byte;
//! otherwise it is written with a single typed store.
//! \tparam T Any type whose size is exactly 2 bytes.
//! \param ptr Destination address.
//! \param val Value whose bytes are written to \p ptr.
template <typename T>
EASY_FORCE_INLINE void unaligned_store16(void* ptr, T val)
{
    static_assert(sizeof(T) == 2, "16 bit type required.");
#ifndef EASY_ENABLE_STRICT_ALIGNMENT
    *(T*)ptr = val;
#else
    // &val is a T*, which has no implicit conversion to const char*;
    // the cast is what lets us read the value's bytes.
    const char* const temp = reinterpret_cast<const char*>(&val);
    ((char*)ptr)[0] = temp[0];
    ((char*)ptr)[1] = temp[1];
#endif
}
|
||||||
|
|
||||||
|
//! Stores a 32-bit value at \p ptr.
//! With EASY_ENABLE_STRICT_ALIGNMENT the value is copied byte by byte;
//! otherwise it is written with a single typed store.
//! \tparam T Any type whose size is exactly 4 bytes.
//! \param ptr Destination address.
//! \param val Value whose bytes are written to \p ptr.
template <typename T>
EASY_FORCE_INLINE void unaligned_store32(void* ptr, T val)
{
    static_assert(sizeof(T) == 4, "32 bit type required.");
#ifndef EASY_ENABLE_STRICT_ALIGNMENT
    *(T*)ptr = val;
#else
    // &val is a T*, which has no implicit conversion to const char*;
    // the cast is what lets us read the value's bytes.
    const char* const temp = reinterpret_cast<const char*>(&val);
    ((char*)ptr)[0] = temp[0];
    ((char*)ptr)[1] = temp[1];
    ((char*)ptr)[2] = temp[2];
    ((char*)ptr)[3] = temp[3];
#endif
}
|
||||||
|
|
||||||
|
//! Stores a 64-bit value at \p ptr.
//! With EASY_ENABLE_STRICT_ALIGNMENT the value is copied byte by byte unless
//! \p ptr satisfies alignof(T); otherwise it is written with a single typed store.
//! \tparam T Any type whose size is exactly 8 bytes.
//! \param ptr Destination address.
//! \param val Value whose bytes are written to \p ptr.
template <typename T>
EASY_FORCE_INLINE void unaligned_store64(void* ptr, T val)
{
    static_assert(sizeof(T) == 8, "64 bit type required.");
#ifndef EASY_ENABLE_STRICT_ALIGNMENT
    *(T*)ptr = val;
#else
    // &val is a T*, which has no implicit conversion to const char*;
    // the cast is what lets us read the value's bytes.
    const char* const temp = reinterpret_cast<const char*>(&val);
    // Assume unaligned is more common.
    if (!is_aligned<alignof(T)>(ptr)) {
        ((char*)ptr)[0] = temp[0];
        ((char*)ptr)[1] = temp[1];
        ((char*)ptr)[2] = temp[2];
        ((char*)ptr)[3] = temp[3];
        ((char*)ptr)[4] = temp[4];
        ((char*)ptr)[5] = temp[5];
        ((char*)ptr)[6] = temp[6];
        ((char*)ptr)[7] = temp[7];
    }
    else {
        *(T*)ptr = val;
    }
#endif
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
EASY_FORCE_INLINE T unaligned_load16(const void* ptr)
|
||||||
|
{
|
||||||
|
static_assert(sizeof(T) == 2, "16 bit type required.");
|
||||||
|
#ifndef EASY_ENABLE_STRICT_ALIGNMENT
|
||||||
|
return *(T*)ptr;
|
||||||
|
#else
|
||||||
|
T value;
|
||||||
|
((char*)&value)[0] = ((char*)ptr)[0];
|
||||||
|
((char*)&value)[1] = ((char*)ptr)[1];
|
||||||
|
return value;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
EASY_FORCE_INLINE T unaligned_load16(const void* ptr, T* val)
|
||||||
|
{
|
||||||
|
static_assert(sizeof(T) == 2, "16 bit type required.");
|
||||||
|
#ifndef EASY_ENABLE_STRICT_ALIGNMENT
|
||||||
|
*val = *(T*)ptr;
|
||||||
|
return *val;
|
||||||
|
#else
|
||||||
|
((char*)val)[0] = ((char*)ptr)[0];
|
||||||
|
((char*)val)[1] = ((char*)ptr)[1];
|
||||||
|
return *val;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
EASY_FORCE_INLINE T unaligned_load32(const void* ptr)
|
||||||
|
{
|
||||||
|
static_assert(sizeof(T) == 4, "32 bit type required.");
|
||||||
|
#ifndef EASY_ENABLE_STRICT_ALIGNMENT
|
||||||
|
return *(T*)ptr;
|
||||||
|
#else
|
||||||
|
T value;
|
||||||
|
((char*)&value)[0] = ((char*)ptr)[0];
|
||||||
|
((char*)&value)[1] = ((char*)ptr)[1];
|
||||||
|
((char*)&value)[2] = ((char*)ptr)[2];
|
||||||
|
((char*)&value)[3] = ((char*)ptr)[3];
|
||||||
|
return value;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
//! Reads a 32-bit value from \p ptr into \p *val and also returns it.
//! With EASY_ENABLE_STRICT_ALIGNMENT the four bytes are copied individually;
//! otherwise the value is read with a single typed load.
//! \tparam T Any type whose size is exactly 4 bytes.
//! \param ptr Address to read from.
//! \param val Receives the loaded value.
//! \returns A copy of \p *val after the load.
template <typename T>
EASY_FORCE_INLINE T unaligned_load32(const void* ptr, T* val)
{
    static_assert(sizeof(T) == 4, "32 bit type required.");
#ifndef EASY_ENABLE_STRICT_ALIGNMENT
    *val = *(T*)ptr;
#else
    // Write into the object val points at; casting &val instead would
    // overwrite the pointer variable itself.
    char* const dst = reinterpret_cast<char*>(val);
    const char* const src = static_cast<const char*>(ptr);
    dst[0] = src[0];
    dst[1] = src[1];
    dst[2] = src[2];
    dst[3] = src[3];
#endif
    // Shared by both configurations so every path returns a value.
    return *val;
}
|
||||||
|
|
||||||
|
//! Reads a 64-bit value from \p ptr and returns it.
//! With EASY_ENABLE_STRICT_ALIGNMENT the eight bytes are copied individually unless
//! \p ptr satisfies alignof(T); otherwise the value is read with a single typed load.
//! \tparam T Any type whose size is exactly 8 bytes.
//! \param ptr Address to read from.
//! \returns The value read from \p ptr.
template <typename T>
EASY_FORCE_INLINE T unaligned_load64(const void* ptr)
{
    static_assert(sizeof(T) == 8, "64 bit type required.");
#ifndef EASY_ENABLE_STRICT_ALIGNMENT
    return *(T*)ptr;
#else
    // const is dropped for the alignment query only, so the call compiles
    // whether is_aligned() takes void* or const void*.
    if (!is_aligned<alignof(T)>(const_cast<void*>(ptr))) {
        T value;
        ((char*)&value)[0] = ((const char*)ptr)[0];
        ((char*)&value)[1] = ((const char*)ptr)[1];
        ((char*)&value)[2] = ((const char*)ptr)[2];
        ((char*)&value)[3] = ((const char*)ptr)[3];
        ((char*)&value)[4] = ((const char*)ptr)[4];
        ((char*)&value)[5] = ((const char*)ptr)[5];
        ((char*)&value)[6] = ((const char*)ptr)[6];
        ((char*)&value)[7] = ((const char*)ptr)[7];
        return value;
    }
    else {
        return *(T*)ptr;
    }
#endif
}
|
||||||
|
|
||||||
|
//! Reads a 64-bit value from \p ptr into \p *val and also returns it.
//! With EASY_ENABLE_STRICT_ALIGNMENT the eight bytes are copied individually unless
//! \p ptr satisfies alignof(T); otherwise the value is read with a single typed load.
//! \tparam T Any type whose size is exactly 8 bytes.
//! \param ptr Address to read from.
//! \param val Receives the loaded value.
//! \returns A copy of \p *val after the load.
template <typename T>
EASY_FORCE_INLINE T unaligned_load64(const void* ptr, T* val)
{
    static_assert(sizeof(T) == 8, "64 bit type required.");
#ifndef EASY_ENABLE_STRICT_ALIGNMENT
    *val = *(T*)ptr;
#else
    // const is dropped for the alignment query only, so the call compiles
    // whether is_aligned() takes void* or const void*.
    if (!is_aligned<alignof(T)>(const_cast<void*>(ptr))) {
        // Write into the object val points at; casting &val instead would
        // overwrite the pointer variable itself.
        char* const dst = reinterpret_cast<char*>(val);
        const char* const src = static_cast<const char*>(ptr);
        dst[0] = src[0];
        dst[1] = src[1];
        dst[2] = src[2];
        dst[3] = src[3];
        dst[4] = src[4];
        dst[5] = src[5];
        dst[6] = src[6];
        dst[7] = src[7];
    }
    else {
        *val = *(T*)ptr;
    }
#endif
    // Shared by both configurations so every path returns a value.
    return *val;
}
|
||||||
|
|
||||||
|
|
||||||
template <uint16_t N>
|
template <uint16_t N>
|
||||||
@ -227,9 +405,9 @@ class chunk_allocator
|
|||||||
// Although there is no need for unaligned access stuff b/c a new chunk will
|
// Although there is no need for unaligned access stuff b/c a new chunk will
|
||||||
// usually be at least 8 byte aligned (and we only need 2 byte alignment),
|
// usually be at least 8 byte aligned (and we only need 2 byte alignment),
|
||||||
// this is the only way I have been able to get rid of the GCC strict-aliasing warning
|
// this is the only way I have been able to get rid of the GCC strict-aliasing warning
|
||||||
// without using std::memset.
|
// without using std::memset. It's an extra line, but is just as fast as *(uint16_t*)last->data = 0;
|
||||||
char* temp = (char*)&last->data[0];
|
char* const data = (char*)&last->data;
|
||||||
*(uint16_t*)temp = 0;
|
*(uint16_t*)data = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Invert current chunks list to enable to iterate over chunks list in direct order.
|
/** Invert current chunks list to enable to iterate over chunks list in direct order.
|
||||||
@ -280,9 +458,14 @@ public:
|
|||||||
char* data = (char*)&m_chunks.back().data[0] + chunkOffset;
|
char* data = (char*)&m_chunks.back().data[0] + chunkOffset;
|
||||||
m_chunkOffset = chunkOffset + n + sizeof(uint16_t);
|
m_chunkOffset = chunkOffset + n + sizeof(uint16_t);
|
||||||
|
|
||||||
std::memcpy(data, &n, sizeof(uint16_t));
|
unaligned_store16(data, n);
|
||||||
data += sizeof(uint16_t);
|
data += sizeof(uint16_t);
|
||||||
std::memset(data + n, 0, sizeof(uint16_t));
|
|
||||||
|
// If there is enough space for at least another payload size,
|
||||||
|
// set it to zero.
|
||||||
|
if (chunkOffset < N-1)
|
||||||
|
unaligned_zero16(data + n);
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -293,12 +476,13 @@ public:
|
|||||||
|
|
||||||
char* data = (char*)&m_chunks.back().data[0];
|
char* data = (char*)&m_chunks.back().data[0];
|
||||||
std::memcpy(data, &n, sizeof(uint16_t));
|
std::memcpy(data, &n, sizeof(uint16_t));
|
||||||
|
unaligned_store16(data, n);
|
||||||
data += sizeof(uint16_t);
|
data += sizeof(uint16_t);
|
||||||
|
|
||||||
// If there is enough space for at least another payload size,
|
// If there is enough space for at least another payload size,
|
||||||
// set it to zero.
|
// set it to zero.
|
||||||
if (chunkOffset < N-1)
|
if (chunkOffset < N-1)
|
||||||
std::memset(data + n, 0, sizeof(uint16_t));
|
unaligned_zero16(data + n);
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
@ -353,15 +537,12 @@ public:
|
|||||||
do {
|
do {
|
||||||
const char* data = (char*)current->data;
|
const char* data = (char*)current->data;
|
||||||
int_fast32_t chunkOffset = 0; // signed int so overflow is not checked.
|
int_fast32_t chunkOffset = 0; // signed int so overflow is not checked.
|
||||||
uint16_t payloadSize = 0;
|
uint16_t payloadSize = unaligned_load16<uint16_t>(data);
|
||||||
std::memcpy(&payloadSize, data, sizeof(uint16_t));
|
while (chunkOffset < MAX_CHUNK_OFFSET && unaligned_load16(data, &payloadSize) != 0) {
|
||||||
while (chunkOffset < MAX_CHUNK_OFFSET && payloadSize != 0) {
|
|
||||||
const uint16_t chunkSize = sizeof(uint16_t) + payloadSize;
|
const uint16_t chunkSize = sizeof(uint16_t) + payloadSize;
|
||||||
_outputStream.write(data, chunkSize);
|
_outputStream.write(data, chunkSize);
|
||||||
|
|
||||||
data += chunkSize;
|
data += chunkSize;
|
||||||
chunkOffset += chunkSize;
|
chunkOffset += chunkSize;
|
||||||
std::memcpy(&payloadSize, data, sizeof(uint16_t));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
current = current->prev;
|
current = current->prev;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user