From c7b9d4f464385fa1cc8067899a77dccb1af27dbf Mon Sep 17 00:00:00 2001 From: Victor Zarubkin Date: Sat, 30 Sep 2017 19:51:33 +0300 Subject: [PATCH] #0 [Core] Refactoring: moved a lot of code to different source files --- easy_profiler_core/CMakeLists.txt | 20 +- easy_profiler_core/chunk_allocator.h | 507 ++++++++++++++++++ easy_profiler_core/current_thread.h | 79 +++ easy_profiler_core/event_trace_win.cpp | 10 +- easy_profiler_core/nonscoped_block.cpp | 91 ++++ easy_profiler_core/nonscoped_block.h | 73 +++ easy_profiler_core/profile_manager.cpp | 139 +---- easy_profiler_core/profile_manager.h | 694 +------------------------ easy_profiler_core/stack_buffer.h | 136 +++++ easy_profiler_core/thread_storage.cpp | 121 +++++ easy_profiler_core/thread_storage.h | 124 +++++ 11 files changed, 1162 insertions(+), 832 deletions(-) create mode 100644 easy_profiler_core/chunk_allocator.h create mode 100644 easy_profiler_core/current_thread.h create mode 100644 easy_profiler_core/nonscoped_block.cpp create mode 100644 easy_profiler_core/nonscoped_block.h create mode 100644 easy_profiler_core/stack_buffer.h create mode 100644 easy_profiler_core/thread_storage.cpp create mode 100644 easy_profiler_core/thread_storage.h diff --git a/easy_profiler_core/CMakeLists.txt b/easy_profiler_core/CMakeLists.txt index 8cce6e7..7ead8fb 100644 --- a/easy_profiler_core/CMakeLists.txt +++ b/easy_profiler_core/CMakeLists.txt @@ -109,25 +109,31 @@ message(STATUS "") # Add source files: set(CPP_FILES block.cpp + easy_socket.cpp + event_trace_win.cpp + nonscoped_block.cpp profile_manager.cpp reader.cpp - event_trace_win.cpp - easy_socket.cpp + thread_storage.cpp ) set(H_FILES - profile_manager.h - spin_lock.h - event_trace_win.h + chunk_allocator.h current_time.h + current_thread.h + event_trace_win.h + nonscoped_block.h + profile_manager.h + thread_storage.h + spin_lock.h + stack_buffer.h ) set(INCLUDE_FILES - include/easy/profiler.h - include/easy/reader.h include/easy/easy_net.h 
include/easy/easy_socket.h include/easy/easy_compiler_support.h + include/easy/profiler.h include/easy/profiler_aux.h include/easy/profiler_colors.h include/easy/reader.h diff --git a/easy_profiler_core/chunk_allocator.h b/easy_profiler_core/chunk_allocator.h new file mode 100644 index 0000000..0fa2c0d --- /dev/null +++ b/easy_profiler_core/chunk_allocator.h @@ -0,0 +1,507 @@ +/** +Lightweight profiler library for c++ +Copyright(C) 2016-2017 Sergey Yagovtsev, Victor Zarubkin + +Licensed under either of + * MIT license (LICENSE.MIT or http://opensource.org/licenses/MIT) + * Apache License, Version 2.0, (LICENSE.APACHE or http://www.apache.org/licenses/LICENSE-2.0) +at your option. + +The MIT License + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is furnished + to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. + + +The Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +**/ + +#ifndef EASY_PROFILER_CHUNK_ALLOCATOR_H +#define EASY_PROFILER_CHUNK_ALLOCATOR_H + +#include +#include +#include +#include +#include "outstream.h" + +////////////////////////////////////////////////////////////////////////// + +#ifndef EASY_ENABLE_ALIGNMENT +# define EASY_ENABLE_ALIGNMENT 0 +#endif + +#ifndef EASY_ALIGNMENT_SIZE +# define EASY_ALIGNMENT_SIZE alignof(std::max_align_t) +#endif + +#if EASY_ENABLE_ALIGNMENT == 0 +# define EASY_ALIGNED(TYPE, VAR, A) TYPE VAR +# define EASY_MALLOC(MEMSIZE, A) malloc(MEMSIZE) +# define EASY_FREE(MEMPTR) free(MEMPTR) +#else +# if defined(_MSC_VER) +# define EASY_ALIGNED(TYPE, VAR, A) __declspec(align(A)) TYPE VAR +# define EASY_MALLOC(MEMSIZE, A) _aligned_malloc(MEMSIZE, A) +# define EASY_FREE(MEMPTR) _aligned_free(MEMPTR) +# elif defined(__GNUC__) +# define EASY_ALIGNED(TYPE, VAR, A) TYPE VAR __attribute__((aligned(A))) +# define EASY_MALLOC(MEMSIZE, A) memalign(A, MEMSIZE) +# define EASY_FREE(MEMPTR) free(MEMPTR) +# else +# define EASY_ALIGNED(TYPE, VAR, A) TYPE VAR +# define EASY_MALLOC(MEMSIZE, A) malloc(MEMSIZE) +# define EASY_FREE(MEMPTR) free(MEMPTR) +# endif +#endif + +////////////////////////////////////////////////////////////////////////// + +//! Checks if a pointer is aligned. +//! \param ptr The pointer to check. +//! \param alignment The alignement (must be a power of 2) +//! \returns true if the memory is aligned. +//! 
+template +EASY_FORCE_INLINE bool is_aligned(const void* ptr) // const void*: the unaligned_load64() overloads call this with a const pointer +{ + static_assert(ALIGNMENT != 0 && (ALIGNMENT & (ALIGNMENT - 1)) == 0, "Alignment must be a power of two."); // x & (x - 1) is zero only for powers of two; the mask below relies on that + return ((uintptr_t)ptr & (ALIGNMENT-1)) == 0; +} + +EASY_FORCE_INLINE void unaligned_zero16(void* ptr) +{ +#ifndef EASY_ENABLE_STRICT_ALIGNMENT + *(uint16_t*)ptr = 0; +#else + ((char*)ptr)[0] = 0; + ((char*)ptr)[1] = 0; +#endif +} + +EASY_FORCE_INLINE void unaligned_zero32(void* ptr) +{ +#ifndef EASY_ENABLE_STRICT_ALIGNMENT + *(uint32_t*)ptr = 0; +#else + ((char*)ptr)[0] = 0; + ((char*)ptr)[1] = 0; + ((char*)ptr)[2] = 0; + ((char*)ptr)[3] = 0; +#endif +} + +EASY_FORCE_INLINE void unaligned_zero64(void* ptr) +{ +#ifndef EASY_ENABLE_STRICT_ALIGNMENT + *(uint64_t*)ptr = 0; +#else + // Assume unaligned is more common. + if (!is_aligned(ptr)) { + ((char*)ptr)[0] = 0; + ((char*)ptr)[1] = 0; + ((char*)ptr)[2] = 0; + ((char*)ptr)[3] = 0; + ((char*)ptr)[4] = 0; + ((char*)ptr)[5] = 0; + ((char*)ptr)[6] = 0; + ((char*)ptr)[7] = 0; + } + else { + *(uint64_t*)ptr = 0; + } +#endif +} + +template +EASY_FORCE_INLINE void unaligned_store16(void* ptr, T val) +{ + static_assert(sizeof(T) == 2, "16 bit type required."); +#ifndef EASY_ENABLE_STRICT_ALIGNMENT + *(T*)ptr = val; +#else + const char* const temp = (char*)&val; + ((char*)ptr)[0] = temp[0]; + ((char*)ptr)[1] = temp[1]; +#endif +} + +template +EASY_FORCE_INLINE void unaligned_store32(void* ptr, T val) +{ + static_assert(sizeof(T) == 4, "32 bit type required."); +#ifndef EASY_ENABLE_STRICT_ALIGNMENT + *(T*)ptr = val; +#else + const char* const temp = (char*)&val; + ((char*)ptr)[0] = temp[0]; + ((char*)ptr)[1] = temp[1]; + ((char*)ptr)[2] = temp[2]; + ((char*)ptr)[3] = temp[3]; +#endif +} + +template +EASY_FORCE_INLINE void unaligned_store64(void* ptr, T val) +{ + static_assert(sizeof(T) == 8, "64 bit type required."); +#ifndef EASY_ENABLE_STRICT_ALIGNMENT + *(T*)ptr = val; +#else + const char* const temp = (char*)&val; + // Assume unaligned is more common.
+ if (!is_aligned(ptr)) { + ((char*)ptr)[0] = temp[0]; + ((char*)ptr)[1] = temp[1]; + ((char*)ptr)[2] = temp[2]; + ((char*)ptr)[3] = temp[3]; + ((char*)ptr)[4] = temp[4]; + ((char*)ptr)[5] = temp[5]; + ((char*)ptr)[6] = temp[6]; + ((char*)ptr)[7] = temp[7]; + } + else { + *(T*)ptr = val; + } +#endif +} + +template +EASY_FORCE_INLINE T unaligned_load16(const void* ptr) +{ + static_assert(sizeof(T) == 2, "16 bit type required."); +#ifndef EASY_ENABLE_STRICT_ALIGNMENT + return *(T*)ptr; +#else + T value; + ((char*)&value)[0] = ((char*)ptr)[0]; + ((char*)&value)[1] = ((char*)ptr)[1]; + return value; +#endif +} + +template +EASY_FORCE_INLINE T unaligned_load16(const void* ptr, T* val) +{ + static_assert(sizeof(T) == 2, "16 bit type required."); +#ifndef EASY_ENABLE_STRICT_ALIGNMENT + *val = *(T*)ptr; + return *val; +#else + ((char*)val)[0] = ((char*)ptr)[0]; + ((char*)val)[1] = ((char*)ptr)[1]; + return *val; +#endif +} + +template +EASY_FORCE_INLINE T unaligned_load32(const void* ptr) +{ + static_assert(sizeof(T) == 4, "32 bit type required."); +#ifndef EASY_ENABLE_STRICT_ALIGNMENT + return *(T*)ptr; +#else + T value; + ((char*)&value)[0] = ((char*)ptr)[0]; + ((char*)&value)[1] = ((char*)ptr)[1]; + ((char*)&value)[2] = ((char*)ptr)[2]; + ((char*)&value)[3] = ((char*)ptr)[3]; + return value; +#endif +} + +template +EASY_FORCE_INLINE T unaligned_load32(const void* ptr, T* val) +{ + static_assert(sizeof(T) == 4, "32 bit type required."); +#ifndef EASY_ENABLE_STRICT_ALIGNMENT + *val = *(T*)ptr; +#else + ((char*)val)[0] = ((char*)ptr)[0]; // copy into *val byte by byte (as unaligned_load16 does), not into the pointer variable itself + ((char*)val)[1] = ((char*)ptr)[1]; + ((char*)val)[2] = ((char*)ptr)[2]; + ((char*)val)[3] = ((char*)ptr)[3]; +#endif + return *val; // shared by both branches: the non-strict branch used to fall off the end without returning a value +} + +template +EASY_FORCE_INLINE T unaligned_load64(const void* ptr) +{ + static_assert(sizeof(T) == 8, "64 bit type required."); +#ifndef EASY_ENABLE_STRICT_ALIGNMENT + return *(T*)ptr; +#else + if (!is_aligned(ptr)) { + T value; + ((char*)&value)[0] = ((char*)ptr)[0]; + ((char*)&value)[1] =
((char*)ptr)[1]; + ((char*)&value)[2] = ((char*)ptr)[2]; + ((char*)&value)[3] = ((char*)ptr)[3]; + ((char*)&value)[4] = ((char*)ptr)[4]; + ((char*)&value)[5] = ((char*)ptr)[5]; + ((char*)&value)[6] = ((char*)ptr)[6]; + ((char*)&value)[7] = ((char*)ptr)[7]; + return value; + } + else { + return *(T*)ptr; + } +#endif +} + +template +EASY_FORCE_INLINE T unaligned_load64(const void* ptr, T* val) +{ + static_assert(sizeof(T) == 8, "64 bit type required."); +#ifndef EASY_ENABLE_STRICT_ALIGNMENT + *val = *(T*)ptr; +#else + if (!is_aligned(ptr)) { + ((char*)val)[0] = ((char*)ptr)[0]; // copy into *val byte by byte; writing through &val would overwrite the pointer variable instead + ((char*)val)[1] = ((char*)ptr)[1]; + ((char*)val)[2] = ((char*)ptr)[2]; + ((char*)val)[3] = ((char*)ptr)[3]; + ((char*)val)[4] = ((char*)ptr)[4]; + ((char*)val)[5] = ((char*)ptr)[5]; + ((char*)val)[6] = ((char*)ptr)[6]; + ((char*)val)[7] = ((char*)ptr)[7]; + } + else { + *val = *(T*)ptr; + } +#endif + // Single exit for every branch: the non-strict branch used to fall off the end without returning a value. + return *val; +} + +////////////////////////////////////////////////////////////////////////// + +template +class chunk_allocator +{ + struct chunk { EASY_ALIGNED(char, data[N], EASY_ALIGNMENT_SIZE); chunk* prev = nullptr; }; + + struct chunk_list + { + chunk* last; + + chunk_list() : last(nullptr) + { + static_assert(sizeof(char) == 1, "easy_profiler logic error: sizeof(char) != 1 for this platform! Please, contact easy_profiler authors to resolve your problem."); + emplace_back(); + } + + ~chunk_list() + { + do free_last(); while (last != nullptr); + } + + void clear_all_except_last() + { + while (last->prev != nullptr) + free_last(); + zero_last_chunk_size(); + } + + void emplace_back() + { + auto prev = last; + last = ::new (EASY_MALLOC(sizeof(chunk), EASY_ALIGNMENT_SIZE)) chunk(); + last->prev = prev; + zero_last_chunk_size(); + } + + /** Invert current chunks list to enable to iterate over chunks list in direct order. + + This method is used by serialize().
+ */ + void invert() + { + chunk* next = nullptr; + + while (last->prev != nullptr) { + auto p = last->prev; + last->prev = next; + next = last; + last = p; + } + + last->prev = next; + } + + private: + + chunk_list(const chunk_list&) = delete; + chunk_list(chunk_list&&) = delete; + + void free_last() + { + auto p = last; + last = last->prev; + EASY_FREE(p); + } + + void zero_last_chunk_size() + { + // Although there is no need for unaligned access stuff b/c a new chunk will + // usually be at least 8 byte aligned (and we only need 2 byte alignment), + // this is the only way I have been able to get rid of the GCC strict-aliasing warning + // without using std::memset. It's an extra line, but is just as fast as *(uint16_t*)last->data = 0; + char* const data = last->data; + *(uint16_t*)data = (uint16_t)0; + } + }; + + // Used in serialize(): workaround for no constexpr support in MSVC 2013. + static const int_fast32_t MAX_CHUNK_OFFSET = N - sizeof(uint16_t); + static const uint16_t N_MINUS_ONE = N - 1; + + chunk_list m_chunks; ///< List of chunks. + uint32_t m_size; ///< Number of elements stored(# of times allocate() has been called.) + uint16_t m_chunkOffset; ///< Number of bytes used in the current chunk. + +public: + + chunk_allocator() : m_size(0), m_chunkOffset(0) + { + } + + /** Allocate n bytes. + + Automatically checks if there is enough preserved memory to store additional n bytes + and allocates additional buffer if needed. + */ + void* allocate(uint16_t n) + { + ++m_size; + + if (!need_expand(n)) + { + // Temp to avoid extra load due to this* aliasing. + uint16_t chunkOffset = m_chunkOffset; + char* data = m_chunks.last->data + chunkOffset; + chunkOffset += n + sizeof(uint16_t); + m_chunkOffset = chunkOffset; + + unaligned_store16(data, n); + data += sizeof(uint16_t); + + // If there is enough space for at least another payload size, + // set it to zero. 
+ if (chunkOffset < N_MINUS_ONE) + unaligned_zero16(data + n); + + return data; + } + + m_chunkOffset = n + sizeof(uint16_t); + m_chunks.emplace_back(); + + char* data = m_chunks.last->data; + unaligned_store16(data, n); + data += sizeof(uint16_t); + + // We assume here that it takes more than one element to fill a chunk. + unaligned_zero16(data + n); + + return data; + } + + /** Check if current storage is not enough to store additional n bytes. + */ + bool need_expand(uint16_t n) const + { + return (m_chunkOffset + n + sizeof(uint16_t)) > N; + } + + uint32_t size() const + { + return m_size; + } + + bool empty() const + { + return m_size == 0; + } + + void clear() + { + m_size = 0; + m_chunkOffset = 0; + m_chunks.clear_all_except_last(); // There is always at least one chunk + } + + /** Serialize data to stream. + + \warning Data will be cleared after serialization. + */ + void serialize(profiler::OStream& _outputStream) + { + // Chunks are stored in reversed order (stack). + // To be able to iterate them in direct order we have to invert the chunks list. + m_chunks.invert(); + + // Each chunk is an array of N bytes that can hold between + // 1(if the list isn't empty) and however many elements can fit in a chunk, + // where an element consists of a payload size + a payload as follows: + // elementStart[0..1]: size as a uint16_t + // elementStart[2..size-1]: payload. + + // The maximum chunk offset is N-sizeof(uint16_t) b/c, if we hit that (or go past), + // there is either no space left, 1 byte left, or 2 bytes left, all of which are + // too small to cary more than a zero-sized element. + + chunk* current = m_chunks.last; + do { + const char* data = current->data; + int_fast32_t chunkOffset = 0; // signed int so overflow is not checked. 
+ uint16_t payloadSize = unaligned_load16(data); + while (chunkOffset < MAX_CHUNK_OFFSET && payloadSize != 0) { + const uint16_t chunkSize = sizeof(uint16_t) + payloadSize; + _outputStream.write(data, chunkSize); + data += chunkSize; + chunkOffset += chunkSize; + unaligned_load16(data, &payloadSize); + } + + current = current->prev; + } while (current != nullptr); + + clear(); + } + +private: + + chunk_allocator(const chunk_allocator&) = delete; + chunk_allocator(chunk_allocator&&) = delete; + +}; // END of class chunk_allocator. + +////////////////////////////////////////////////////////////////////////// + +#endif // EASY_PROFILER_CHUNK_ALLOCATOR_H diff --git a/easy_profiler_core/current_thread.h b/easy_profiler_core/current_thread.h new file mode 100644 index 0000000..162db81 --- /dev/null +++ b/easy_profiler_core/current_thread.h @@ -0,0 +1,79 @@ +/** +Lightweight profiler library for c++ +Copyright(C) 2016-2017 Sergey Yagovtsev, Victor Zarubkin + +Licensed under either of + * MIT license (LICENSE.MIT or http://opensource.org/licenses/MIT) + * Apache License, Version 2.0, (LICENSE.APACHE or http://www.apache.org/licenses/LICENSE-2.0) +at your option. + +The MIT License + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is furnished + to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. + + +The Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +**/ + +#ifndef EASY_PROFILER_CURRENT_THREAD_H +#define EASY_PROFILER_CURRENT_THREAD_H + +#include + +#ifdef _WIN32 +# include +#elif defined(__APPLE__) +# include +# include +#else +# include +# include +# include +#endif + +inline profiler::thread_id_t getCurrentThreadId() +{ +#ifdef _WIN32 + return (profiler::thread_id_t)::GetCurrentThreadId(); +#elif defined(__APPLE__) +# if (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_6) || \ + (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) + EASY_THREAD_LOCAL static uint64_t _id = 0; + if (!_id) + pthread_threadid_np(NULL, &_id); + return (profiler::thread_id_t)_id; +# else + return (profiler::thread_id_t)pthread_self(); +# endif +#else + EASY_THREAD_LOCAL static const profiler::thread_id_t _id = (profiler::thread_id_t)syscall(__NR_gettid); + return _id; +#endif +} + +#endif // EASY_PROFILER_CURRENT_THREAD_H diff --git a/easy_profiler_core/event_trace_win.cpp b/easy_profiler_core/event_trace_win.cpp index e2bb989..c6714c6 100644 --- a/easy_profiler_core/event_trace_win.cpp +++ b/easy_profiler_core/event_trace_win.cpp @@ -171,8 +171,14 @@ namespace 
profiler { ////////////////////////////////////////////////////////////////////////// - typedef ::std::unordered_map thread_process_info_map; - typedef ::std::unordered_map process_info_map; + struct do_not_calc_hash { + template inline size_t operator()(T _value) const { + return static_cast(_value); + } + }; + + typedef ::std::unordered_map thread_process_info_map; + typedef ::std::unordered_map process_info_map; // Using static is safe because processTraceEvent() is called from one thread process_info_map PROCESS_INFO_TABLE; diff --git a/easy_profiler_core/nonscoped_block.cpp b/easy_profiler_core/nonscoped_block.cpp new file mode 100644 index 0000000..f94ae51 --- /dev/null +++ b/easy_profiler_core/nonscoped_block.cpp @@ -0,0 +1,91 @@ +/** +Lightweight profiler library for c++ +Copyright(C) 2016-2017 Sergey Yagovtsev, Victor Zarubkin + +Licensed under either of + * MIT license (LICENSE.MIT or http://opensource.org/licenses/MIT) + * Apache License, Version 2.0, (LICENSE.APACHE or http://www.apache.org/licenses/LICENSE-2.0) +at your option. + +The MIT License + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is furnished + to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. + + +The Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +**/ + +#include "nonscoped_block.h" +#include + +NonscopedBlock::NonscopedBlock(const profiler::BaseBlockDescriptor* _desc, const char* _runtimeName, bool) + : profiler::Block(_desc, _runtimeName, false), m_runtimeName(nullptr) +{ + +} + +NonscopedBlock::~NonscopedBlock() +{ + // Actually destructor should not be invoked because StackBuffer do manual memory management + + m_end = m_begin; // to restrict profiler::Block to invoke profiler::endBlock() on destructor. 
+ free(m_runtimeName); +} + +void NonscopedBlock::copyname() +{ + // Here we need to copy m_name to m_runtimeName to ensure that + // it would be alive to the moment we will serialize the block + + if ((m_status & profiler::ON) == 0) + return; + + if (*m_name != 0) + { + auto len = strlen(m_name); + m_runtimeName = static_cast(malloc(len + 1)); + + // memcpy should be faster than strncpy because we know + // actual bytes number and both strings have the same size + memcpy(m_runtimeName, m_name, len); + + m_runtimeName[len] = 0; + m_name = m_runtimeName; + } + else + { + m_name = ""; + } +} + +void NonscopedBlock::destroy() +{ + free(m_runtimeName); // free memory used by m_runtimeName (the copy made by copyname()) + m_runtimeName = nullptr; // ~NonscopedBlock() frees this member too; null it so a later destructor or destroy() call cannot free it twice + m_name = ""; +} diff --git a/easy_profiler_core/nonscoped_block.h b/easy_profiler_core/nonscoped_block.h new file mode 100644 index 0000000..d628d75 --- /dev/null +++ b/easy_profiler_core/nonscoped_block.h @@ -0,0 +1,73 @@ +/** +Lightweight profiler library for c++ +Copyright(C) 2016-2017 Sergey Yagovtsev, Victor Zarubkin + +Licensed under either of + * MIT license (LICENSE.MIT or http://opensource.org/licenses/MIT) + * Apache License, Version 2.0, (LICENSE.APACHE or http://www.apache.org/licenses/LICENSE-2.0) +at your option. + +The MIT License + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is furnished + to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software.
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. + + +The Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +**/ + +#ifndef EASY_PROFILER_NONSCOPED_BLOCK_H +#define EASY_PROFILER_NONSCOPED_BLOCK_H + +#include + +class NonscopedBlock : public profiler::Block +{ + char* m_runtimeName; ///< A copy of _runtimeName to make it safe to begin block in one function and end it in another + + NonscopedBlock() = delete; + NonscopedBlock(const NonscopedBlock&) = delete; + NonscopedBlock(NonscopedBlock&&) = delete; + NonscopedBlock& operator = (const NonscopedBlock&) = delete; + NonscopedBlock& operator = (NonscopedBlock&&) = delete; + +public: + + NonscopedBlock(const profiler::BaseBlockDescriptor* _desc, const char* _runtimeName, bool = false); + ~NonscopedBlock(); + + /** Copy string from m_name to m_runtimeName to make it safe to end block in another function. + + Performs any work if block is ON and m_name != "" + */ + void copyname(); + + void destroy(); + +}; // END of class NonscopedBlock. 
+ +#endif // EASY_PROFILER_NONSCOPED_BLOCK_H diff --git a/easy_profiler_core/profile_manager.cpp b/easy_profiler_core/profile_manager.cpp index e002f78..e2e700d 100644 --- a/easy_profiler_core/profile_manager.cpp +++ b/easy_profiler_core/profile_manager.cpp @@ -59,6 +59,7 @@ #include "event_trace_win.h" #include "current_time.h" +#include "current_thread.h" #ifdef __APPLE__ #include @@ -116,6 +117,10 @@ # undef min #endif +#ifndef EASY_ENABLE_BLOCK_STATUS +# define EASY_ENABLE_BLOCK_STATUS 1 +#endif + #if !defined(_WIN32) && !defined(EASY_OPTION_REMOVE_EMPTY_UNGUARDED_THREADS) # define EASY_OPTION_REMOVE_EMPTY_UNGUARDED_THREADS 0 #endif @@ -161,12 +166,16 @@ extern const uint32_t EASY_CURRENT_VERSION = EASY_VERSION_INT(EASY_PROFILER_VERS const uint8_t FORCE_ON_FLAG = profiler::FORCE_ON & ~profiler::ON; #if defined(EASY_CHRONO_CLOCK) +#include const int64_t CPU_FREQUENCY = EASY_CHRONO_CLOCK::period::den / EASY_CHRONO_CLOCK::period::num; # define TICKS_TO_US(ticks) ticks * 1000000LL / CPU_FREQUENCY #elif defined(_WIN32) const decltype(LARGE_INTEGER::QuadPart) CPU_FREQUENCY = ([](){ LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); return freq.QuadPart; })(); # define TICKS_TO_US(ticks) ticks * 1000000LL / CPU_FREQUENCY #else +# ifndef __APPLE__ +# include +# endif int64_t calculate_cpu_frequency() { double g_TicksPerNanoSec; @@ -620,136 +629,6 @@ public: ////////////////////////////////////////////////////////////////////////// -NonscopedBlock::NonscopedBlock(const profiler::BaseBlockDescriptor* _desc, const char* _runtimeName, bool) - : profiler::Block(_desc, _runtimeName, false), m_runtimeName(nullptr) -{ - -} - -NonscopedBlock::~NonscopedBlock() -{ - // Actually destructor should not be invoked because StackBuffer do manual memory management - - m_end = m_begin; // to restrict profiler::Block to invoke profiler::endBlock() on destructor. 
- free(m_runtimeName); -} - -void NonscopedBlock::copyname() -{ - // Here we need to copy m_name to m_runtimeName to ensure that - // it would be alive to the moment we will serialize the block - - if ((m_status & profiler::ON) == 0) - return; - - if (*m_name != 0) - { - auto len = strlen(m_name); - m_runtimeName = static_cast(malloc(len + 1)); - - // memcpy should be faster than strncpy because we know - // actual bytes number and both strings have the same size - memcpy(m_runtimeName, m_name, len); - - m_runtimeName[len] = 0; - m_name = m_runtimeName; - } - else - { - m_name = ""; - } -} - -void NonscopedBlock::destroy() -{ - // free memory used by m_runtimeName - free(m_runtimeName); - m_name = ""; -} - -////////////////////////////////////////////////////////////////////////// - -ThreadStorage::ThreadStorage() : nonscopedBlocks(16), id(getCurrentThreadId()), allowChildren(true), named(false), guarded(false) -#ifndef _WIN32 -, pthread_id(pthread_self()) -#endif - -{ - expired = ATOMIC_VAR_INIT(0); - frame = ATOMIC_VAR_INIT(false); -} - -void ThreadStorage::storeBlock(const profiler::Block& block) -{ -#if EASY_OPTION_MEASURE_STORAGE_EXPAND != 0 - EASY_LOCAL_STATIC_PTR(const BaseBlockDescriptor*, desc,\ - MANAGER.addBlockDescriptor(EASY_OPTION_STORAGE_EXPAND_BLOCKS_ON ? 
profiler::ON : profiler::OFF, EASY_UNIQUE_LINE_ID, "EasyProfiler.ExpandStorage",\ - __FILE__, __LINE__, profiler::BLOCK_TYPE_BLOCK, EASY_COLOR_INTERNAL_EVENT)); - - EASY_THREAD_LOCAL static profiler::timestamp_t beginTime = 0ULL; - EASY_THREAD_LOCAL static profiler::timestamp_t endTime = 0ULL; -#endif - - uint16_t name_length = static_cast(strlen(block.name())); - uint16_t size = static_cast(sizeof(BaseBlockData) + name_length + 1); - -#if EASY_OPTION_MEASURE_STORAGE_EXPAND != 0 - const bool expanded = (desc->m_status & profiler::ON) && blocks.closedList.need_expand(size); - if (expanded) beginTime = getCurrentTime(); -#endif - - void* data = blocks.closedList.allocate(size); - -#if EASY_OPTION_MEASURE_STORAGE_EXPAND != 0 - if (expanded) endTime = getCurrentTime(); -#endif - - ::new (data) SerializedBlock(block, name_length); - blocks.usedMemorySize += size; - -#if EASY_OPTION_MEASURE_STORAGE_EXPAND != 0 - if (expanded) - { - profiler::Block b(beginTime, desc->id(), ""); - b.finish(endTime); - - size = static_cast(sizeof(BaseBlockData) + 1); - data = blocks.closedList.allocate(size); - ::new (data) SerializedBlock(b, 0); - blocks.usedMemorySize += size; - } -#endif -} - -void ThreadStorage::storeCSwitch(const CSwitchBlock& block) -{ - uint16_t name_length = static_cast(strlen(block.name())); - uint16_t size = static_cast(sizeof(CSwitchEvent) + name_length + 1); - void* data = sync.closedList.allocate(size); - ::new (data) SerializedCSwitch(block, name_length); - sync.usedMemorySize += size; -} - -void ThreadStorage::clearClosed() -{ - blocks.clearClosed(); - sync.clearClosed(); -} - -void ThreadStorage::popSilent() -{ - if (!blocks.openedList.empty()) - { - Block& top = blocks.openedList.back(); - top.m_end = top.m_begin; - if (!top.m_isScoped) - nonscopedBlocks.pop(); - blocks.openedList.pop_back(); - } -} - -////////////////////////////////////////////////////////////////////////// - ThreadGuard::~ThreadGuard() { #ifndef EASY_PROFILER_API_DISABLED diff --git 
a/easy_profiler_core/profile_manager.h b/easy_profiler_core/profile_manager.h index 078cea4..c198790 100644 --- a/easy_profiler_core/profile_manager.h +++ b/easy_profiler_core/profile_manager.h @@ -49,706 +49,14 @@ The Apache License, Version 2.0 (the "License"); #include "spin_lock.h" #include "outstream.h" #include "hashed_cstr.h" +#include "thread_storage.h" #include #include #include #include #include -#include #include -#include -#include - -////////////////////////////////////////////////////////////////////////// - -#ifdef _WIN32 -#include -#elif defined(__APPLE__) -#include -#include -#else -#include -#include -#include -#include -#include -#include -#endif - -#ifdef max -#undef max -#endif - -inline profiler::thread_id_t getCurrentThreadId() -{ -#ifdef _WIN32 - return (profiler::thread_id_t)::GetCurrentThreadId(); -#elif defined(__APPLE__) -# if (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_6) || \ - (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) - EASY_THREAD_LOCAL static uint64_t _id = 0; - if (!_id) - pthread_threadid_np(NULL, &_id); - return (profiler::thread_id_t)_id; -# else - return (profiler::thread_id_t)pthread_self(); -# endif -#else - EASY_THREAD_LOCAL static const profiler::thread_id_t _id = (profiler::thread_id_t)syscall(__NR_gettid); - return _id; -#endif -} - -namespace profiler { - - class SerializedBlock; - - struct do_not_calc_hash { - template inline size_t operator()(T _value) const { - return static_cast(_value); - } - }; - -} - -////////////////////////////////////////////////////////////////////////// - -#ifndef EASY_ENABLE_BLOCK_STATUS -# define EASY_ENABLE_BLOCK_STATUS 1 -#endif - -#ifndef EASY_ENABLE_ALIGNMENT -# define EASY_ENABLE_ALIGNMENT 0 -#endif - -#ifndef EASY_ALIGNMENT_SIZE -# define EASY_ALIGNMENT_SIZE alignof(std::max_align_t) -#endif - -#if EASY_ENABLE_ALIGNMENT == 0 -# define EASY_ALIGNED(TYPE, VAR, A) TYPE VAR -# define 
EASY_MALLOC(MEMSIZE, A) malloc(MEMSIZE) -# define EASY_FREE(MEMPTR) free(MEMPTR) -#else -# if defined(_MSC_VER) -# define EASY_ALIGNED(TYPE, VAR, A) __declspec(align(A)) TYPE VAR -# define EASY_MALLOC(MEMSIZE, A) _aligned_malloc(MEMSIZE, A) -# define EASY_FREE(MEMPTR) _aligned_free(MEMPTR) -# elif defined(__GNUC__) -# define EASY_ALIGNED(TYPE, VAR, A) TYPE VAR __attribute__((aligned(A))) -# define EASY_MALLOC(MEMSIZE, A) memalign(A, MEMSIZE) -# define EASY_FREE(MEMPTR) free(MEMPTR) -# else -# define EASY_ALIGNED(TYPE, VAR, A) TYPE VAR -# define EASY_MALLOC(MEMSIZE, A) malloc(MEMSIZE) -# define EASY_FREE(MEMPTR) free(MEMPTR) -# endif -#endif - -//! Checks if a pointer is aligned. -//! \param ptr The pointer to check. -//! \param alignment The alignement (must be a power of 2) -//! \returns true if the memory is aligned. -//! -template -EASY_FORCE_INLINE bool is_aligned(void* ptr) -{ - static_assert(ALIGNMENT % 2 == 0, "Alignment must be a power of two."); - return ((uintptr_t)ptr & (ALIGNMENT-1)) == 0; -} - -EASY_FORCE_INLINE void unaligned_zero16(void* ptr) -{ -#ifndef EASY_ENABLE_STRICT_ALIGNMENT - *(uint16_t*)ptr = 0; -#else - ((char*)ptr)[0] = 0; - ((char*)ptr)[1] = 0; -#endif -} - -EASY_FORCE_INLINE void unaligned_zero32(void* ptr) -{ -#ifndef EASY_ENABLE_STRICT_ALIGNMENT - *(uint32_t*)ptr = 0; -#else - ((char*)ptr)[0] = 0; - ((char*)ptr)[1] = 0; - ((char*)ptr)[2] = 0; - ((char*)ptr)[3] = 0; -#endif -} - -EASY_FORCE_INLINE void unaligned_zero64(void* ptr) -{ -#ifndef EASY_ENABLE_STRICT_ALIGNMENT - *(uint64_t*)ptr = 0; -#else - // Assume unaligned is more common. 
- if (!is_aligned(ptr)) { - ((char*)ptr)[0] = 0; - ((char*)ptr)[1] = 0; - ((char*)ptr)[2] = 0; - ((char*)ptr)[3] = 0; - ((char*)ptr)[4] = 0; - ((char*)ptr)[5] = 0; - ((char*)ptr)[6] = 0; - ((char*)ptr)[7] = 0; - } - else { - *(uint64_t*)ptr = 0; - } -#endif -} - -template -EASY_FORCE_INLINE void unaligned_store16(void* ptr, T val) -{ - static_assert(sizeof(T) == 2, "16 bit type required."); -#ifndef EASY_ENABLE_STRICT_ALIGNMENT - *(T*)ptr = val; -#else - const char* const temp = (char*)&val; - ((char*)ptr)[0] = temp[0]; - ((char*)ptr)[1] = temp[1]; -#endif -} - -template -EASY_FORCE_INLINE void unaligned_store32(void* ptr, T val) -{ - static_assert(sizeof(T) == 4, "32 bit type required."); -#ifndef EASY_ENABLE_STRICT_ALIGNMENT - *(T*)ptr = val; -#else - const char* const temp = (char*)&val; - ((char*)ptr)[0] = temp[0]; - ((char*)ptr)[1] = temp[1]; - ((char*)ptr)[2] = temp[2]; - ((char*)ptr)[3] = temp[3]; -#endif -} - -template -EASY_FORCE_INLINE void unaligned_store64(void* ptr, T val) -{ - static_assert(sizeof(T) == 8, "64 bit type required."); -#ifndef EASY_ENABLE_STRICT_ALIGNMENT - *(T*)ptr = val; -#else - const char* const temp = (char*)&val; - // Assume unaligned is more common. 
- if (!is_aligned(ptr)) { - ((char*)ptr)[0] = temp[0]; - ((char*)ptr)[1] = temp[1]; - ((char*)ptr)[2] = temp[2]; - ((char*)ptr)[3] = temp[3]; - ((char*)ptr)[4] = temp[4]; - ((char*)ptr)[5] = temp[5]; - ((char*)ptr)[6] = temp[6]; - ((char*)ptr)[7] = temp[7]; - } - else { - *(T*)ptr = val; - } -#endif -} - -template -EASY_FORCE_INLINE T unaligned_load16(const void* ptr) -{ - static_assert(sizeof(T) == 2, "16 bit type required."); -#ifndef EASY_ENABLE_STRICT_ALIGNMENT - return *(T*)ptr; -#else - T value; - ((char*)&value)[0] = ((char*)ptr)[0]; - ((char*)&value)[1] = ((char*)ptr)[1]; - return value; -#endif -} - -template -EASY_FORCE_INLINE T unaligned_load16(const void* ptr, T* val) -{ - static_assert(sizeof(T) == 2, "16 bit type required."); -#ifndef EASY_ENABLE_STRICT_ALIGNMENT - *val = *(T*)ptr; - return *val; -#else - ((char*)val)[0] = ((char*)ptr)[0]; - ((char*)val)[1] = ((char*)ptr)[1]; - return *val; -#endif -} - -template -EASY_FORCE_INLINE T unaligned_load32(const void* ptr) -{ - static_assert(sizeof(T) == 4, "32 bit type required."); -#ifndef EASY_ENABLE_STRICT_ALIGNMENT - return *(T*)ptr; -#else - T value; - ((char*)&value)[0] = ((char*)ptr)[0]; - ((char*)&value)[1] = ((char*)ptr)[1]; - ((char*)&value)[2] = ((char*)ptr)[2]; - ((char*)&value)[3] = ((char*)ptr)[3]; - return value; -#endif -} - -template -EASY_FORCE_INLINE T unaligned_load32(const void* ptr, T* val) -{ - static_assert(sizeof(T) == 4, "32 bit type required."); -#ifndef EASY_ENABLE_STRICT_ALIGNMENT - *val = *(T*)ptr; -#else - ((char*)&val)[0] = ((char*)ptr)[0]; - ((char*)&val)[1] = ((char*)ptr)[1]; - ((char*)&val)[2] = ((char*)ptr)[2]; - ((char*)&val)[3] = ((char*)ptr)[3]; - return *val; -#endif -} - -template -EASY_FORCE_INLINE T unaligned_load64(const void* ptr) -{ - static_assert(sizeof(T) == 8, "64 bit type required."); -#ifndef EASY_ENABLE_STRICT_ALIGNMENT - return *(T*)ptr; -#else - if (!is_aligned(ptr)) { - T value; - ((char*)&value)[0] = ((char*)ptr)[0]; - ((char*)&value)[1] = 
((char*)ptr)[1]; - ((char*)&value)[2] = ((char*)ptr)[2]; - ((char*)&value)[3] = ((char*)ptr)[3]; - ((char*)&value)[4] = ((char*)ptr)[4]; - ((char*)&value)[5] = ((char*)ptr)[5]; - ((char*)&value)[6] = ((char*)ptr)[6]; - ((char*)&value)[7] = ((char*)ptr)[7]; - return value; - } - else { - return *(T*)ptr; - } -#endif -} - -template -EASY_FORCE_INLINE T unaligned_load64(const void* ptr, T* val) -{ - static_assert(sizeof(T) == 8, "64 bit type required."); -#ifndef EASY_ENABLE_STRICT_ALIGNMENT - *val = *(T*)ptr; -#else - if (!is_aligned(ptr)) { - ((char*)&val)[0] = ((char*)ptr)[0]; - ((char*)&val)[1] = ((char*)ptr)[1]; - ((char*)&val)[2] = ((char*)ptr)[2]; - ((char*)&val)[3] = ((char*)ptr)[3]; - ((char*)&val)[4] = ((char*)ptr)[4]; - ((char*)&val)[5] = ((char*)ptr)[5]; - ((char*)&val)[6] = ((char*)ptr)[6]; - ((char*)&val)[7] = ((char*)ptr)[7]; - return *val; - } - else { - *val = *(T*)ptr; - return *val; - } -#endif -} - - -template -class chunk_allocator -{ - struct chunk { EASY_ALIGNED(char, data[N], EASY_ALIGNMENT_SIZE); chunk* prev = nullptr; }; - - struct chunk_list - { - chunk* last; - - chunk_list() : last(nullptr) - { - static_assert(sizeof(char) == 1, "easy_profiler logic error: sizeof(char) != 1 for this platform! Please, contact easy_profiler authors to resolve your problem."); - emplace_back(); - } - - ~chunk_list() - { - do free_last(); while (last != nullptr); - } - - void clear_all_except_last() - { - while (last->prev != nullptr) - free_last(); - zero_last_chunk_size(); - } - - void emplace_back() - { - auto prev = last; - last = ::new (EASY_MALLOC(sizeof(chunk), EASY_ALIGNMENT_SIZE)) chunk(); - last->prev = prev; - zero_last_chunk_size(); - } - - /** Invert current chunks list to enable to iterate over chunks list in direct order. - - This method is used by serialize(). 
- */ - void invert() - { - chunk* next = nullptr; - - while (last->prev != nullptr) { - auto p = last->prev; - last->prev = next; - next = last; - last = p; - } - - last->prev = next; - } - - private: - - chunk_list(const chunk_list&) = delete; - chunk_list(chunk_list&&) = delete; - - void free_last() - { - auto p = last; - last = last->prev; - EASY_FREE(p); - } - - void zero_last_chunk_size() - { - // Although there is no need for unaligned access stuff b/c a new chunk will - // usually be at least 8 byte aligned (and we only need 2 byte alignment), - // this is the only way I have been able to get rid of the GCC strict-aliasing warning - // without using std::memset. It's an extra line, but is just as fast as *(uint16_t*)last->data = 0; - char* const data = last->data; - *(uint16_t*)data = (uint16_t)0; - } - }; - - // Used in serialize(): workaround for no constexpr support in MSVC 2013. - static const int_fast32_t MAX_CHUNK_OFFSET = N - sizeof(uint16_t); - static const uint16_t N_MINUS_ONE = N - 1; - - chunk_list m_chunks; ///< List of chunks. - uint32_t m_size; ///< Number of elements stored(# of times allocate() has been called.) - uint16_t m_chunkOffset; ///< Number of bytes used in the current chunk. - -public: - - chunk_allocator() : m_size(0), m_chunkOffset(0) - { - } - - /** Allocate n bytes. - - Automatically checks if there is enough preserved memory to store additional n bytes - and allocates additional buffer if needed. - */ - void* allocate(uint16_t n) - { - ++m_size; - - if (!need_expand(n)) - { - // Temp to avoid extra load due to this* aliasing. - uint16_t chunkOffset = m_chunkOffset; - char* data = m_chunks.last->data + chunkOffset; - chunkOffset += n + sizeof(uint16_t); - m_chunkOffset = chunkOffset; - - unaligned_store16(data, n); - data += sizeof(uint16_t); - - // If there is enough space for at least another payload size, - // set it to zero. 
- if (chunkOffset < N_MINUS_ONE) - unaligned_zero16(data + n); - - return data; - } - - m_chunkOffset = n + sizeof(uint16_t); - m_chunks.emplace_back(); - - char* data = m_chunks.last->data; - unaligned_store16(data, n); - data += sizeof(uint16_t); - - // We assume here that it takes more than one element to fill a chunk. - unaligned_zero16(data + n); - - return data; - } - - /** Check if current storage is not enough to store additional n bytes. - */ - bool need_expand(uint16_t n) const - { - return (m_chunkOffset + n + sizeof(uint16_t)) > N; - } - - uint32_t size() const - { - return m_size; - } - - bool empty() const - { - return m_size == 0; - } - - void clear() - { - m_size = 0; - m_chunkOffset = 0; - m_chunks.clear_all_except_last(); // There is always at least one chunk - } - - /** Serialize data to stream. - - \warning Data will be cleared after serialization. - */ - void serialize(profiler::OStream& _outputStream) - { - // Chunks are stored in reversed order (stack). - // To be able to iterate them in direct order we have to invert the chunks list. - m_chunks.invert(); - - // Each chunk is an array of N bytes that can hold between - // 1(if the list isn't empty) and however many elements can fit in a chunk, - // where an element consists of a payload size + a payload as follows: - // elementStart[0..1]: size as a uint16_t - // elementStart[2..size-1]: payload. - - // The maximum chunk offset is N-sizeof(uint16_t) b/c, if we hit that (or go past), - // there is either no space left, 1 byte left, or 2 bytes left, all of which are - // too small to cary more than a zero-sized element. - - chunk* current = m_chunks.last; - do { - const char* data = current->data; - int_fast32_t chunkOffset = 0; // signed int so overflow is not checked. 
- uint16_t payloadSize = unaligned_load16(data); - while (chunkOffset < MAX_CHUNK_OFFSET && payloadSize != 0) { - const uint16_t chunkSize = sizeof(uint16_t) + payloadSize; - _outputStream.write(data, chunkSize); - data += chunkSize; - chunkOffset += chunkSize; - unaligned_load16(data, &payloadSize); - } - - current = current->prev; - } while (current != nullptr); - - clear(); - } - -private: - - chunk_allocator(const chunk_allocator&) = delete; - chunk_allocator(chunk_allocator&&) = delete; - -}; // END of class chunk_allocator. - -////////////////////////////////////////////////////////////////////////// - -class NonscopedBlock : public profiler::Block -{ - char* m_runtimeName; ///< a copy of _runtimeName to make it safe to begin block in one function and end it in another - - NonscopedBlock() = delete; - NonscopedBlock(const NonscopedBlock&) = delete; - NonscopedBlock(NonscopedBlock&&) = delete; - NonscopedBlock& operator = (const NonscopedBlock&) = delete; - NonscopedBlock& operator = (NonscopedBlock&&) = delete; - -public: - - NonscopedBlock(const profiler::BaseBlockDescriptor* _desc, const char* _runtimeName, bool = false); - ~NonscopedBlock(); - - /** Copy string from m_name to m_runtimeName to make it safe to end block in another function. - - Performs any work if block is ON and m_name != "" - */ - void copyname(); - - void destroy(); - -}; // END of class NonscopedBlock. 
- -////////////////////////////////////////////////////////////////////////// - -template -inline void destroy_elem(T*) -{ - -} - -inline void destroy_elem(NonscopedBlock* _elem) -{ - _elem->destroy(); -} - -template -class StackBuffer -{ - struct chunk { int8_t data[sizeof(T)]; }; - - std::list m_overflow; ///< List of additional stack elements if current capacity of buffer is not enough - T* m_buffer; ///< Contiguous buffer used for stack - uint32_t m_size; ///< Current size of stack - uint32_t m_capacity; ///< Current capacity of m_buffer - uint32_t m_maxcapacity; ///< Maximum used capacity including m_buffer and m_overflow - -public: - - StackBuffer(uint32_t N) : m_buffer(static_cast(malloc(N * sizeof(T)))), m_size(0), m_capacity(N), m_maxcapacity(N) - { - } - - ~StackBuffer() - { - for (uint32_t i = 0; i < m_size; ++i) - destroy_elem(m_buffer + i); - - free(m_buffer); - - for (auto& elem : m_overflow) - destroy_elem(reinterpret_cast(elem.data + 0)); - } - - template - T& push(TArgs ... 
_args) - { - if (m_size < m_capacity) - return *(::new (m_buffer + m_size++) T(_args...)); - - m_overflow.emplace_back(); - const uint32_t cap = m_capacity + static_cast(m_overflow.size()); - if (m_maxcapacity < cap) - m_maxcapacity = cap; - - return *(::new (m_overflow.back().data + 0) T(_args...)); - } - - void pop() - { - if (m_overflow.empty()) - { - // m_size should not be equal to 0 here because ProfileManager behavior does not allow such situation - destroy_elem(m_buffer + --m_size); - - if (m_size == 0 && m_maxcapacity > m_capacity) - { - // When stack gone empty we can resize buffer to use enough space in the future - free(m_buffer); - m_maxcapacity = m_capacity = std::max(m_maxcapacity, m_capacity << 1); - m_buffer = static_cast(malloc(m_capacity * sizeof(T))); - } - - return; - } - - destroy_elem(reinterpret_cast(m_overflow.back().data + 0)); - m_overflow.pop_back(); - } - -private: - - StackBuffer(const StackBuffer&) = delete; - StackBuffer(StackBuffer&&) = delete; - -}; // END of class StackBuffer. - -////////////////////////////////////////////////////////////////////////// - -template -struct BlocksList -{ - BlocksList() = default; - - std::vector openedList; - chunk_allocator closedList; - uint64_t usedMemorySize = 0; - - void clearClosed() { - //closedList.clear(); - usedMemorySize = 0; - } - -private: - - BlocksList(const BlocksList&) = delete; - BlocksList(BlocksList&&) = delete; - -}; // END of struct BlocksList. 
- -////////////////////////////////////////////////////////////////////////// - -class CSwitchBlock : public profiler::CSwitchEvent -{ - const char* m_name; - -public: - - CSwitchBlock(profiler::timestamp_t _begin_time, profiler::thread_id_t _tid, const char* _runtimeName); - inline const char* name() const { return m_name; } -}; - -////////////////////////////////////////////////////////////////////////// - -const uint16_t SIZEOF_BLOCK = sizeof(profiler::BaseBlockData) + 1 + sizeof(uint16_t); // SerializedBlock stores BaseBlockData + at least 1 character for name ('\0') + 2 bytes for size of serialized data -const uint16_t SIZEOF_CSWITCH = sizeof(profiler::CSwitchEvent) + 1 + sizeof(uint16_t); // SerializedCSwitch also stores additional 4 bytes to be able to save 64-bit thread_id - -struct ThreadStorage -{ - StackBuffer nonscopedBlocks; - BlocksList, SIZEOF_BLOCK * (uint16_t)128U> blocks; - BlocksList sync; - - std::string name; ///< Thread name - -#ifndef _WIN32 - const pthread_t pthread_id; ///< Thread pointer -#endif - - const profiler::thread_id_t id; ///< Thread ID - std::atomic expired; ///< Is thread expired - std::atomic_bool frame; ///< Is new frame opened - bool allowChildren; ///< False if one of previously opened blocks has OFF_RECURSIVE or ON_WITHOUT_CHILDREN status - bool named; ///< True if thread name was set - bool guarded; ///< True if thread has been registered using ThreadGuard - - void storeBlock(const profiler::Block& _block); - void storeCSwitch(const CSwitchBlock& _block); - void clearClosed(); - void popSilent(); - - ThreadStorage(); - -private: - - ThreadStorage(const ThreadStorage&) = delete; - ThreadStorage(ThreadStorage&&) = delete; - -}; // END of struct ThreadStorage. 
////////////////////////////////////////////////////////////////////////// diff --git a/easy_profiler_core/stack_buffer.h b/easy_profiler_core/stack_buffer.h new file mode 100644 index 0000000..b3bc4d3 --- /dev/null +++ b/easy_profiler_core/stack_buffer.h @@ -0,0 +1,136 @@ +/** +Lightweight profiler library for c++ +Copyright(C) 2016-2017 Sergey Yagovtsev, Victor Zarubkin + +Licensed under either of + * MIT license (LICENSE.MIT or http://opensource.org/licenses/MIT) + * Apache License, Version 2.0, (LICENSE.APACHE or http://www.apache.org/licenses/LICENSE-2.0) +at your option. + +The MIT License + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is furnished + to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. + + +The Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +**/ + +#ifndef EASY_PROFILER_STACK_BUFFER_H +#define EASY_PROFILER_STACK_BUFFER_H + +#include "nonscoped_block.h" +#include +#include + +#ifdef max +#undef max +#endif + +template +inline void destroy_elem(T*) +{ + +} + +inline void destroy_elem(NonscopedBlock* _elem) +{ + _elem->destroy(); +} + +template +class StackBuffer +{ + struct chunk { int8_t data[sizeof(T)]; }; + + std::list m_overflow; ///< List of additional stack elements if current capacity of buffer is not enough + T* m_buffer; ///< Contiguous buffer used for stack + uint32_t m_size; ///< Current size of stack + uint32_t m_capacity; ///< Current capacity of m_buffer + uint32_t m_maxcapacity; ///< Maximum used capacity including m_buffer and m_overflow + +public: + + StackBuffer(uint32_t N) : m_buffer(static_cast(malloc(N * sizeof(T)))), m_size(0), m_capacity(N), m_maxcapacity(N) + { + } + + ~StackBuffer() + { + for (uint32_t i = 0; i < m_size; ++i) + destroy_elem(m_buffer + i); + + free(m_buffer); + + for (auto& elem : m_overflow) + destroy_elem(reinterpret_cast(elem.data + 0)); + } + + template + T& push(TArgs ... 
_args) + { + if (m_size < m_capacity) + return *(::new (m_buffer + m_size++) T(_args...)); + + m_overflow.emplace_back(); + const uint32_t cap = m_capacity + static_cast(m_overflow.size()); + if (m_maxcapacity < cap) + m_maxcapacity = cap; + + return *(::new (m_overflow.back().data + 0) T(_args...)); + } + + void pop() + { + if (m_overflow.empty()) + { + // m_size should not be equal to 0 here because ProfileManager behavior does not allow such situation + destroy_elem(m_buffer + --m_size); + + if (m_size == 0 && m_maxcapacity > m_capacity) + { + // When stack gone empty we can resize buffer to use enough space in the future + free(m_buffer); + m_maxcapacity = m_capacity = std::max(m_maxcapacity, m_capacity << 1); + m_buffer = static_cast(malloc(m_capacity * sizeof(T))); + } + + return; + } + + destroy_elem(reinterpret_cast(m_overflow.back().data + 0)); + m_overflow.pop_back(); + } + +private: + + StackBuffer(const StackBuffer&) = delete; + StackBuffer(StackBuffer&&) = delete; + +}; // END of class StackBuffer. + +#endif // EASY_PROFILER_STACK_BUFFER_H diff --git a/easy_profiler_core/thread_storage.cpp b/easy_profiler_core/thread_storage.cpp new file mode 100644 index 0000000..3109d83 --- /dev/null +++ b/easy_profiler_core/thread_storage.cpp @@ -0,0 +1,121 @@ +/** +Lightweight profiler library for c++ +Copyright(C) 2016-2017 Sergey Yagovtsev, Victor Zarubkin + +Licensed under either of + * MIT license (LICENSE.MIT or http://opensource.org/licenses/MIT) + * Apache License, Version 2.0, (LICENSE.APACHE or http://www.apache.org/licenses/LICENSE-2.0) +at your option. 
+ +The MIT License + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is furnished + to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. + + +The Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +**/ + +#include +#include "thread_storage.h" +#include "current_thread.h" +#include "current_time.h" + +ThreadStorage::ThreadStorage() : nonscopedBlocks(16), id(getCurrentThreadId()), allowChildren(true), named(false), guarded(false) +{ + expired = ATOMIC_VAR_INIT(0); + frame = ATOMIC_VAR_INIT(false); +} + +void ThreadStorage::storeBlock(const profiler::Block& block) +{ +#if EASY_OPTION_MEASURE_STORAGE_EXPAND != 0 + EASY_LOCAL_STATIC_PTR(const BaseBlockDescriptor*, desc, \ + MANAGER.addBlockDescriptor(EASY_OPTION_STORAGE_EXPAND_BLOCKS_ON ? profiler::ON : profiler::OFF, EASY_UNIQUE_LINE_ID, "EasyProfiler.ExpandStorage", \ + __FILE__, __LINE__, profiler::BLOCK_TYPE_BLOCK, EASY_COLOR_INTERNAL_EVENT)); + + EASY_THREAD_LOCAL static profiler::timestamp_t beginTime = 0ULL; + EASY_THREAD_LOCAL static profiler::timestamp_t endTime = 0ULL; +#endif + + uint16_t name_length = static_cast(strlen(block.name())); + uint16_t size = static_cast(sizeof(profiler::BaseBlockData) + name_length + 1); + +#if EASY_OPTION_MEASURE_STORAGE_EXPAND != 0 + const bool expanded = (desc->m_status & profiler::ON) && blocks.closedList.need_expand(size); + if (expanded) beginTime = getCurrentTime(); +#endif + + void* data = blocks.closedList.allocate(size); + +#if EASY_OPTION_MEASURE_STORAGE_EXPAND != 0 + if (expanded) endTime = getCurrentTime(); +#endif + + ::new (data) profiler::SerializedBlock(block, name_length); + blocks.usedMemorySize += size; + +#if EASY_OPTION_MEASURE_STORAGE_EXPAND != 0 + if (expanded) + { + profiler::Block b(beginTime, desc->id(), ""); + b.finish(endTime); + + size = static_cast(sizeof(profiler::BaseBlockData) + 1); + data = blocks.closedList.allocate(size); + ::new (data) profiler::SerializedBlock(b, 0); + blocks.usedMemorySize += size; + } +#endif +} + +void ThreadStorage::storeCSwitch(const CSwitchBlock& block) +{ + uint16_t name_length = static_cast(strlen(block.name())); + uint16_t size = static_cast(sizeof(profiler::CSwitchEvent) + name_length + 1); + void* data = 
sync.closedList.allocate(size); + ::new (data) profiler::SerializedCSwitch(block, name_length); + sync.usedMemorySize += size; +} + +void ThreadStorage::clearClosed() +{ + blocks.clearClosed(); + sync.clearClosed(); +} + +void ThreadStorage::popSilent() +{ + if (!blocks.openedList.empty()) + { + profiler::Block& top = blocks.openedList.back(); + top.m_end = top.m_begin; + if (!top.m_isScoped) + nonscopedBlocks.pop(); + blocks.openedList.pop_back(); + } +} diff --git a/easy_profiler_core/thread_storage.h b/easy_profiler_core/thread_storage.h new file mode 100644 index 0000000..1d70029 --- /dev/null +++ b/easy_profiler_core/thread_storage.h @@ -0,0 +1,124 @@ +/** +Lightweight profiler library for c++ +Copyright(C) 2016-2017 Sergey Yagovtsev, Victor Zarubkin + +Licensed under either of + * MIT license (LICENSE.MIT or http://opensource.org/licenses/MIT) + * Apache License, Version 2.0, (LICENSE.APACHE or http://www.apache.org/licenses/LICENSE-2.0) +at your option. + +The MIT License + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is furnished + to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. + + +The Apache License, Version 2.0 (the "License"); + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +**/ + +#ifndef EASY_PROFILER_THREAD_STORAGE_H +#define EASY_PROFILER_THREAD_STORAGE_H + +#include +#include +#include +#include +#include "stack_buffer.h" +#include "chunk_allocator.h" + +////////////////////////////////////////////////////////////////////////// + +template +struct BlocksList +{ + BlocksList() = default; + + std::vector openedList; + chunk_allocator closedList; + uint64_t usedMemorySize = 0; + + void clearClosed() { + //closedList.clear(); + usedMemorySize = 0; + } + +private: + + BlocksList(const BlocksList&) = delete; + BlocksList(BlocksList&&) = delete; + +}; // END of struct BlocksList. 
+ +////////////////////////////////////////////////////////////////////////// + +class CSwitchBlock : public profiler::CSwitchEvent +{ + const char* m_name; + +public: + + CSwitchBlock(profiler::timestamp_t _begin_time, profiler::thread_id_t _tid, const char* _runtimeName); + inline const char* name() const { return m_name; } +}; + +////////////////////////////////////////////////////////////////////////// + +const uint16_t SIZEOF_BLOCK = sizeof(profiler::BaseBlockData) + 1 + sizeof(uint16_t); // SerializedBlock stores BaseBlockData + at least 1 character for name ('\0') + 2 bytes for size of serialized data +const uint16_t SIZEOF_CSWITCH = sizeof(profiler::CSwitchEvent) + 1 + sizeof(uint16_t); // SerializedCSwitch also stores additional 4 bytes to be able to save 64-bit thread_id + +struct ThreadStorage +{ + StackBuffer nonscopedBlocks; + BlocksList, SIZEOF_BLOCK * (uint16_t)128U> blocks; + BlocksList sync; + + std::string name; ///< Thread name + + const profiler::thread_id_t id; ///< Thread ID + std::atomic expired; ///< Is thread expired + std::atomic_bool frame; ///< Is new frame opened + bool allowChildren; ///< False if one of previously opened blocks has OFF_RECURSIVE or ON_WITHOUT_CHILDREN status + bool named; ///< True if thread name was set + bool guarded; ///< True if thread has been registered using ThreadGuard + + void storeBlock(const profiler::Block& _block); + void storeCSwitch(const CSwitchBlock& _block); + void clearClosed(); + void popSilent(); + + ThreadStorage(); + +private: + + ThreadStorage(const ThreadStorage&) = delete; + ThreadStorage(ThreadStorage&&) = delete; + +}; // END of struct ThreadStorage. + +////////////////////////////////////////////////////////////////////////// + +#endif // EASY_PROFILER_THREAD_STORAGE_H