From fd732953ceb05b8b9ad9bf192c796392a4ad1158 Mon Sep 17 00:00:00 2001 From: Joshua Peraza Date: Thu, 20 Jan 2022 09:45:58 -0800 Subject: [PATCH] linux: handle multi-threaded crashes Handle multiple simultaneous crashes among threads by having the first crashing thread set an atomic flag and subsequently crashing threads check the flag before requesting a dump. If a dump has already been requested, the threads pause on a futex with a timeout in case the crashing thread crashes again or otherwise fails to WakeThreads(). The thread_local disabled_for_thread_ is removed and combined with this flag because accessing thread_locals produces undefined behavior in signal handlers. Bug:crashpad:384, chromium:861730 Change-Id: I83bce36e1010d0635ba8aeac937e150c43a4166f Reviewed-on: https://chromium-review.googlesource.com/c/crashpad/crashpad/+/3403017 Reviewed-by: Mark Mentovai Commit-Queue: Joshua Peraza --- client/crashpad_client_linux.cc | 63 ++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/client/crashpad_client_linux.cc b/client/crashpad_client_linux.cc index cb07ef08..5e534599 100644 --- a/client/crashpad_client_linux.cc +++ b/client/crashpad_client_linux.cc @@ -16,6 +16,8 @@ #include #include +#include +#include #include #include #include @@ -137,10 +139,14 @@ class SignalHandler { // handler has been installed. static SignalHandler* Get() { return handler_; } - // Disables any installed Crashpad signal handler for the calling thread. If a - // crash signal is received, any previously installed (non-Crashpad) signal - // handler will be restored and the signal reraised. - static void DisableForThread() { disabled_for_thread_ = true; } + // Disables any installed Crashpad signal handler. If a crash signal is + // received, any previously installed (non-Crashpad) signal handler will be + // restored and the signal reraised. + static void Disable() { + if (!handler_->disabled_.test_and_set()) { + handler_->WakeThreads(); + } + } void SetFirstChanceHandler(CrashpadClient::FirstChanceHandler handler) { first_chance_handler_ = handler; @@ -183,6 +189,9 @@ class SignalHandler { virtual void HandleCrashImpl() = 0; private: + static constexpr int32_t kDumpNotDone = 0; + static constexpr int32_t kDumpDone = 1; + // The signal handler installed at OS-level. static void HandleOrReraiseSignal(int signo, siginfo_t* siginfo, @@ -193,24 +202,60 @@ class SignalHandler { return; } - if (!handler_->disabled_for_thread_) { + // Only handle the first fatal signal observed. If another thread receives a + // crash signal, it waits for the first dump to complete instead of + // requesting another. + if (!handler_->disabled_.test_and_set()) { handler_->HandleCrash(signo, siginfo, context); + handler_->WakeThreads(); + } else { + // Processes on Android normally have several chained signal handlers that + // co-operate to report crashes. e.g. WebView will have this signal + // handler installed, the app embedding WebView may have a signal handler + // installed, and Bionic will have a signal handler. Each signal handler + // runs in succession, possibly managed by libsigchain. This wait is + // intended to avoid ill-effects from multiple signal handlers from + // different layers (possibly all trying to use ptrace()) from running + // simultaneously. It does not block forever so that in most conditions, + // those signal handlers will still have a chance to run and ensures + // process termination in case the first crashing thread crashes again in + // its signal handler. Though less typical, this situation also occurs on + // other Linuxes, e.g. to produce in-process stack traces for debug + // builds. + handler_->WaitForDumpDone(); } Signals::RestoreHandlerAndReraiseSignalOnReturn( siginfo, handler_->old_actions_.ActionForSignal(signo)); } + void WaitForDumpDone() { + kernel_timespec timeout; + timeout.tv_sec = 5; + timeout.tv_nsec = 0; + sys_futex(&dump_done_futex_, + FUTEX_WAIT_PRIVATE, + kDumpNotDone, + &timeout, + nullptr, + 0); + } + + void WakeThreads() { + dump_done_futex_ = kDumpDone; + sys_futex( + &dump_done_futex_, FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0); + } + Signals::OldActions old_actions_ = {}; ExceptionInformation exception_information_ = {}; CrashpadClient::FirstChanceHandler first_chance_handler_ = nullptr; + int32_t dump_done_futex_ = kDumpNotDone; + std::atomic_flag disabled_ = ATOMIC_FLAG_INIT; static SignalHandler* handler_; - - static thread_local bool disabled_for_thread_; }; SignalHandler* SignalHandler::handler_ = nullptr; -thread_local bool SignalHandler::disabled_for_thread_ = false; // Launches a single use handler to snapshot this process. class LaunchAtCrashHandler : public SignalHandler { @@ -664,7 +709,7 @@ void CrashpadClient::DumpWithoutCrash(NativeCPUContext* context) { // static void CrashpadClient::CrashWithoutDump(const std::string& message) { - SignalHandler::DisableForThread(); + SignalHandler::Disable(); LOG(FATAL) << message; }