linux: handle multi-threaded crashes

Handle multiple simultaneous crashes among threads by having
the first crashing thread set an atomic flag and subsequently crashing
threads check the flag before requesting a dump. If a dump has already
been requested, the threads pause on a futex with a timeout in case the
crashing thread crashes again or otherwise fails to WakeThreads().

The thread_local disabled_for_thread_ is removed and combined with this
flag because accessing thread_locals produces undefined behavior in
signal handlers.

Bug:crashpad:384, chromium:861730
Change-Id: I83bce36e1010d0635ba8aeac937e150c43a4166f
Reviewed-on: https://chromium-review.googlesource.com/c/crashpad/crashpad/+/3403017
Reviewed-by: Mark Mentovai <mark@chromium.org>
Commit-Queue: Joshua Peraza <jperaza@chromium.org>
This commit is contained in:
Joshua Peraza 2022-01-20 09:45:58 -08:00 committed by Crashpad LUCI CQ
parent 667424894f
commit fd732953ce

View File

@ -16,6 +16,8 @@
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <linux/futex.h>
#include <pthread.h>
#include <stdlib.h>
#include <sys/mman.h>
@ -137,10 +139,14 @@ class SignalHandler {
// handler has been installed.
static SignalHandler* Get() { return handler_; }
// Disables any installed Crashpad signal handler for the calling thread. If a
// crash signal is received, any previously installed (non-Crashpad) signal
// handler will be restored and the signal reraised.
static void DisableForThread() { disabled_for_thread_ = true; }
// Disables any installed Crashpad signal handler. If a crash signal is
// received, any previously installed (non-Crashpad) signal handler will be
// restored and the signal reraised.
static void Disable() {
if (!handler_->disabled_.test_and_set()) {
handler_->WakeThreads();
}
}
void SetFirstChanceHandler(CrashpadClient::FirstChanceHandler handler) {
first_chance_handler_ = handler;
@ -183,6 +189,9 @@ class SignalHandler {
virtual void HandleCrashImpl() = 0;
private:
static constexpr int32_t kDumpNotDone = 0;
static constexpr int32_t kDumpDone = 1;
// The signal handler installed at OS-level.
static void HandleOrReraiseSignal(int signo,
siginfo_t* siginfo,
@ -193,24 +202,60 @@ class SignalHandler {
return;
}
if (!handler_->disabled_for_thread_) {
// Only handle the first fatal signal observed. If another thread receives a
// crash signal, it waits for the first dump to complete instead of
// requesting another.
if (!handler_->disabled_.test_and_set()) {
handler_->HandleCrash(signo, siginfo, context);
handler_->WakeThreads();
} else {
// Processes on Android normally have several chained signal handlers that
// co-operate to report crashes. e.g. WebView will have this signal
// handler installed, the app embedding WebView may have a signal handler
// installed, and Bionic will have a signal handler. Each signal handler
// runs in succession, possibly managed by libsigchain. This wait is
// intended to avoid ill-effects from multiple signal handlers from
// different layers (possibly all trying to use ptrace()) from running
// simultaneously. It does not block forever so that in most conditions,
// those signal handlers will still have a chance to run and ensures
// process termination in case the first crashing thread crashes again in
// its signal handler. Though less typical, this situation also occurs on
// other Linuxes, e.g. to produce in-process stack traces for debug
// builds.
handler_->WaitForDumpDone();
}
Signals::RestoreHandlerAndReraiseSignalOnReturn(
siginfo, handler_->old_actions_.ActionForSignal(signo));
}
void WaitForDumpDone() {
kernel_timespec timeout;
timeout.tv_sec = 5;
timeout.tv_nsec = 0;
sys_futex(&dump_done_futex_,
FUTEX_WAIT_PRIVATE,
kDumpNotDone,
&timeout,
nullptr,
0);
}
void WakeThreads() {
dump_done_futex_ = kDumpDone;
sys_futex(
&dump_done_futex_, FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0);
}
Signals::OldActions old_actions_ = {};
ExceptionInformation exception_information_ = {};
CrashpadClient::FirstChanceHandler first_chance_handler_ = nullptr;
int32_t dump_done_futex_ = kDumpNotDone;
std::atomic_flag disabled_ = ATOMIC_FLAG_INIT;
static SignalHandler* handler_;
static thread_local bool disabled_for_thread_;
};
SignalHandler* SignalHandler::handler_ = nullptr;
thread_local bool SignalHandler::disabled_for_thread_ = false;
// Launches a single use handler to snapshot this process.
class LaunchAtCrashHandler : public SignalHandler {
@ -664,7 +709,7 @@ void CrashpadClient::DumpWithoutCrash(NativeCPUContext* context) {
// static
void CrashpadClient::CrashWithoutDump(const std::string& message) {
SignalHandler::DisableForThread();
SignalHandler::Disable();
LOG(FATAL) << message;
}