From 90054edf6202285fc3f1d0e0912f3320b61748b1 Mon Sep 17 00:00:00 2001 From: Mark Mentovai Date: Wed, 4 Oct 2017 11:40:23 -0400 Subject: [PATCH] win: De-flake hanging_program.exe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hanging_program.exe is used by crash_other_program.exe, which is in turn used by end_to_end_test.py. It hangs by loading loader_lock_dll.dll, which squats in its entry point function while the loader lock is held. hanging_program.exe needs to do some work in its Thread1() before the loader lock is taken (a SetThreadPriority() call), and needs to do some work in its main thread once the loader lock is held (it needs to signal crash_other_program.exe that it’s successfully wedged itself). Previously, proper synchronization was not provided. A 1-second Sleep() was used to wait for the loader lock to be taken. Thread1() pre-work was only achieved before the loader lock was taken by sheer luck. Things didn’t always work out so nicely. This uses an event handle to provide synchronization. An environment variable is used to pass the handle to loader_lock_dll.dll, because there aren’t many better options available. This eliminates both flake and the unnecessary 1-second delay in hanging_program.exe, and since this program runs twice during end_to_end_test.py, it improves that test’s runtime by 2 seconds. Bug: crashpad:197 Test: end_to_end_test.py Change-Id: Ib9883215ef96bed7571464cc68e09b6ab6310ae6 Reviewed-on: https://chromium-review.googlesource.com/700076 Commit-Queue: Mark Mentovai Reviewed-by: Leonard Mosescu Reviewed-by: Scott Graham --- handler/win/hanging_program.cc | 85 ++++++++++++++++++++++++------ handler/win/loader_lock_dll.cc | 92 ++++++++++++++++++++++++++++++++- snapshot/win/end_to_end_test.py | 5 +- util/win/ntstatus_logging.cc | 5 ++ 4 files changed, 167 insertions(+), 20 deletions(-) diff --git a/handler/win/hanging_program.cc b/handler/win/hanging_program.cc index 63876634..b09d94a8 100644 --- a/handler/win/hanging_program.cc +++ b/handler/win/hanging_program.cc @@ -18,40 +18,62 @@ #include "base/debug/alias.h" #include "base/logging.h" #include "base/macros.h" +#include "base/strings/stringprintf.h" +#include "base/strings/utf_string_conversions.h" #include "client/crashpad_client.h" #include "client/crashpad_info.h" -DWORD WINAPI Thread1(LPVOID dummy) { - // We set the thread priority up by one as a hacky way to signal to the other - // test program that this is the thread we want to dump. - SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL); +namespace { + +DWORD WINAPI Thread1(LPVOID context) { + HANDLE event = context; + + // Increase the thread priority as a hacky way to signal to + // crash_other_program.exe that this is the thread to dump. + PCHECK(SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL)); + + // Let the main thread proceed. + PCHECK(SetEvent(event)); + Sleep(INFINITE); + + NOTREACHED(); return 0; } DWORD WINAPI Thread2(LPVOID dummy) { Sleep(INFINITE); + NOTREACHED(); return 0; } -DWORD WINAPI Thread3(LPVOID dummy) { +DWORD WINAPI Thread3(LPVOID context) { + // This is a convenient way to pass the event handle to loader_lock_dll.dll. + HANDLE event = context; + PCHECK(SetEnvironmentVariable( + L"CRASHPAD_TEST_DLL_EVENT", + base::UTF8ToUTF16(base::StringPrintf("%p", event)).c_str())); + HMODULE dll = LoadLibrary(L"loader_lock_dll.dll"); if (!dll) - PLOG(ERROR) << "LoadLibrary"; + PLOG(FATAL) << "LoadLibrary"; // This call is not expected to return. if (!FreeLibrary(dll)) - PLOG(ERROR) << "FreeLibrary"; + PLOG(FATAL) << "FreeLibrary"; + NOTREACHED(); return 0; } +} // namespace + int wmain(int argc, wchar_t* argv[]) { crashpad::CrashpadClient client; if (argc == 2) { if (!client.SetHandlerIPCPipe(argv[1])) { - LOG(ERROR) << "SetHandler"; + LOG(ERROR) << "SetHandlerIPCPipe"; return EXIT_FAILURE; } } else { @@ -64,21 +86,50 @@ int wmain(int argc, wchar_t* argv[]) { crashpad::CrashpadInfo::GetCrashpadInfo(); base::debug::Alias(crashpad_info); + HANDLE event = CreateEvent(nullptr, + false, // bManualReset + false, + nullptr); + if (!event) { + PLOG(ERROR) << "CreateEvent"; + return EXIT_FAILURE; + } + HANDLE threads[3]; - threads[0] = CreateThread(nullptr, 0, Thread1, nullptr, 0, nullptr); + threads[0] = CreateThread(nullptr, 0, Thread1, event, 0, nullptr); + threads[1] = CreateThread(nullptr, 0, Thread2, nullptr, 0, nullptr); - threads[2] = CreateThread(nullptr, 0, Thread3, nullptr, 0, nullptr); - // Our whole process is going to hang when the loaded DLL hangs in its - // DllMain(), so we can't signal to our parent that we're "ready". So, use a - // hokey delay of 1s after we spawn the threads, and hope that we make it to - // the FreeLibrary call by then. - Sleep(1000); + // Wait for Thread1() to complete its work and reach its Sleep() before + // starting the next thread, which will hold the loader lock and potentially + // block any further progress. + if (WaitForSingleObject(event, INFINITE) != WAIT_OBJECT_0) { + PLOG(ERROR) << "WaitForSingleObject"; + return EXIT_FAILURE; + } + // Recycle the event handle, which was automatically reset. + threads[2] = CreateThread(nullptr, 0, Thread3, event, 0, nullptr); + + // Wait for loader_lock_dll.dll to signal that the loader lock is held and + // won’t be released. + if (WaitForSingleObject(event, INFINITE) != WAIT_OBJECT_0) { + PLOG(ERROR) << "WaitForSingleObject"; + return EXIT_FAILURE; + } + + // Signal to the parent that everything is ready. fprintf(stdout, " "); fflush(stdout); - WaitForMultipleObjects(ARRAYSIZE(threads), threads, true, INFINITE); + // This is not expected to return. + DWORD count = + WaitForMultipleObjects(arraysize(threads), threads, true, INFINITE); + if (count == WAIT_FAILED) { + PLOG(ERROR) << "WaitForMultipleObjects"; + } else { + LOG(ERROR) << "WaitForMultipleObjects: " << count; + } - return 0; + return EXIT_FAILURE; } diff --git a/handler/win/loader_lock_dll.cc b/handler/win/loader_lock_dll.cc index 0a05af3d..cfa098b2 100644 --- a/handler/win/loader_lock_dll.cc +++ b/handler/win/loader_lock_dll.cc @@ -12,8 +12,70 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include +#include +#include #include +namespace { + +// This custom logging function is to avoid taking a dependency on base in this +// standalone DLL. Don’t call this directly, use the LOG_FATAL() and +// PLOG_FATAL() macros below. +__declspec(noreturn) void LogFatal(const char* file, + int line, + bool get_last_error, + const char* format, + ...) { + DWORD last_error = ERROR_SUCCESS; + if (get_last_error) { + last_error = GetLastError(); + } + + char fname[_MAX_FNAME]; + char ext[_MAX_EXT]; + if (_splitpath_s(file, + nullptr, + 0, + nullptr, + 0, + fname, + sizeof(fname), + ext, + sizeof(ext)) == 0) { + fprintf(stderr, "%s%s", fname, ext); + } else { + fputs(file, stderr); + } + fprintf(stderr, ":%d: ", line); + + va_list va; + va_start(va, format); + vfprintf(stderr, format, va); + va_end(va); + + if (get_last_error) { + fprintf(stderr, ": error %u", last_error); + } + + fputs("\n", stderr); + fflush(stderr); + + TerminateProcess(GetCurrentProcess(), 1); + __fastfail(FAST_FAIL_FATAL_APP_EXIT); +} + +#define LOG_FATAL(...) \ + do { \ + LogFatal(__FILE__, __LINE__, false, __VA_ARGS__); \ + } while (false) +#define PLOG_FATAL(...) \ + do { \ + LogFatal(__FILE__, __LINE__, true, __VA_ARGS__); \ + } while (false) + +} // namespace + // This program intentionally blocks in DllMain which is executed with the // loader lock locked. This allows us to test that // CrashpadClient::DumpAndCrashTargetProcess() can still dump the target in this @@ -21,8 +83,36 @@ BOOL WINAPI DllMain(HINSTANCE, DWORD reason, LPVOID) { switch (reason) { case DLL_PROCESS_DETACH: - case DLL_THREAD_DETACH: + case DLL_THREAD_DETACH: { + // Recover the event handle stashed by the main executable. + static constexpr size_t kEventStringSize = 19; + wchar_t event_string[kEventStringSize]; + SetLastError(ERROR_SUCCESS); + DWORD size = GetEnvironmentVariable( + L"CRASHPAD_TEST_DLL_EVENT", event_string, kEventStringSize); + if (size == 0 && GetLastError() != ERROR_SUCCESS) { + PLOG_FATAL("GetEnvironmentVariable"); + } + if (size == 0 || size >= kEventStringSize) { + LOG_FATAL("GetEnvironmentVariable: size %u", size); + } + + HANDLE event; + int converted = swscanf(event_string, L"%p", &event); + if (converted != 1) { + LOG_FATAL("swscanf: converted %d", converted); + } + + // Let the main thread know that the loader lock is and will remain held. + if (!SetEvent(event)) { + PLOG_FATAL("SetEvent"); + } + Sleep(INFINITE); + + break; + } } + return TRUE; } diff --git a/snapshot/win/end_to_end_test.py b/snapshot/win/end_to_end_test.py index 5afdac38..7fda9445 100755 --- a/snapshot/win/end_to_end_test.py +++ b/snapshot/win/end_to_end_test.py @@ -123,7 +123,7 @@ def GetDumpFromProgram( test_database] + list(args)) if exit_code != expect_exit_code: - raise CalledProcessError(exit_code, executable_name) + raise subprocess.CalledProcessError(exit_code, executable_name) out = subprocess.check_output([ os.path.join(out_dir, 'crashpad_database_util.exe'), @@ -368,7 +368,8 @@ def RunTests(cdb_path, out.Check('Unknown exception - code deadbea7', 'other program dump exception code') out.Check('!Sleep', 'other program reasonable location') - out.Check('hanging_program!Thread1', 'other program dump right thread') + out.Check("hanging_program!`anonymous namespace'::Thread1", + 'other program dump right thread') count = 0 while True: match_obj = out.Find(r'Id.*Suspend: (\d+) ') diff --git a/util/win/ntstatus_logging.cc b/util/win/ntstatus_logging.cc index e06fe248..442e49f4 100644 --- a/util/win/ntstatus_logging.cc +++ b/util/win/ntstatus_logging.cc @@ -32,6 +32,11 @@ std::string FormatNtstatus(DWORD ntstatus) { arraysize(msgbuf), nullptr); if (len) { + // Most system messages end in a period and a space. Remove the space if + // it’s there, because ~NtstatusLogMessage() includes one. + if (len >= 1 && msgbuf[len - 1] == ' ') { + msgbuf[len - 1] = '\0'; + } return msgbuf; } else { return base::StringPrintf("",