2015-04-29 18:53:47 -07:00
|
|
|
|
// Copyright 2015 The Crashpad Authors. All rights reserved.
|
|
|
|
|
//
|
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
|
//
|
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
//
|
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
|
|
#include "client/crashpad_client.h"
|
|
|
|
|
|
|
|
|
|
#include <windows.h>
|
2018-12-03 09:28:09 -05:00
|
|
|
|
|
2016-11-17 14:00:21 -08:00
|
|
|
|
#include <signal.h>
|
2016-01-06 12:22:50 -05:00
|
|
|
|
#include <stdint.h>
|
|
|
|
|
#include <string.h>
|
2015-04-29 18:53:47 -07:00
|
|
|
|
|
2016-04-25 12:13:07 -07:00
|
|
|
|
#include <memory>
|
|
|
|
|
|
2015-08-14 15:22:09 -07:00
|
|
|
|
#include "base/atomicops.h"
|
2015-04-29 18:53:47 -07:00
|
|
|
|
#include "base/logging.h"
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
#include "base/macros.h"
|
2015-11-06 16:55:31 -05:00
|
|
|
|
#include "base/scoped_generic.h"
|
2015-08-14 15:22:09 -07:00
|
|
|
|
#include "base/strings/string16.h"
|
2015-11-02 13:59:36 -05:00
|
|
|
|
#include "base/strings/stringprintf.h"
|
2015-08-14 15:22:09 -07:00
|
|
|
|
#include "base/strings/utf_string_conversions.h"
|
2015-09-25 13:45:32 -07:00
|
|
|
|
#include "base/synchronization/lock.h"
|
2015-08-14 15:22:09 -07:00
|
|
|
|
#include "util/file/file_io.h"
|
2018-02-08 16:25:22 -08:00
|
|
|
|
#include "util/misc/capture_context.h"
|
2017-04-28 10:08:35 -04:00
|
|
|
|
#include "util/misc/from_pointer_cast.h"
|
2016-10-21 13:08:18 -07:00
|
|
|
|
#include "util/misc/random_string.h"
|
2016-04-22 10:03:59 -07:00
|
|
|
|
#include "util/win/address_types.h"
|
2015-11-02 13:59:36 -05:00
|
|
|
|
#include "util/win/command_line.h"
|
2018-12-12 12:58:24 -08:00
|
|
|
|
#include "util/win/context_wrappers.h"
|
2015-10-16 14:55:14 -07:00
|
|
|
|
#include "util/win/critical_section_with_debug_info.h"
|
2015-11-06 16:55:31 -05:00
|
|
|
|
#include "util/win/get_function.h"
|
2015-11-05 14:00:26 -05:00
|
|
|
|
#include "util/win/handle.h"
|
2016-10-21 13:08:18 -07:00
|
|
|
|
#include "util/win/initial_client_data.h"
|
2019-12-03 10:20:34 -08:00
|
|
|
|
#include "util/win/loader_lock.h"
|
2016-04-22 10:03:59 -07:00
|
|
|
|
#include "util/win/nt_internals.h"
|
|
|
|
|
#include "util/win/ntstatus_logging.h"
|
|
|
|
|
#include "util/win/process_info.h"
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
#include "util/win/registration_protocol_win.h"
|
win: Wrap TerminateProcess() to accept cdecl patches on x86
TerminateProcess(), like most of the Windows API, is declared WINAPI,
which is __stdcall on 32-bit x86. That means that the callee,
TerminateProcess() itself, is responsible for cleaning up parameters on
the stack on return. In https://crashpad.chromium.org/bug/179, crashes
in ExceptionHandlerServer::OnNonCrashDumpEvent() were observed in ways
that make it evident that TerminateProcess() has been patched with a
__cdecl routine. The crucial difference between __stdcall and __cdecl is
that the caller is responsible for stack parameter cleanup in __cdecl.
The mismatch means that nobody cleans parameters from the stack, and the
stack pointer has an unexpected value, which in the case of the Crashpad
handler crash, results in TerminateProcess()’s second argument
erroneously being used as the lock address in the call to
ReleaseSRWLockExclusive() or LeaveCriticalSection().
As a workaround, on 32-bit x86, call through SafeTerminateProcess(), a
custom assembly routine that’s compatible with either __stdcall or
__cdecl implementations of TerminateProcess() by not trusting the value
of the stack pointer on return from that function. Instead, the stack
pointer is restored directly from the frame pointer.
Bug: crashpad:179
Test: crashpad_util_test SafeTerminateProcess.*, others
Change-Id: If9508f4eb7631020ea69ddbbe4a22eb335cdb325
Reviewed-on: https://chromium-review.googlesource.com/481180
Reviewed-by: Scott Graham <scottmg@chromium.org>
2017-04-19 13:22:08 -04:00
|
|
|
|
#include "util/win/safe_terminate_process.h"
|
2016-04-22 10:03:59 -07:00
|
|
|
|
#include "util/win/scoped_process_suspend.h"
|
2016-09-28 20:18:39 -07:00
|
|
|
|
#include "util/win/termination_codes.h"
|
2016-10-21 13:08:18 -07:00
|
|
|
|
#include "util/win/xp_compat.h"
|
|
|
|
|
|
|
|
|
|
namespace crashpad {
|
2015-04-29 18:53:47 -07:00
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
2015-09-25 13:45:32 -07:00
|
|
|
|
// This handle is never closed. This is used to signal to the server that a dump
|
|
|
|
|
// should be taken in the event of a crash.
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
HANDLE g_signal_exception = INVALID_HANDLE_VALUE;
|
2015-04-29 18:53:47 -07:00
|
|
|
|
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
// Where we store the exception information that the crash handler reads.
|
2016-10-21 13:08:18 -07:00
|
|
|
|
ExceptionInformation g_crash_exception_information;
|
2015-09-25 13:45:32 -07:00
|
|
|
|
|
|
|
|
|
// These handles are never closed. g_signal_non_crash_dump is used to signal to
|
|
|
|
|
// the server to take a dump (not due to an exception), and the server will
|
|
|
|
|
// signal g_non_crash_dump_done when the dump is completed.
|
|
|
|
|
HANDLE g_signal_non_crash_dump = INVALID_HANDLE_VALUE;
|
|
|
|
|
HANDLE g_non_crash_dump_done = INVALID_HANDLE_VALUE;
|
|
|
|
|
|
|
|
|
|
// Guards multiple simultaneous calls to DumpWithoutCrash(). This is leaked.
|
|
|
|
|
base::Lock* g_non_crash_dump_lock;
|
|
|
|
|
|
|
|
|
|
// Where we store a pointer to the context information when taking a non-crash
|
|
|
|
|
// dump.
|
2016-10-21 13:08:18 -07:00
|
|
|
|
ExceptionInformation g_non_crash_exception_information;
|
2015-08-14 15:22:09 -07:00
|
|
|
|
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
// Outcome of starting the handler. A value of this type is stored, cast to
// base::subtle::AtomicWord, in g_handler_startup_state.
enum class StartupState : int {
  kNotReady = 0,  // This must be value 0 because it is the initial value of a
                  // global AtomicWord.
  kSucceeded = 1,  // The CreateProcess() for the handler succeeded.
  kFailed = 2,  // The handler failed to start.
};
|
|
|
|
|
|
|
|
|
|
// This is a tri-state of type StartupState. It starts at 0 == kNotReady, and
|
|
|
|
|
// when the handler is known to have started successfully, or failed to start
|
|
|
|
|
// the value will be updated. The unhandled exception filter will not proceed
|
|
|
|
|
// until one of those two cases happens.
|
|
|
|
|
base::subtle::AtomicWord g_handler_startup_state;
|
|
|
|
|
|
2015-10-15 13:18:08 -07:00
|
|
|
|
// A CRITICAL_SECTION initialized with
|
|
|
|
|
// RTL_CRITICAL_SECTION_FLAG_FORCE_DEBUG_INFO to force it to be allocated with a
|
|
|
|
|
// valid .DebugInfo field. The address of this critical section is given to the
|
|
|
|
|
// handler. All critical sections with debug info are linked in a doubly-linked
|
|
|
|
|
// list, so this allows the handler to capture all of them.
|
|
|
|
|
CRITICAL_SECTION g_critical_section_with_debug_info;
|
|
|
|
|
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
// Records the final outcome of handler startup in g_handler_startup_state.
//
// |state| must be StartupState::kSucceeded or StartupState::kFailed; this is
// DCHECKed, so kNotReady can never be written back. The value is written with
// base::subtle::Release_Store(); BlockUntilHandlerStartedOrFailed() reads the
// same global with base::subtle::Acquire_Load().
void SetHandlerStartupState(StartupState state) {
  DCHECK(state == StartupState::kSucceeded || state == StartupState::kFailed);
  base::subtle::Release_Store(&g_handler_startup_state,
                              static_cast<base::subtle::AtomicWord>(state));
}
|
|
|
|
|
|
2016-11-03 09:38:14 -07:00
|
|
|
|
StartupState BlockUntilHandlerStartedOrFailed() {
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
// Wait until we know the handler has either succeeded or failed to start.
|
|
|
|
|
base::subtle::AtomicWord startup_state;
|
|
|
|
|
while (
|
2019-08-07 11:02:57 -04:00
|
|
|
|
(startup_state = base::subtle::Acquire_Load(&g_handler_startup_state)) ==
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
static_cast<int>(StartupState::kNotReady)) {
|
|
|
|
|
Sleep(1);
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-03 09:38:14 -07:00
|
|
|
|
return static_cast<StartupState>(startup_state);
|
|
|
|
|
}
|
|
|
|
|
|
2016-12-07 13:42:50 -08:00
|
|
|
|
#if defined(ADDRESS_SANITIZER)
|
|
|
|
|
extern "C" LONG __asan_unhandled_exception_filter(EXCEPTION_POINTERS* info);
|
|
|
|
|
#endif
|
|
|
|
|
|
2016-11-03 09:38:14 -07:00
|
|
|
|
LONG WINAPI UnhandledExceptionHandler(EXCEPTION_POINTERS* exception_pointers) {
|
2016-12-07 13:42:50 -08:00
|
|
|
|
#if defined(ADDRESS_SANITIZER)
|
|
|
|
|
// In ASan builds, delegate to the ASan exception filter.
|
|
|
|
|
LONG status = __asan_unhandled_exception_filter(exception_pointers);
|
|
|
|
|
if (status != EXCEPTION_CONTINUE_SEARCH)
|
|
|
|
|
return status;
|
|
|
|
|
#endif
|
|
|
|
|
|
2016-11-03 09:38:14 -07:00
|
|
|
|
if (BlockUntilHandlerStartedOrFailed() == StartupState::kFailed) {
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
// If we know for certain that the handler has failed to start, then abort
|
|
|
|
|
// here, rather than trying to signal to a handler that will never arrive,
|
|
|
|
|
// and then sleeping unnecessarily.
|
|
|
|
|
LOG(ERROR) << "crash server failed to launch, self-terminating";
|
win: Wrap TerminateProcess() to accept cdecl patches on x86
TerminateProcess(), like most of the Windows API, is declared WINAPI,
which is __stdcall on 32-bit x86. That means that the callee,
TerminateProcess() itself, is responsible for cleaning up parameters on
the stack on return. In https://crashpad.chromium.org/bug/179, crashes
in ExceptionHandlerServer::OnNonCrashDumpEvent() were observed in ways
that make it evident that TerminateProcess() has been patched with a
__cdecl routine. The crucial difference between __stdcall and __cdecl is
that the caller is responsible for stack parameter cleanup in __cdecl.
The mismatch means that nobody cleans parameters from the stack, and the
stack pointer has an unexpected value, which in the case of the Crashpad
handler crash, results in TerminateProcess()’s second argument
erroneously being used as the lock address in the call to
ReleaseSRWLockExclusive() or LeaveCriticalSection().
As a workaround, on 32-bit x86, call through SafeTerminateProcess(), a
custom assembly routine that’s compatible with either __stdcall or
__cdecl implementations of TerminateProcess() by not trusting the value
of the stack pointer on return from that function. Instead, the stack
pointer is restored directly from the frame pointer.
Bug: crashpad:179
Test: crashpad_util_test SafeTerminateProcess.*, others
Change-Id: If9508f4eb7631020ea69ddbbe4a22eb335cdb325
Reviewed-on: https://chromium-review.googlesource.com/481180
Reviewed-by: Scott Graham <scottmg@chromium.org>
2017-04-19 13:22:08 -04:00
|
|
|
|
SafeTerminateProcess(GetCurrentProcess(), kTerminationCodeCrashNoDump);
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
return EXCEPTION_CONTINUE_SEARCH;
|
|
|
|
|
}
|
2016-11-03 09:38:14 -07:00
|
|
|
|
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
// Otherwise, we know the handler startup has succeeded, and we can continue.
|
|
|
|
|
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
// Tracks whether a thread has already entered UnhandledExceptionHandler.
|
|
|
|
|
static base::subtle::AtomicWord have_crashed;
|
|
|
|
|
|
2015-08-14 15:22:09 -07:00
|
|
|
|
// This is a per-process handler. While this handler is being invoked, other
|
|
|
|
|
// threads are still executing as usual, so multiple threads could enter at
|
|
|
|
|
// the same time. Because we're in a crashing state, we shouldn't be doing
|
|
|
|
|
// anything that might cause allocations, call into kernel mode, etc. So, we
|
|
|
|
|
// don't want to take a critical section here to avoid simultaneous access to
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
// the global exception pointers in ExceptionInformation. Because the crash
|
|
|
|
|
// handler will record all threads, it's fine to simply have the second and
|
|
|
|
|
// subsequent entrants block here. They will soon be suspended by the crash
|
|
|
|
|
// handler, and then the entire process will be terminated below. This means
|
|
|
|
|
// that we won't save the exception pointers from the second and further
|
|
|
|
|
// crashes, but contention here is very unlikely, and we'll still have a stack
|
|
|
|
|
// that's blocked at this location.
|
|
|
|
|
if (base::subtle::Barrier_AtomicIncrement(&have_crashed, 1) > 1) {
|
2015-08-18 12:25:19 -07:00
|
|
|
|
SleepEx(INFINITE, false);
|
2015-08-14 15:22:09 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Otherwise, we're the first thread, so record the exception pointer and
|
|
|
|
|
// signal the crash handler.
|
2015-09-25 13:45:32 -07:00
|
|
|
|
g_crash_exception_information.thread_id = GetCurrentThreadId();
|
|
|
|
|
g_crash_exception_information.exception_pointers =
|
2017-04-28 10:08:35 -04:00
|
|
|
|
FromPointerCast<WinVMAddress>(exception_pointers);
|
2015-04-29 18:53:47 -07:00
|
|
|
|
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
// Now signal the crash server, which will take a dump and then terminate us
|
|
|
|
|
// when it's complete.
|
|
|
|
|
SetEvent(g_signal_exception);
|
2015-08-14 15:22:09 -07:00
|
|
|
|
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
// Time to wait for the handler to create a dump.
|
2017-07-25 19:15:48 -04:00
|
|
|
|
constexpr DWORD kMillisecondsUntilTerminate = 60 * 1000;
|
2015-08-14 15:22:09 -07:00
|
|
|
|
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
// Sleep for a while to allow it to process us. Eventually, we terminate
|
|
|
|
|
// ourselves in case the crash server is gone, so that we don't leave zombies
|
|
|
|
|
// around. This would ideally never happen.
|
|
|
|
|
Sleep(kMillisecondsUntilTerminate);
|
|
|
|
|
|
|
|
|
|
LOG(ERROR) << "crash server did not respond, self-terminating";
|
|
|
|
|
|
win: Wrap TerminateProcess() to accept cdecl patches on x86
TerminateProcess(), like most of the Windows API, is declared WINAPI,
which is __stdcall on 32-bit x86. That means that the callee,
TerminateProcess() itself, is responsible for cleaning up parameters on
the stack on return. In https://crashpad.chromium.org/bug/179, crashes
in ExceptionHandlerServer::OnNonCrashDumpEvent() were observed in ways
that make it evident that TerminateProcess() has been patched with a
__cdecl routine. The crucial difference between __stdcall and __cdecl is
that the caller is responsible for stack parameter cleanup in __cdecl.
The mismatch means that nobody cleans parameters from the stack, and the
stack pointer has an unexpected value, which in the case of the Crashpad
handler crash, results in TerminateProcess()’s second argument
erroneously being used as the lock address in the call to
ReleaseSRWLockExclusive() or LeaveCriticalSection().
As a workaround, on 32-bit x86, call through SafeTerminateProcess(), a
custom assembly routine that’s compatible with either __stdcall or
__cdecl implementations of TerminateProcess() by not trusting the value
of the stack pointer on return from that function. Instead, the stack
pointer is restored directly from the frame pointer.
Bug: crashpad:179
Test: crashpad_util_test SafeTerminateProcess.*, others
Change-Id: If9508f4eb7631020ea69ddbbe4a22eb335cdb325
Reviewed-on: https://chromium-review.googlesource.com/481180
Reviewed-by: Scott Graham <scottmg@chromium.org>
2017-04-19 13:22:08 -04:00
|
|
|
|
SafeTerminateProcess(GetCurrentProcess(), kTerminationCodeCrashNoDump);
|
2015-08-14 15:22:09 -07:00
|
|
|
|
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
return EXCEPTION_CONTINUE_SEARCH;
|
2015-08-14 15:22:09 -07:00
|
|
|
|
}
|
|
|
|
|
|
2016-11-17 14:00:21 -08:00
|
|
|
|
void HandleAbortSignal(int signum) {
|
|
|
|
|
DCHECK_EQ(signum, SIGABRT);
|
|
|
|
|
|
|
|
|
|
CONTEXT context;
|
|
|
|
|
CaptureContext(&context);
|
|
|
|
|
|
|
|
|
|
EXCEPTION_RECORD record = {};
|
|
|
|
|
record.ExceptionCode = STATUS_FATAL_APP_EXIT;
|
|
|
|
|
record.ExceptionFlags = EXCEPTION_NONCONTINUABLE;
|
2018-12-12 12:58:24 -08:00
|
|
|
|
record.ExceptionAddress = ProgramCounterFromCONTEXT(&context);
|
2016-11-17 14:00:21 -08:00
|
|
|
|
|
|
|
|
|
EXCEPTION_POINTERS exception_pointers;
|
|
|
|
|
exception_pointers.ContextRecord = &context;
|
|
|
|
|
exception_pointers.ExceptionRecord = &record;
|
|
|
|
|
|
|
|
|
|
UnhandledExceptionHandler(&exception_pointers);
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-02 13:59:36 -05:00
|
|
|
|
std::wstring FormatArgumentString(const std::string& name,
|
|
|
|
|
const std::wstring& value) {
|
|
|
|
|
return std::wstring(L"--") + base::UTF8ToUTF16(name) + L"=" + value;
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-06 16:55:31 -05:00
|
|
|
|
struct ScopedProcThreadAttributeListTraits {
|
2016-10-21 13:08:18 -07:00
|
|
|
|
static PPROC_THREAD_ATTRIBUTE_LIST InvalidValue() { return nullptr; }
|
2015-11-06 16:55:31 -05:00
|
|
|
|
|
|
|
|
|
static void Free(PPROC_THREAD_ATTRIBUTE_LIST proc_thread_attribute_list) {
|
|
|
|
|
// This is able to use GET_FUNCTION_REQUIRED() instead of GET_FUNCTION()
|
|
|
|
|
// because it will only be called if InitializeProcThreadAttributeList() and
|
|
|
|
|
// UpdateProcThreadAttribute() are present.
|
|
|
|
|
static const auto delete_proc_thread_attribute_list =
|
|
|
|
|
GET_FUNCTION_REQUIRED(L"kernel32.dll", ::DeleteProcThreadAttributeList);
|
|
|
|
|
delete_proc_thread_attribute_list(proc_thread_attribute_list);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
using ScopedProcThreadAttributeList =
|
|
|
|
|
base::ScopedGeneric<PPROC_THREAD_ATTRIBUTE_LIST,
|
|
|
|
|
ScopedProcThreadAttributeListTraits>;
|
|
|
|
|
|
2015-11-24 16:36:27 -08:00
|
|
|
|
// Returns true if |handle| is non-null, is not INVALID_HANDLE_VALUE, and
// GetFileType() reports it as a disk file or a pipe.
bool IsInheritableHandle(HANDLE handle) {
  if (handle == nullptr || handle == INVALID_HANDLE_VALUE)
    return false;

  // Disk file handles and pipe handles are known to be inheritable. Console
  // handles (FILE_TYPE_CHAR) cannot be inherited through
  // PROC_THREAD_ATTRIBUTE_HANDLE_LIST. See
  // https://crashpad.chromium.org/bug/77.
  switch (GetFileType(handle)) {
    case FILE_TYPE_DISK:
    case FILE_TYPE_PIPE:
      return true;
    default:
      return false;
  }
}
|
|
|
|
|
|
2015-11-19 11:25:52 -08:00
|
|
|
|
// Adds |handle| to |handle_list| if it appears valid, and is not already in
|
|
|
|
|
// |handle_list|.
|
2015-11-06 16:55:31 -05:00
|
|
|
|
//
|
|
|
|
|
// Invalid handles (including INVALID_HANDLE_VALUE and null handles) cannot be
|
|
|
|
|
// added to a PPROC_THREAD_ATTRIBUTE_LIST’s PROC_THREAD_ATTRIBUTE_HANDLE_LIST.
|
|
|
|
|
// If INVALID_HANDLE_VALUE appears, CreateProcess() will fail with
|
|
|
|
|
// ERROR_INVALID_PARAMETER. If a null handle appears, the child process will
|
|
|
|
|
// silently not inherit any handles.
|
|
|
|
|
//
|
|
|
|
|
// Use this function to add handles with uncertain validities.
|
2015-11-24 16:36:27 -08:00
|
|
|
|
void AddHandleToListIfValidAndInheritable(std::vector<HANDLE>* handle_list,
|
|
|
|
|
HANDLE handle) {
|
2015-11-19 11:25:52 -08:00
|
|
|
|
// There doesn't seem to be any documentation of this, but if there's a handle
|
|
|
|
|
// duplicated in this list, CreateProcess() fails with
|
|
|
|
|
// ERROR_INVALID_PARAMETER.
|
2015-11-24 16:36:27 -08:00
|
|
|
|
if (IsInheritableHandle(handle) &&
|
2015-11-19 11:25:52 -08:00
|
|
|
|
std::find(handle_list->begin(), handle_list->end(), handle) ==
|
|
|
|
|
handle_list->end()) {
|
2015-11-06 16:55:31 -05:00
|
|
|
|
handle_list->push_back(handle);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-22 10:03:59 -07:00
|
|
|
|
// Appends the four bytes of |data| to |data_vector|, least significant byte
// first.
void AddUint32(std::vector<unsigned char>* data_vector, uint32_t data) {
  for (int shift = 0; shift < 32; shift += 8) {
    data_vector->push_back(static_cast<unsigned char>((data >> shift) & 0xff));
  }
}
|
|
|
|
|
|
|
|
|
|
// Appends |data| to |data_vector| as two 32-bit quantities written with
// AddUint32(): the low 32 bits first, then the high 32 bits.
void AddUint64(std::vector<unsigned char>* data_vector, uint64_t data) {
  const uint32_t low_half = static_cast<uint32_t>(data & 0xffffffffULL);
  const uint32_t high_half = static_cast<uint32_t>(data >> 32);
  AddUint32(data_vector, low_half);
  AddUint32(data_vector, high_half);
}
|
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
//! \brief Creates a randomized pipe name to listen for client registrations
|
|
|
|
|
//! on and returns its name.
|
|
|
|
|
//!
|
|
|
|
|
//! \param[out] pipe_name The pipe name that will be listened on.
|
|
|
|
|
//! \param[out] pipe_handle The first pipe instance corresponding for the pipe.
|
|
|
|
|
void CreatePipe(std::wstring* pipe_name, ScopedFileHANDLE* pipe_instance) {
|
|
|
|
|
int tries = 5;
|
|
|
|
|
std::string pipe_name_base =
|
2017-08-02 14:16:26 -07:00
|
|
|
|
base::StringPrintf("\\\\.\\pipe\\crashpad_%lu_", GetCurrentProcessId());
|
2016-10-21 13:08:18 -07:00
|
|
|
|
do {
|
|
|
|
|
*pipe_name = base::UTF8ToUTF16(pipe_name_base + RandomString());
|
|
|
|
|
|
|
|
|
|
pipe_instance->reset(CreateNamedPipeInstance(*pipe_name, true));
|
|
|
|
|
|
|
|
|
|
// CreateNamedPipe() is documented as setting the error to
|
|
|
|
|
// ERROR_ACCESS_DENIED if FILE_FLAG_FIRST_PIPE_INSTANCE is specified and the
|
|
|
|
|
// pipe name is already in use. However it may set the error to other codes
|
|
|
|
|
// such as ERROR_PIPE_BUSY (if the pipe already exists and has reached its
|
|
|
|
|
// maximum instance count) or ERROR_INVALID_PARAMETER (if the pipe already
|
|
|
|
|
// exists and its attributes differ from those specified to
|
|
|
|
|
// CreateNamedPipe()). Some of these errors may be ambiguous: for example,
|
|
|
|
|
// ERROR_INVALID_PARAMETER may also occur if CreateNamedPipe() is called
|
|
|
|
|
// incorrectly even in the absence of an existing pipe by the same name.
|
|
|
|
|
// Rather than chasing down all of the possible errors that might indicate
|
|
|
|
|
// that a pipe name is already in use, retry up to a few times on any error.
|
|
|
|
|
} while (!pipe_instance->is_valid() && --tries);
|
|
|
|
|
|
|
|
|
|
PCHECK(pipe_instance->is_valid()) << "CreateNamedPipe";
|
2015-04-29 18:53:47 -07:00
|
|
|
|
}
|
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
struct BackgroundHandlerStartThreadData {
|
|
|
|
|
BackgroundHandlerStartThreadData(
|
|
|
|
|
const base::FilePath& handler,
|
|
|
|
|
const base::FilePath& database,
|
|
|
|
|
const base::FilePath& metrics_dir,
|
|
|
|
|
const std::string& url,
|
|
|
|
|
const std::map<std::string, std::string>& annotations,
|
|
|
|
|
const std::vector<std::string>& arguments,
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
const std::wstring& ipc_pipe,
|
2016-10-21 13:08:18 -07:00
|
|
|
|
ScopedFileHANDLE ipc_pipe_handle)
|
|
|
|
|
: handler(handler),
|
|
|
|
|
database(database),
|
|
|
|
|
metrics_dir(metrics_dir),
|
|
|
|
|
url(url),
|
|
|
|
|
annotations(annotations),
|
|
|
|
|
arguments(arguments),
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
ipc_pipe(ipc_pipe),
|
2016-10-21 13:08:18 -07:00
|
|
|
|
ipc_pipe_handle(std::move(ipc_pipe_handle)) {}
|
|
|
|
|
|
|
|
|
|
base::FilePath handler;
|
|
|
|
|
base::FilePath database;
|
|
|
|
|
base::FilePath metrics_dir;
|
|
|
|
|
std::string url;
|
|
|
|
|
std::map<std::string, std::string> annotations;
|
|
|
|
|
std::vector<std::string> arguments;
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
std::wstring ipc_pipe;
|
2016-10-21 13:08:18 -07:00
|
|
|
|
ScopedFileHANDLE ipc_pipe_handle;
|
|
|
|
|
};
|
2015-11-02 13:59:36 -05:00
|
|
|
|
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
// Ensures that SetHandlerStartupState() is called on scope exit. Assumes
|
|
|
|
|
// failure, and on success, SetSuccessful() should be called.
|
|
|
|
|
class ScopedCallSetHandlerStartupState {
|
|
|
|
|
public:
|
|
|
|
|
ScopedCallSetHandlerStartupState() : successful_(false) {}
|
|
|
|
|
|
|
|
|
|
~ScopedCallSetHandlerStartupState() {
|
|
|
|
|
SetHandlerStartupState(successful_ ? StartupState::kSucceeded
|
|
|
|
|
: StartupState::kFailed);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void SetSuccessful() { successful_ = true; }
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
bool successful_;
|
|
|
|
|
|
|
|
|
|
DISALLOW_COPY_AND_ASSIGN(ScopedCallSetHandlerStartupState);
|
|
|
|
|
};
|
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
bool StartHandlerProcess(
|
|
|
|
|
std::unique_ptr<BackgroundHandlerStartThreadData> data) {
|
2019-12-03 10:20:34 -08:00
|
|
|
|
CHECK(!IsThreadInLoaderLock());
|
|
|
|
|
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
ScopedCallSetHandlerStartupState scoped_startup_state_caller;
|
|
|
|
|
|
2015-11-02 13:59:36 -05:00
|
|
|
|
std::wstring command_line;
|
2016-10-21 13:08:18 -07:00
|
|
|
|
AppendCommandLineArgument(data->handler.value(), &command_line);
|
|
|
|
|
for (const std::string& argument : data->arguments) {
|
2015-11-02 13:59:36 -05:00
|
|
|
|
AppendCommandLineArgument(base::UTF8ToUTF16(argument), &command_line);
|
|
|
|
|
}
|
2016-10-21 13:08:18 -07:00
|
|
|
|
if (!data->database.value().empty()) {
|
|
|
|
|
AppendCommandLineArgument(
|
|
|
|
|
FormatArgumentString("database", data->database.value()),
|
|
|
|
|
&command_line);
|
2015-11-02 13:59:36 -05:00
|
|
|
|
}
|
2016-10-21 13:08:18 -07:00
|
|
|
|
if (!data->metrics_dir.value().empty()) {
|
2016-09-16 13:21:50 -07:00
|
|
|
|
AppendCommandLineArgument(
|
2016-10-21 13:08:18 -07:00
|
|
|
|
FormatArgumentString("metrics-dir", data->metrics_dir.value()),
|
2016-09-16 13:21:50 -07:00
|
|
|
|
&command_line);
|
|
|
|
|
}
|
2016-10-21 13:08:18 -07:00
|
|
|
|
if (!data->url.empty()) {
|
|
|
|
|
AppendCommandLineArgument(
|
|
|
|
|
FormatArgumentString("url", base::UTF8ToUTF16(data->url)),
|
|
|
|
|
&command_line);
|
2015-11-02 13:59:36 -05:00
|
|
|
|
}
|
2016-10-21 13:08:18 -07:00
|
|
|
|
for (const auto& kv : data->annotations) {
|
2015-11-02 13:59:36 -05:00
|
|
|
|
AppendCommandLineArgument(
|
|
|
|
|
FormatArgumentString("annotation",
|
|
|
|
|
base::UTF8ToUTF16(kv.first + '=' + kv.second)),
|
|
|
|
|
&command_line);
|
|
|
|
|
}
|
2016-10-21 13:08:18 -07:00
|
|
|
|
|
|
|
|
|
ScopedKernelHANDLE this_process(
|
|
|
|
|
OpenProcess(kXPProcessAllAccess, true, GetCurrentProcessId()));
|
|
|
|
|
if (!this_process.is_valid()) {
|
|
|
|
|
PLOG(ERROR) << "OpenProcess";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
InitialClientData initial_client_data(
|
|
|
|
|
g_signal_exception,
|
|
|
|
|
g_signal_non_crash_dump,
|
|
|
|
|
g_non_crash_dump_done,
|
|
|
|
|
data->ipc_pipe_handle.get(),
|
|
|
|
|
this_process.get(),
|
2017-04-28 10:08:35 -04:00
|
|
|
|
FromPointerCast<WinVMAddress>(&g_crash_exception_information),
|
|
|
|
|
FromPointerCast<WinVMAddress>(&g_non_crash_exception_information),
|
|
|
|
|
FromPointerCast<WinVMAddress>(&g_critical_section_with_debug_info));
|
2015-11-03 19:26:18 -05:00
|
|
|
|
AppendCommandLineArgument(
|
2016-10-21 13:08:18 -07:00
|
|
|
|
base::UTF8ToUTF16(std::string("--initial-client-data=") +
|
|
|
|
|
initial_client_data.StringRepresentation()),
|
2015-11-03 19:26:18 -05:00
|
|
|
|
&command_line);
|
2015-11-02 13:59:36 -05:00
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
BOOL rv;
|
2015-11-06 16:55:31 -05:00
|
|
|
|
DWORD creation_flags;
|
|
|
|
|
STARTUPINFOEX startup_info = {};
|
|
|
|
|
startup_info.StartupInfo.dwFlags = STARTF_USESTDHANDLES;
|
|
|
|
|
startup_info.StartupInfo.hStdInput = GetStdHandle(STD_INPUT_HANDLE);
|
|
|
|
|
startup_info.StartupInfo.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE);
|
|
|
|
|
startup_info.StartupInfo.hStdError = GetStdHandle(STD_ERROR_HANDLE);
|
|
|
|
|
|
|
|
|
|
std::vector<HANDLE> handle_list;
|
2016-04-25 12:13:07 -07:00
|
|
|
|
std::unique_ptr<uint8_t[]> proc_thread_attribute_list_storage;
|
2015-11-06 16:55:31 -05:00
|
|
|
|
ScopedProcThreadAttributeList proc_thread_attribute_list_owner;
|
|
|
|
|
|
|
|
|
|
static const auto initialize_proc_thread_attribute_list =
|
|
|
|
|
GET_FUNCTION(L"kernel32.dll", ::InitializeProcThreadAttributeList);
|
|
|
|
|
static const auto update_proc_thread_attribute =
|
|
|
|
|
initialize_proc_thread_attribute_list
|
|
|
|
|
? GET_FUNCTION(L"kernel32.dll", ::UpdateProcThreadAttribute)
|
|
|
|
|
: nullptr;
|
|
|
|
|
if (!initialize_proc_thread_attribute_list || !update_proc_thread_attribute) {
|
|
|
|
|
// The OS doesn’t allow handle inheritance to be restricted, so the handler
|
|
|
|
|
// will inherit every inheritable handle.
|
|
|
|
|
creation_flags = 0;
|
|
|
|
|
startup_info.StartupInfo.cb = sizeof(startup_info.StartupInfo);
|
|
|
|
|
} else {
|
|
|
|
|
// Restrict handle inheritance to just those needed in the handler.
|
|
|
|
|
|
|
|
|
|
creation_flags = EXTENDED_STARTUPINFO_PRESENT;
|
|
|
|
|
startup_info.StartupInfo.cb = sizeof(startup_info);
|
|
|
|
|
SIZE_T size;
|
|
|
|
|
rv = initialize_proc_thread_attribute_list(nullptr, 1, 0, &size);
|
|
|
|
|
if (rv) {
|
|
|
|
|
LOG(ERROR) << "InitializeProcThreadAttributeList (size) succeeded, "
|
|
|
|
|
"expected failure";
|
|
|
|
|
return false;
|
|
|
|
|
} else if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
|
|
|
|
|
PLOG(ERROR) << "InitializeProcThreadAttributeList (size)";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
proc_thread_attribute_list_storage.reset(new uint8_t[size]);
|
|
|
|
|
startup_info.lpAttributeList =
|
|
|
|
|
reinterpret_cast<PPROC_THREAD_ATTRIBUTE_LIST>(
|
|
|
|
|
proc_thread_attribute_list_storage.get());
|
|
|
|
|
rv = initialize_proc_thread_attribute_list(
|
|
|
|
|
startup_info.lpAttributeList, 1, 0, &size);
|
|
|
|
|
if (!rv) {
|
|
|
|
|
PLOG(ERROR) << "InitializeProcThreadAttributeList";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
proc_thread_attribute_list_owner.reset(startup_info.lpAttributeList);
|
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
handle_list.reserve(8);
|
|
|
|
|
handle_list.push_back(g_signal_exception);
|
|
|
|
|
handle_list.push_back(g_signal_non_crash_dump);
|
|
|
|
|
handle_list.push_back(g_non_crash_dump_done);
|
|
|
|
|
handle_list.push_back(data->ipc_pipe_handle.get());
|
|
|
|
|
handle_list.push_back(this_process.get());
|
2015-11-24 16:36:27 -08:00
|
|
|
|
AddHandleToListIfValidAndInheritable(&handle_list,
|
|
|
|
|
startup_info.StartupInfo.hStdInput);
|
|
|
|
|
AddHandleToListIfValidAndInheritable(&handle_list,
|
|
|
|
|
startup_info.StartupInfo.hStdOutput);
|
|
|
|
|
AddHandleToListIfValidAndInheritable(&handle_list,
|
|
|
|
|
startup_info.StartupInfo.hStdError);
|
2015-11-06 16:55:31 -05:00
|
|
|
|
rv = update_proc_thread_attribute(
|
|
|
|
|
startup_info.lpAttributeList,
|
|
|
|
|
0,
|
|
|
|
|
PROC_THREAD_ATTRIBUTE_HANDLE_LIST,
|
|
|
|
|
&handle_list[0],
|
|
|
|
|
handle_list.size() * sizeof(handle_list[0]),
|
|
|
|
|
nullptr,
|
|
|
|
|
nullptr);
|
|
|
|
|
if (!rv) {
|
|
|
|
|
PLOG(ERROR) << "UpdateProcThreadAttribute";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-12-03 09:28:09 -05:00
|
|
|
|
// If the embedded crashpad handler is being started via an entry point in a
|
|
|
|
|
// DLL (the handler executable is rundll32.exe), then don't pass
|
|
|
|
|
// the application name to CreateProcess as this appears to generate an
|
|
|
|
|
// invalid command line where the first argument needed by rundll32 is not in
|
|
|
|
|
// the correct format as required in:
|
|
|
|
|
// https://support.microsoft.com/en-ca/help/164787/info-windows-rundll-and-rundll32-interface
|
|
|
|
|
const base::StringPiece16 kRunDll32Exe(L"rundll32.exe");
|
|
|
|
|
bool is_embedded_in_dll = false;
|
|
|
|
|
if (data->handler.value().size() >= kRunDll32Exe.size() &&
|
|
|
|
|
_wcsicmp(data->handler.value()
|
|
|
|
|
.substr(data->handler.value().size() - kRunDll32Exe.size())
|
|
|
|
|
.c_str(),
|
|
|
|
|
kRunDll32Exe.data()) == 0) {
|
|
|
|
|
is_embedded_in_dll = true;
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-02 13:59:36 -05:00
|
|
|
|
PROCESS_INFORMATION process_info;
|
2018-12-03 09:28:09 -05:00
|
|
|
|
rv = CreateProcess(
|
|
|
|
|
is_embedded_in_dll ? nullptr : data->handler.value().c_str(),
|
|
|
|
|
&command_line[0],
|
|
|
|
|
nullptr,
|
|
|
|
|
nullptr,
|
|
|
|
|
true,
|
|
|
|
|
creation_flags,
|
|
|
|
|
nullptr,
|
|
|
|
|
nullptr,
|
|
|
|
|
&startup_info.StartupInfo,
|
|
|
|
|
&process_info);
|
2015-11-02 13:59:36 -05:00
|
|
|
|
if (!rv) {
|
|
|
|
|
PLOG(ERROR) << "CreateProcess";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
rv = CloseHandle(process_info.hThread);
|
|
|
|
|
PLOG_IF(WARNING, !rv) << "CloseHandle thread";
|
|
|
|
|
|
|
|
|
|
rv = CloseHandle(process_info.hProcess);
|
|
|
|
|
PLOG_IF(WARNING, !rv) << "CloseHandle process";
|
|
|
|
|
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
// It is important to close our side of the pipe here before confirming that
|
|
|
|
|
// we can communicate with the server. By doing so, the only remaining copy of
|
|
|
|
|
// the server side of the pipe belongs to the exception handler process we
|
|
|
|
|
// just spawned. Otherwise, the pipe will continue to exist indefinitely, so
|
|
|
|
|
// the connection loop will not detect that it will never be serviced.
|
|
|
|
|
data->ipc_pipe_handle.reset();
|
|
|
|
|
|
|
|
|
|
// Confirm that the server is waiting for connections before continuing.
|
|
|
|
|
ClientToServerMessage message = {};
|
|
|
|
|
message.type = ClientToServerMessage::kPing;
|
|
|
|
|
ServerToClientMessage response = {};
|
|
|
|
|
if (!SendToCrashHandlerServer(data->ipc_pipe, message, &response)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
scoped_startup_state_caller.SetSuccessful();
|
2016-10-21 13:08:18 -07:00
|
|
|
|
return true;
|
|
|
|
|
}
|
2015-11-03 19:26:18 -05:00
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
// Thread entry point that starts the handler. |data| must point to a
// BackgroundHandlerStartThreadData; this function takes ownership of it and
// passes it on to StartHandlerProcess(). Returns 0 if the handler was started
// successfully and 1 otherwise.
DWORD WINAPI BackgroundHandlerStartThreadProc(void* data) {
  std::unique_ptr<BackgroundHandlerStartThreadData> owned_data(
      static_cast<BackgroundHandlerStartThreadData*>(data));
  const bool started = StartHandlerProcess(std::move(owned_data));
  return started ? 0 : 1;
}
|
2015-11-03 19:26:18 -05:00
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
void CommonInProcessInitialization() {
|
|
|
|
|
// We create this dummy CRITICAL_SECTION with the
|
|
|
|
|
// RTL_CRITICAL_SECTION_FLAG_FORCE_DEBUG_INFO flag set to have an entry point
|
|
|
|
|
// into the doubly-linked list of RTL_CRITICAL_SECTION_DEBUG objects. This
|
|
|
|
|
// allows us to walk the list at crash time to gather data for !locks. A
|
|
|
|
|
// debugger would instead inspect ntdll!RtlCriticalSectionList to get the head
|
|
|
|
|
// of the list. But that is not an exported symbol, so on an arbitrary client
|
|
|
|
|
// machine, we don't have a way of getting that pointer.
|
|
|
|
|
InitializeCriticalSectionWithDebugInfoIfPossible(
|
|
|
|
|
&g_critical_section_with_debug_info);
|
2015-11-03 19:26:18 -05:00
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
g_non_crash_dump_lock = new base::Lock();
|
2015-04-29 18:53:47 -07:00
|
|
|
|
}
|
|
|
|
|
|
2016-11-17 14:00:21 -08:00
|
|
|
|
void RegisterHandlers() {
|
|
|
|
|
SetUnhandledExceptionFilter(&UnhandledExceptionHandler);
|
|
|
|
|
|
|
|
|
|
// The Windows CRT's signal.h lists:
|
|
|
|
|
// - SIGINT
|
|
|
|
|
// - SIGILL
|
|
|
|
|
// - SIGFPE
|
|
|
|
|
// - SIGSEGV
|
|
|
|
|
// - SIGTERM
|
|
|
|
|
// - SIGBREAK
|
|
|
|
|
// - SIGABRT
|
|
|
|
|
// SIGILL and SIGTERM are documented as not being generated. SIGBREAK and
|
|
|
|
|
// SIGINT are for Ctrl-Break and Ctrl-C, and aren't something for which
|
|
|
|
|
// capturing a dump is warranted. SIGFPE and SIGSEGV are captured as regular
|
|
|
|
|
// exceptions through the unhandled exception filter. This leaves SIGABRT. In
|
|
|
|
|
// the standard CRT, abort() is implemented as a synchronous call to the
|
|
|
|
|
// SIGABRT signal handler if installed, but after doing so, the unhandled
|
|
|
|
|
// exception filter is not triggered (it instead __fastfail()s). So, register
|
|
|
|
|
// to handle SIGABRT to catch abort() calls, as client code might use this and
|
|
|
|
|
// expect it to cause a crash dump. This will only work when the abort()
|
|
|
|
|
// that's called in client code is the same (or has the same behavior) as the
|
|
|
|
|
// one in use here.
|
2016-11-17 14:18:06 -08:00
|
|
|
|
void (*rv)(int) = signal(SIGABRT, HandleAbortSignal);
|
2016-11-17 14:00:21 -08:00
|
|
|
|
DCHECK_NE(rv, SIG_ERR);
|
|
|
|
|
}
|
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
|
|
// Value-initializes both members; no handler is started here.
CrashpadClient::CrashpadClient()
    : ipc_pipe_(),
      handler_start_thread_() {
}
|
|
|
|
|
|
|
|
|
|
// Nothing to do beyond destroying the members.
CrashpadClient::~CrashpadClient() = default;
|
|
|
|
|
|
|
|
|
|
bool CrashpadClient::StartHandler(
|
|
|
|
|
const base::FilePath& handler,
|
|
|
|
|
const base::FilePath& database,
|
|
|
|
|
const base::FilePath& metrics_dir,
|
|
|
|
|
const std::string& url,
|
|
|
|
|
const std::map<std::string, std::string>& annotations,
|
|
|
|
|
const std::vector<std::string>& arguments,
|
|
|
|
|
bool restartable,
|
|
|
|
|
bool asynchronous_start) {
|
2015-11-02 17:00:06 -05:00
|
|
|
|
DCHECK(ipc_pipe_.empty());
|
2015-11-02 13:59:36 -05:00
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
// Both the pipe and the signalling events have to be created on the main
|
|
|
|
|
// thread (not the spawning thread) so that they're valid after we return from
|
|
|
|
|
// this function.
|
|
|
|
|
ScopedFileHANDLE ipc_pipe_handle;
|
|
|
|
|
CreatePipe(&ipc_pipe_, &ipc_pipe_handle);
|
2015-11-02 13:59:36 -05:00
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
SECURITY_ATTRIBUTES security_attributes = {0};
|
|
|
|
|
security_attributes.nLength = sizeof(SECURITY_ATTRIBUTES);
|
|
|
|
|
security_attributes.bInheritHandle = true;
|
2015-11-02 13:59:36 -05:00
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
g_signal_exception =
|
|
|
|
|
CreateEvent(&security_attributes, false /* auto reset */, false, nullptr);
|
|
|
|
|
g_signal_non_crash_dump =
|
|
|
|
|
CreateEvent(&security_attributes, false /* auto reset */, false, nullptr);
|
|
|
|
|
g_non_crash_dump_done =
|
|
|
|
|
CreateEvent(&security_attributes, false /* auto reset */, false, nullptr);
|
|
|
|
|
|
|
|
|
|
CommonInProcessInitialization();
|
|
|
|
|
|
2016-11-17 14:00:21 -08:00
|
|
|
|
RegisterHandlers();
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
auto data = new BackgroundHandlerStartThreadData(handler,
|
|
|
|
|
database,
|
|
|
|
|
metrics_dir,
|
|
|
|
|
url,
|
|
|
|
|
annotations,
|
|
|
|
|
arguments,
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
ipc_pipe_,
|
2016-10-21 13:08:18 -07:00
|
|
|
|
std::move(ipc_pipe_handle));
|
|
|
|
|
|
|
|
|
|
if (asynchronous_start) {
|
|
|
|
|
// It is important that the current thread not be synchronized with the
|
|
|
|
|
// thread that is created here. StartHandler() needs to be callable inside a
|
|
|
|
|
// DllMain(). In that case, the background thread will not start until the
|
|
|
|
|
// current DllMain() completes, which would cause deadlock if it was waited
|
|
|
|
|
// upon.
|
|
|
|
|
handler_start_thread_.reset(CreateThread(nullptr,
|
|
|
|
|
0,
|
|
|
|
|
&BackgroundHandlerStartThreadProc,
|
|
|
|
|
reinterpret_cast<void*>(data),
|
|
|
|
|
0,
|
|
|
|
|
nullptr));
|
|
|
|
|
if (!handler_start_thread_.is_valid()) {
|
|
|
|
|
PLOG(ERROR) << "CreateThread";
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
SetHandlerStartupState(StartupState::kFailed);
|
2016-10-21 13:08:18 -07:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// In asynchronous mode, we can't report on the overall success or failure
|
|
|
|
|
// of initialization at this point.
|
|
|
|
|
return true;
|
|
|
|
|
} else {
|
|
|
|
|
return StartHandlerProcess(
|
|
|
|
|
std::unique_ptr<BackgroundHandlerStartThreadData>(data));
|
|
|
|
|
}
|
2015-11-02 23:15:22 -05:00
|
|
|
|
}
|
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
bool CrashpadClient::SetHandlerIPCPipe(const std::wstring& ipc_pipe) {
|
|
|
|
|
DCHECK(ipc_pipe_.empty());
|
|
|
|
|
DCHECK(!ipc_pipe.empty());
|
|
|
|
|
|
|
|
|
|
ipc_pipe_ = ipc_pipe;
|
|
|
|
|
|
2015-11-02 17:00:06 -05:00
|
|
|
|
DCHECK(!ipc_pipe_.empty());
|
2015-09-25 13:45:32 -07:00
|
|
|
|
DCHECK_EQ(g_signal_exception, INVALID_HANDLE_VALUE);
|
|
|
|
|
DCHECK_EQ(g_signal_non_crash_dump, INVALID_HANDLE_VALUE);
|
|
|
|
|
DCHECK_EQ(g_non_crash_dump_done, INVALID_HANDLE_VALUE);
|
2015-10-15 13:18:08 -07:00
|
|
|
|
DCHECK(!g_critical_section_with_debug_info.DebugInfo);
|
2015-11-19 11:25:52 -08:00
|
|
|
|
DCHECK(!g_non_crash_dump_lock);
|
2015-09-25 13:45:32 -07:00
|
|
|
|
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
ClientToServerMessage message;
|
|
|
|
|
memset(&message, 0, sizeof(message));
|
|
|
|
|
message.type = ClientToServerMessage::kRegister;
|
2015-09-04 11:52:07 -07:00
|
|
|
|
message.registration.version = RegistrationRequest::kMessageVersion;
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
message.registration.client_process_id = GetCurrentProcessId();
|
2015-09-25 13:45:32 -07:00
|
|
|
|
message.registration.crash_exception_information =
|
2017-04-28 10:08:35 -04:00
|
|
|
|
FromPointerCast<WinVMAddress>(&g_crash_exception_information);
|
2015-09-25 13:45:32 -07:00
|
|
|
|
message.registration.non_crash_exception_information =
|
2017-04-28 10:08:35 -04:00
|
|
|
|
FromPointerCast<WinVMAddress>(&g_non_crash_exception_information);
|
2015-08-14 15:22:09 -07:00
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
CommonInProcessInitialization();
|
|
|
|
|
|
|
|
|
|
message.registration.critical_section_address =
|
2017-04-28 10:08:35 -04:00
|
|
|
|
FromPointerCast<WinVMAddress>(&g_critical_section_with_debug_info);
|
2015-10-15 13:18:08 -07:00
|
|
|
|
|
2015-11-19 15:09:59 -08:00
|
|
|
|
ServerToClientMessage response = {};
|
2015-08-14 15:22:09 -07:00
|
|
|
|
|
2015-11-02 17:00:06 -05:00
|
|
|
|
if (!SendToCrashHandlerServer(ipc_pipe_, message, &response)) {
|
2015-08-14 15:22:09 -07:00
|
|
|
|
return false;
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
}
|
2015-08-14 15:22:09 -07:00
|
|
|
|
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
SetHandlerStartupState(StartupState::kSucceeded);
|
2016-11-17 14:00:21 -08:00
|
|
|
|
|
|
|
|
|
RegisterHandlers();
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
|
2015-08-14 15:22:09 -07:00
|
|
|
|
// The server returns these already duplicated to be valid in this process.
|
2015-11-05 14:00:26 -05:00
|
|
|
|
g_signal_exception =
|
|
|
|
|
IntToHandle(response.registration.request_crash_dump_event);
|
|
|
|
|
g_signal_non_crash_dump =
|
|
|
|
|
IntToHandle(response.registration.request_non_crash_dump_event);
|
|
|
|
|
g_non_crash_dump_done =
|
|
|
|
|
IntToHandle(response.registration.non_crash_dump_completed_event);
|
2015-09-25 13:45:32 -07:00
|
|
|
|
|
2015-04-29 18:53:47 -07:00
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
|
// Returns a copy of the stored IPC pipe name. The name must already have been
// set (debug-checked).
std::wstring CrashpadClient::GetHandlerIPCPipe() const {
  DCHECK(!ipc_pipe_.empty());
  return ipc_pipe_;
}
|
|
|
|
|
|
2016-12-12 20:57:48 -08:00
|
|
|
|
bool CrashpadClient::WaitForHandlerStart(unsigned int timeout_ms) {
|
2016-10-21 13:08:18 -07:00
|
|
|
|
DCHECK(handler_start_thread_.is_valid());
|
2016-12-12 20:57:48 -08:00
|
|
|
|
DWORD result = WaitForSingleObject(handler_start_thread_.get(), timeout_ms);
|
|
|
|
|
if (result == WAIT_TIMEOUT) {
|
|
|
|
|
LOG(ERROR) << "WaitForSingleObject timed out";
|
|
|
|
|
return false;
|
|
|
|
|
} else if (result == WAIT_ABANDONED) {
|
|
|
|
|
LOG(ERROR) << "WaitForSingleObject abandoned";
|
|
|
|
|
return false;
|
|
|
|
|
} else if (result != WAIT_OBJECT_0) {
|
2016-10-21 13:08:18 -07:00
|
|
|
|
PLOG(ERROR) << "WaitForSingleObject";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
DWORD exit_code;
|
|
|
|
|
if (!GetExitCodeThread(handler_start_thread_.get(), &exit_code)) {
|
|
|
|
|
PLOG(ERROR) << "GetExitCodeThread";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
handler_start_thread_.reset();
|
|
|
|
|
return exit_code == 0;
|
|
|
|
|
}
|
|
|
|
|
|
2015-09-25 13:45:32 -07:00
|
|
|
|
// static
|
|
|
|
|
void CrashpadClient::DumpWithoutCrash(const CONTEXT& context) {
|
|
|
|
|
if (g_signal_non_crash_dump == INVALID_HANDLE_VALUE ||
|
|
|
|
|
g_non_crash_dump_done == INVALID_HANDLE_VALUE) {
|
2016-11-03 09:38:14 -07:00
|
|
|
|
LOG(ERROR) << "not connected";
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (BlockUntilHandlerStartedOrFailed() == StartupState::kFailed) {
|
|
|
|
|
// If we know for certain that the handler has failed to start, then abort
|
|
|
|
|
// here, as we would otherwise wait indefinitely for the
|
|
|
|
|
// g_non_crash_dump_done event that would never be signalled.
|
|
|
|
|
LOG(ERROR) << "crash server failed to launch, no dump captured";
|
2015-09-25 13:45:32 -07:00
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// In the non-crashing case, we aren't concerned about avoiding calls into
|
|
|
|
|
// Win32 APIs, so just use regular locking here in case of multiple threads
|
|
|
|
|
// calling this function. If a crash occurs while we're in here, the worst
|
|
|
|
|
// that can happen is that the server captures a partial dump for this path
|
win: Wrap TerminateProcess() to accept cdecl patches on x86
TerminateProcess(), like most of the Windows API, is declared WINAPI,
which is __stdcall on 32-bit x86. That means that the callee,
TerminateProcess() itself, is responsible for cleaning up parameters on
the stack on return. In https://crashpad.chromium.org/bug/179, crashes
in ExceptionHandlerServer::OnNonCrashDumpEvent() were observed in ways
that make it evident that TerminateProcess() has been patched with a
__cdecl routine. The crucial difference between __stdcall and __cdecl is
that the caller is responsible for stack parameter cleanup in __cdecl.
The mismatch means that nobody cleans parameters from the stack, and the
stack pointer has an unexpected value, which in the case of the Crashpad
handler crash, results in TerminateProcess()’s second argument
erroneously being used as the lock address in the call to
ReleaseSRWLockExclusive() or LeaveCriticalSection().
As a workaround, on 32-bit x86, call through SafeTerminateProcess(), a
custom assembly routine that’s compatible with either __stdcall or
__cdecl implementations of TerminateProcess() by not trusting the value
of the stack pointer on return from that function. Instead, the stack
pointer is restored directly from the frame pointer.
Bug: crashpad:179
Test: crashpad_util_test SafeTerminateProcess.*, others
Change-Id: If9508f4eb7631020ea69ddbbe4a22eb335cdb325
Reviewed-on: https://chromium-review.googlesource.com/481180
Reviewed-by: Scott Graham <scottmg@chromium.org>
2017-04-19 13:22:08 -04:00
|
|
|
|
// because another thread’s crash processing finished and the process was
|
|
|
|
|
// terminated before this thread’s non-crash processing could be completed.
|
2015-09-25 13:45:32 -07:00
|
|
|
|
base::AutoLock lock(*g_non_crash_dump_lock);
|
|
|
|
|
|
|
|
|
|
// Create a fake EXCEPTION_POINTERS to give the handler something to work
|
|
|
|
|
// with.
|
2015-11-19 15:09:59 -08:00
|
|
|
|
EXCEPTION_POINTERS exception_pointers = {};
|
2015-09-25 13:45:32 -07:00
|
|
|
|
|
|
|
|
|
// This is logically const, but EXCEPTION_POINTERS does not declare it as
|
|
|
|
|
// const, so we have to cast that away from the argument.
|
|
|
|
|
exception_pointers.ContextRecord = const_cast<CONTEXT*>(&context);
|
|
|
|
|
|
|
|
|
|
// We include a fake exception and use a code of '0x517a7ed' (something like
|
|
|
|
|
// "simulated") so that it's relatively obvious in windbg that it's not
|
|
|
|
|
// actually an exception. Most values in
|
2017-11-20 16:57:43 -05:00
|
|
|
|
// https://msdn.microsoft.com/library/aa363082.aspx have some of the top
|
|
|
|
|
// nibble set, so we make sure to pick a value that doesn't, so as to be
|
|
|
|
|
// unlikely to conflict.
|
2017-07-25 19:15:48 -04:00
|
|
|
|
constexpr uint32_t kSimulatedExceptionCode = 0x517a7ed;
|
2015-11-19 15:09:59 -08:00
|
|
|
|
EXCEPTION_RECORD record = {};
|
2015-09-25 13:45:32 -07:00
|
|
|
|
record.ExceptionCode = kSimulatedExceptionCode;
|
2018-12-12 12:58:24 -08:00
|
|
|
|
record.ExceptionAddress = ProgramCounterFromCONTEXT(&context);
|
2015-09-25 13:45:32 -07:00
|
|
|
|
|
|
|
|
|
exception_pointers.ExceptionRecord = &record;
|
|
|
|
|
|
|
|
|
|
g_non_crash_exception_information.thread_id = GetCurrentThreadId();
|
|
|
|
|
g_non_crash_exception_information.exception_pointers =
|
2017-04-28 10:08:35 -04:00
|
|
|
|
FromPointerCast<WinVMAddress>(&exception_pointers);
|
2015-09-25 13:45:32 -07:00
|
|
|
|
|
2015-10-22 14:32:13 -07:00
|
|
|
|
bool set_event_result = !!SetEvent(g_signal_non_crash_dump);
|
2015-09-25 13:45:32 -07:00
|
|
|
|
PLOG_IF(ERROR, !set_event_result) << "SetEvent";
|
|
|
|
|
|
|
|
|
|
DWORD wfso_result = WaitForSingleObject(g_non_crash_dump_done, INFINITE);
|
|
|
|
|
PLOG_IF(ERROR, wfso_result != WAIT_OBJECT_0) << "WaitForSingleObject";
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-06 14:08:13 -08:00
|
|
|
|
// static
|
|
|
|
|
// Hands |exception_pointers| to the unhandled exception handler when the
// client is connected. When it is not, logs an error and terminates the
// current process with kTerminationCodeNotConnectedToHandler.
void CrashpadClient::DumpAndCrash(EXCEPTION_POINTERS* exception_pointers) {
  const bool connected = g_signal_exception != INVALID_HANDLE_VALUE;
  if (connected) {
    // We don't need to check for handler startup here, as
    // UnhandledExceptionHandler() necessarily does that.
    UnhandledExceptionHandler(exception_pointers);
    return;
  }

  LOG(ERROR) << "not connected";
  SafeTerminateProcess(GetCurrentProcess(),
                       kTerminationCodeNotConnectedToHandler);
}
|
|
|
|
|
|
2018-02-02 12:47:44 -08:00
|
|
|
|
// static
|
2016-04-22 10:03:59 -07:00
|
|
|
|
bool CrashpadClient::DumpAndCrashTargetProcess(HANDLE process,
|
|
|
|
|
HANDLE blame_thread,
|
2018-02-02 12:47:44 -08:00
|
|
|
|
DWORD exception_code) {
|
2016-04-22 10:03:59 -07:00
|
|
|
|
// Confirm we're on Vista or later.
|
|
|
|
|
const DWORD version = GetVersion();
|
|
|
|
|
const DWORD major_version = LOBYTE(LOWORD(version));
|
|
|
|
|
if (major_version < 6) {
|
|
|
|
|
LOG(ERROR) << "unavailable before Vista";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Confirm that our bitness is the same as the process we're crashing.
|
|
|
|
|
ProcessInfo process_info;
|
|
|
|
|
if (!process_info.Initialize(process)) {
|
|
|
|
|
LOG(ERROR) << "ProcessInfo::Initialize";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
#if defined(ARCH_CPU_64_BITS)
|
|
|
|
|
if (!process_info.Is64Bit()) {
|
|
|
|
|
LOG(ERROR) << "DumpAndCrashTargetProcess currently not supported x64->x86";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
#endif // ARCH_CPU_64_BITS
|
|
|
|
|
|
|
|
|
|
ScopedProcessSuspend suspend(process);
|
|
|
|
|
|
|
|
|
|
// If no thread handle was provided, or the thread has already exited, we pass
|
|
|
|
|
// 0 to the handler, which indicates no fake exception record to be created.
|
|
|
|
|
DWORD thread_id = 0;
|
|
|
|
|
if (blame_thread) {
|
|
|
|
|
// Now that we've suspended the process, if our thread hasn't exited, we
|
|
|
|
|
// know we're relatively safe to pass the thread id through.
|
|
|
|
|
if (WaitForSingleObject(blame_thread, 0) == WAIT_TIMEOUT) {
|
|
|
|
|
static const auto get_thread_id =
|
|
|
|
|
GET_FUNCTION_REQUIRED(L"kernel32.dll", ::GetThreadId);
|
|
|
|
|
thread_id = get_thread_id(blame_thread);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-07-25 19:15:48 -04:00
|
|
|
|
constexpr size_t kInjectBufferSize = 4 * 1024;
|
2016-04-22 10:03:59 -07:00
|
|
|
|
WinVMAddress inject_memory =
|
2017-04-28 10:08:35 -04:00
|
|
|
|
FromPointerCast<WinVMAddress>(VirtualAllocEx(process,
|
|
|
|
|
nullptr,
|
|
|
|
|
kInjectBufferSize,
|
|
|
|
|
MEM_RESERVE | MEM_COMMIT,
|
|
|
|
|
PAGE_READWRITE));
|
2016-04-22 10:03:59 -07:00
|
|
|
|
if (!inject_memory) {
|
|
|
|
|
PLOG(ERROR) << "VirtualAllocEx";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Because we're the same bitness as our target, we can rely kernel32 being
|
|
|
|
|
// loaded at the same address in our process as the target, and just look up
|
|
|
|
|
// its address here.
|
|
|
|
|
WinVMAddress raise_exception_address =
|
2017-04-28 10:08:35 -04:00
|
|
|
|
FromPointerCast<WinVMAddress>(&RaiseException);
|
2016-04-22 10:03:59 -07:00
|
|
|
|
|
|
|
|
|
WinVMAddress code_entry_point = 0;
|
|
|
|
|
std::vector<unsigned char> data_to_write;
|
|
|
|
|
if (process_info.Is64Bit()) {
|
|
|
|
|
// Data written is first, the data for the 4th argument (lpArguments) to
|
|
|
|
|
// RaiseException(). A two element array:
|
|
|
|
|
//
|
|
|
|
|
// DWORD64: thread_id
|
|
|
|
|
// DWORD64: exception_code
|
|
|
|
|
//
|
|
|
|
|
// Following that, code which sets the arguments to RaiseException() and
|
|
|
|
|
// then calls it:
|
|
|
|
|
//
|
|
|
|
|
// mov r9, <data_array_address>
|
|
|
|
|
// mov r8d, 2 ; nNumberOfArguments
|
|
|
|
|
// mov edx, 1 ; dwExceptionFlags = EXCEPTION_NONCONTINUABLE
|
|
|
|
|
// mov ecx, 0xcca11ed ; dwExceptionCode, interpreted specially by the
|
|
|
|
|
// ; handler.
|
|
|
|
|
// jmp <address_of_RaiseException>
|
|
|
|
|
//
|
|
|
|
|
// Note that the first three arguments to RaiseException() are DWORDs even
|
|
|
|
|
// on x64, so only the 4th argument (a pointer) is a full-width register.
|
|
|
|
|
//
|
|
|
|
|
// We also don't need to set up a stack or use call, since the only
|
|
|
|
|
// registers modified are volatile ones, and we can just jmp straight to
|
|
|
|
|
// RaiseException().
|
|
|
|
|
|
|
|
|
|
// The data array.
|
|
|
|
|
AddUint64(&data_to_write, thread_id);
|
|
|
|
|
AddUint64(&data_to_write, exception_code);
|
|
|
|
|
|
|
|
|
|
// The thread entry point.
|
|
|
|
|
code_entry_point = inject_memory + data_to_write.size();
|
|
|
|
|
|
|
|
|
|
// r9 = pointer to data.
|
|
|
|
|
data_to_write.push_back(0x49);
|
|
|
|
|
data_to_write.push_back(0xb9);
|
|
|
|
|
AddUint64(&data_to_write, inject_memory);
|
|
|
|
|
|
|
|
|
|
// r8d = 2 for nNumberOfArguments.
|
|
|
|
|
data_to_write.push_back(0x41);
|
|
|
|
|
data_to_write.push_back(0xb8);
|
|
|
|
|
AddUint32(&data_to_write, 2);
|
|
|
|
|
|
|
|
|
|
// edx = 1 for dwExceptionFlags.
|
|
|
|
|
data_to_write.push_back(0xba);
|
|
|
|
|
AddUint32(&data_to_write, 1);
|
|
|
|
|
|
|
|
|
|
// ecx = kTriggeredExceptionCode for dwExceptionCode.
|
|
|
|
|
data_to_write.push_back(0xb9);
|
|
|
|
|
AddUint32(&data_to_write, kTriggeredExceptionCode);
|
|
|
|
|
|
|
|
|
|
// jmp to RaiseException() via rax.
|
|
|
|
|
data_to_write.push_back(0x48); // mov rax, imm.
|
|
|
|
|
data_to_write.push_back(0xb8);
|
|
|
|
|
AddUint64(&data_to_write, raise_exception_address);
|
|
|
|
|
data_to_write.push_back(0xff); // jmp rax.
|
|
|
|
|
data_to_write.push_back(0xe0);
|
|
|
|
|
} else {
|
|
|
|
|
// Data written is first, the data for the 4th argument (lpArguments) to
|
|
|
|
|
// RaiseException(). A two element array:
|
|
|
|
|
//
|
|
|
|
|
// DWORD: thread_id
|
|
|
|
|
// DWORD: exception_code
|
|
|
|
|
//
|
|
|
|
|
// Following that, code which pushes our arguments to RaiseException() and
|
|
|
|
|
// then calls it:
|
|
|
|
|
//
|
|
|
|
|
// push <data_array_address>
|
|
|
|
|
// push 2 ; nNumberOfArguments
|
|
|
|
|
// push 1 ; dwExceptionFlags = EXCEPTION_NONCONTINUABLE
|
|
|
|
|
// push 0xcca11ed ; dwExceptionCode, interpreted specially by the handler.
|
|
|
|
|
// call <address_of_RaiseException>
|
|
|
|
|
// ud2 ; Generate invalid opcode to make sure we still crash if we return
|
|
|
|
|
// ; for some reason.
|
|
|
|
|
//
|
|
|
|
|
// No need to clean up the stack, as RaiseException() is __stdcall.
|
|
|
|
|
|
|
|
|
|
// The data array.
|
|
|
|
|
AddUint32(&data_to_write, thread_id);
|
|
|
|
|
AddUint32(&data_to_write, exception_code);
|
|
|
|
|
|
|
|
|
|
// The thread entry point.
|
|
|
|
|
code_entry_point = inject_memory + data_to_write.size();
|
|
|
|
|
|
|
|
|
|
// Push data address.
|
|
|
|
|
data_to_write.push_back(0x68);
|
|
|
|
|
AddUint32(&data_to_write, static_cast<uint32_t>(inject_memory));
|
|
|
|
|
|
|
|
|
|
// Push 2 for nNumberOfArguments.
|
|
|
|
|
data_to_write.push_back(0x6a);
|
|
|
|
|
data_to_write.push_back(2);
|
|
|
|
|
|
|
|
|
|
// Push 1 for dwExceptionCode.
|
|
|
|
|
data_to_write.push_back(0x6a);
|
|
|
|
|
data_to_write.push_back(1);
|
|
|
|
|
|
|
|
|
|
// Push dwExceptionFlags.
|
|
|
|
|
data_to_write.push_back(0x68);
|
|
|
|
|
AddUint32(&data_to_write, kTriggeredExceptionCode);
|
|
|
|
|
|
|
|
|
|
// Relative call to RaiseException().
|
|
|
|
|
int64_t relative_address_to_raise_exception =
|
|
|
|
|
raise_exception_address - (inject_memory + data_to_write.size() + 5);
|
|
|
|
|
data_to_write.push_back(0xe8);
|
|
|
|
|
AddUint32(&data_to_write,
|
|
|
|
|
static_cast<uint32_t>(relative_address_to_raise_exception));
|
|
|
|
|
|
|
|
|
|
// ud2.
|
|
|
|
|
data_to_write.push_back(0x0f);
|
|
|
|
|
data_to_write.push_back(0x0b);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
DCHECK_LT(data_to_write.size(), kInjectBufferSize);
|
|
|
|
|
|
|
|
|
|
SIZE_T bytes_written;
|
|
|
|
|
if (!WriteProcessMemory(process,
|
|
|
|
|
reinterpret_cast<void*>(inject_memory),
|
|
|
|
|
data_to_write.data(),
|
|
|
|
|
data_to_write.size(),
|
|
|
|
|
&bytes_written)) {
|
|
|
|
|
PLOG(ERROR) << "WriteProcessMemory";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (bytes_written != data_to_write.size()) {
|
|
|
|
|
LOG(ERROR) << "WriteProcessMemory unexpected number of bytes";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!FlushInstructionCache(
|
|
|
|
|
process, reinterpret_cast<void*>(inject_memory), bytes_written)) {
|
|
|
|
|
PLOG(ERROR) << "FlushInstructionCache";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
DWORD old_protect;
|
|
|
|
|
if (!VirtualProtectEx(process,
|
|
|
|
|
reinterpret_cast<void*>(inject_memory),
|
|
|
|
|
kInjectBufferSize,
|
|
|
|
|
PAGE_EXECUTE_READ,
|
|
|
|
|
&old_protect)) {
|
|
|
|
|
PLOG(ERROR) << "VirtualProtectEx";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Cause an exception in the target process by creating a thread which calls
|
|
|
|
|
// RaiseException with our arguments above. Note that we cannot get away with
|
|
|
|
|
// using DebugBreakProcess() (nothing happens unless a debugger is attached)
|
|
|
|
|
// and we cannot get away with CreateRemoteThread() because it doesn't work if
|
|
|
|
|
// the target is hung waiting for the loader lock. We use NtCreateThreadEx()
|
|
|
|
|
// with the SKIP_THREAD_ATTACH flag, which skips various notifications,
|
|
|
|
|
// letting this cause an exception, even when the target is stuck in the
|
|
|
|
|
// loader lock.
|
|
|
|
|
HANDLE injected_thread;
|
2017-07-25 19:15:48 -04:00
|
|
|
|
|
|
|
|
|
// This is what DebugBreakProcess() uses.
|
|
|
|
|
constexpr size_t kStackSize = 0x4000;
|
|
|
|
|
|
2016-04-22 10:03:59 -07:00
|
|
|
|
NTSTATUS status = NtCreateThreadEx(&injected_thread,
|
|
|
|
|
STANDARD_RIGHTS_ALL | SPECIFIC_RIGHTS_ALL,
|
|
|
|
|
nullptr,
|
|
|
|
|
process,
|
|
|
|
|
reinterpret_cast<void*>(code_entry_point),
|
|
|
|
|
nullptr,
|
|
|
|
|
THREAD_CREATE_FLAGS_SKIP_THREAD_ATTACH,
|
|
|
|
|
0,
|
|
|
|
|
kStackSize,
|
|
|
|
|
0,
|
|
|
|
|
nullptr);
|
|
|
|
|
if (!NT_SUCCESS(status)) {
|
|
|
|
|
NTSTATUS_LOG(ERROR, status) << "NtCreateThreadEx";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-04 13:38:25 -04:00
|
|
|
|
// The injected thread raises an exception and ultimately results in process
|
|
|
|
|
// termination. The suspension must be made aware that the process may be
|
|
|
|
|
// terminating, otherwise it’ll log an extraneous error.
|
|
|
|
|
suspend.TolerateTermination();
|
|
|
|
|
|
2016-04-22 10:03:59 -07:00
|
|
|
|
bool result = true;
|
|
|
|
|
if (WaitForSingleObject(injected_thread, 60 * 1000) != WAIT_OBJECT_0) {
|
|
|
|
|
PLOG(ERROR) << "WaitForSingleObject";
|
|
|
|
|
result = false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
status = NtClose(injected_thread);
|
|
|
|
|
if (!NT_SUCCESS(status)) {
|
|
|
|
|
NTSTATUS_LOG(ERROR, status) << "NtClose";
|
|
|
|
|
result = false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-29 18:53:47 -07:00
|
|
|
|
} // namespace crashpad
|