win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
// Copyright 2015 The Crashpad Authors. All rights reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
#ifndef CRASHPAD_UTIL_WIN_REGISTRATION_PROTOCOL_WIN_H_
|
|
|
|
#define CRASHPAD_UTIL_WIN_REGISTRATION_PROTOCOL_WIN_H_
|
|
|
|
|
|
|
|
#include <windows.h>
#include <stddef.h>
#include <stdint.h>

#include <string>

#include "base/strings/string16.h"
#include "util/win/address_types.h"
|
|
|
|
|
|
|
|
namespace crashpad {
|
|
|
|
|
|
|
|
#pragma pack(push, 1)
|
|
|
|
|
|
|
|
//! \brief Structure read out of the client process by the crash handler when an
|
|
|
|
//! exception occurs.
|
|
|
|
struct ExceptionInformation {
|
|
|
|
//! \brief The address of an EXCEPTION_POINTERS structure in the client
|
|
|
|
//! process that describes the exception.
|
|
|
|
WinVMAddress exception_pointers;
|
|
|
|
|
|
|
|
//! \brief The thread on which the exception happened.
|
|
|
|
DWORD thread_id;
|
|
|
|
};
|
|
|
|
|
|
|
|
//! \brief A client registration request.
|
|
|
|
struct RegistrationRequest {
|
2015-09-04 11:52:07 -07:00
|
|
|
//! \brief The expected value of `version`. This should be changed whenever
|
|
|
|
//! the messages or ExceptionInformation are modified incompatibly.
|
|
|
|
enum { kMessageVersion = 1 };
|
|
|
|
|
|
|
|
//! \brief Version field to detect skew between client and server. Should be
|
|
|
|
//! set to kMessageVersion.
|
|
|
|
int version;
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
|
|
|
|
//! \brief The PID of the client process.
|
|
|
|
DWORD client_process_id;
|
2015-09-04 11:52:07 -07:00
|
|
|
|
2015-10-15 13:18:08 -07:00
|
|
|
//! \brief The address, in the client process's address space, of an
|
2015-09-25 13:45:32 -07:00
|
|
|
//! ExceptionInformation structure, used when handling a crash dump
|
|
|
|
//! request.
|
|
|
|
WinVMAddress crash_exception_information;
|
|
|
|
|
2015-10-15 13:18:08 -07:00
|
|
|
//! \brief The address, in the client process's address space, of an
|
2015-09-25 13:45:32 -07:00
|
|
|
//! ExceptionInformation structure, used when handling a non-crashing dump
|
|
|
|
//! request.
|
|
|
|
WinVMAddress non_crash_exception_information;
|
2015-10-15 13:18:08 -07:00
|
|
|
|
|
|
|
//! \brief The address, in the client process's address space, of a
|
|
|
|
//! `CRITICAL_SECTION` allocated with a valid .DebugInfo field. This can
|
2015-10-16 14:55:14 -07:00
|
|
|
//! be accomplished by using
|
|
|
|
//! InitializeCriticalSectionWithDebugInfoIfPossible() or equivalent. This
|
|
|
|
//! value can be `0`, however then limited lock data will be available in
|
|
|
|
//! minidumps.
|
2015-10-15 13:18:08 -07:00
|
|
|
WinVMAddress critical_section_address;
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
//! \brief A message only sent to the server by itself to trigger shutdown.
struct ShutdownRequest {
  //! \brief A randomly generated token used to validate that the shutdown
  //!     request was not sent from another process.
  uint64_t token;
};
|
|
|
|
|
|
|
|
//! \brief The message passed from client to server by
|
|
|
|
//! SendToCrashHandlerServer().
|
|
|
|
struct ClientToServerMessage {
|
|
|
|
//! \brief Indicates which field of the union is in use.
|
|
|
|
enum Type : uint32_t {
|
|
|
|
//! \brief For RegistrationRequest.
|
|
|
|
kRegister,
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
//! \brief For ShutdownRequest.
|
|
|
|
kShutdown,
|
win: Address failure-to-start-handler case for async startup
Second follow up to https://chromium-review.googlesource.com/c/400015/
The ideal would be that if we fail to start the handler, then we don't
end up passing through our unhandled exception filter at all.
In the case of the non-initial client (i.e. renderers) we can do this by
not setting our UnhandledExceptionFilter until after we know we've
connected successfully (because those connections are synchronous from
its point of view). We also change WaitForNamedPipe in the connection
message to block forever, so as long as the precreated pipe exists,
they'll wait to connect. After the initial client has passed the server
side of that pipe to the handler, the handler has the only handle to it.
So, if the handler has disappeared for whatever reason, pipe-connecting
clients will fail with FILE_NOT_FOUND, and will not stick around in the
connection loop. This means non-initial clients do not need additional
logic to avoid getting stuck in our UnhandledExceptionFilter.
For the initial client, it would be ideal to avoid passing through our
UEF too, but none of the 3 options are great:
1. Block until we find out if we started, and then install the filter.
We don't want to do that, because we don't want to wait.
2. Restore the old filter if it turns out we failed to start. We can't
do that because Chrome disables ::SetUnhandledExceptionFilter()
immediately after StartHandler/SetHandlerIPCPipe returns.
3. Don't install our filter until we've successfully started. We don't
want to do that because we'd miss early crashes, negating the benefit
of deferred startup.
So, we do need to pass through our UnhandledExceptionFilter. I don't
want more Win32 API calls during the vulnerable filter function. So, at
any point during async startup where there's a failure, set a global
atomic that allows the filter function to abort without trying to signal
a handler that's known to not exist.
One further improvement we might want to look at is unexpected
termination of the handler (as opposed to a failure to start) which
would still result in a useless Sleep(60s). This isn't new behaviour,
but now we have a clear thing to do if we detect the handler is gone.
(Also a missing DWORD/size_t cast for the _x64 bots.)
R=mark@chromium.org
BUG=chromium:567850,chromium:656800
Change-Id: I5be831ca39bd8b2e5c962b9647c8bd469e2be878
Reviewed-on: https://chromium-review.googlesource.com/400985
Reviewed-by: Mark Mentovai <mark@chromium.org>
2016-11-02 14:24:21 -07:00
|
|
|
|
|
|
|
//! \brief An empty message sent by the initial client in asynchronous mode.
|
|
|
|
//! No data is required, this just confirms that the server is ready to
|
|
|
|
//! accept client registrations.
|
|
|
|
kPing,
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
} type;
|
|
|
|
|
|
|
|
union {
|
|
|
|
RegistrationRequest registration;
|
|
|
|
ShutdownRequest shutdown;
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
2015-11-06 15:03:13 -05:00
|
|
|
//! \brief A client registration response.
struct RegistrationResponse {
  //! \brief An event `HANDLE`, valid in the client process, that should be
  //!     signaled to request a crash report. Clients should convert the value
  //!     to a `HANDLE` by calling IntToHandle().
  int request_crash_dump_event;

  //! \brief An event `HANDLE`, valid in the client process, that should be
  //!     signaled to request a non-crashing dump be taken. Clients should
  //!     convert the value to a `HANDLE` by calling IntToHandle().
  int request_non_crash_dump_event;

  //! \brief An event `HANDLE`, valid in the client process, that will be
  //!     signaled by the server when the non-crashing dump is complete. Clients
  //!     should convert the value to a `HANDLE` by calling IntToHandle().
  int non_crash_dump_completed_event;
};
|
|
|
|
|
|
|
|
//! \brief The response sent back to the client via SendToCrashHandlerServer().
|
|
|
|
union ServerToClientMessage {
|
|
|
|
RegistrationResponse registration;
|
|
|
|
};
|
|
|
|
|
|
|
|
#pragma pack(pop)
|
|
|
|
|
|
|
|
//! \brief Connect over the given \a pipe_name, passing \a message to the
|
|
|
|
//! server, storing the server's reply into \a response.
|
|
|
|
//!
|
|
|
|
//! Typically clients will not use this directly, instead using
|
|
|
|
//! CrashpadClient::SetHandler().
|
|
|
|
//!
|
|
|
|
//! \sa CrashpadClient::SetHandler()
|
|
|
|
bool SendToCrashHandlerServer(const base::string16& pipe_name,
|
|
|
|
const ClientToServerMessage& message,
|
|
|
|
ServerToClientMessage* response);
|
|
|
|
|
2016-10-21 13:08:18 -07:00
|
|
|
//! \brief Wraps CreateNamedPipe() to create a single named pipe instance.
|
|
|
|
//!
|
|
|
|
//! \param[in] pipe_name The name to use for the pipe.
|
|
|
|
//! \param[in] first_instance If `true`, the named pipe instance will be
|
|
|
|
//! created with `FILE_FLAG_FIRST_PIPE_INSTANCE`. This ensures that the the
|
|
|
|
//! pipe name is not already in use when created. The first instance will be
|
|
|
|
//! created with an untrusted integrity SACL so instances of this pipe can
|
|
|
|
//! be connected to by processes of any integrity level.
|
|
|
|
HANDLE CreateNamedPipeInstance(const std::wstring& pipe_name,
|
|
|
|
bool first_instance);
|
|
|
|
|
2016-12-07 11:35:07 -08:00
|
|
|
//! \brief Returns the SECURITY_DESCRIPTOR blob that will be used for creating
//!     the connection pipe in CreateNamedPipeInstance().
//!
//! This function is exposed only for testing.
//!
//! \param[out] size The size of the returned blob. May be `nullptr` if not
//!     required.
//!
//! \return A pointer to a self-relative `SECURITY_DESCRIPTOR`. Ownership is not
//!     transferred to the caller.
const void* GetSecurityDescriptorForNamedPipeInstance(size_t* size);
|
|
|
|
|
win: Crash handler server
This replaces the registration server, and adds dispatch to a delegate
on crash requests.
(As you are already aware) we went around in circles on trying to come
up with a slightly-too-fancy threading design. All of them seemed to
have problems when it comes to out of order events, and orderly
shutdown, so I've gone back to something not-too-fancy.
Two named pipe instances (that clients connect to) are created. These
are used only for registration (which should take <1ms), so 2 should be
sufficient to avoid any waits. When a client registers, we duplicate
an event to it, which is used to signal when it wants a dump taken.
The server registers threadpool waits on that event, and also on the
process handle (which will be signalled when the client process exits).
These requests (in particular the taking of the dump) are serviced
on the threadpool, which avoids us needing to manage those threads,
but still allows parallelism in taking dumps. On process termination,
we use an IO Completion Port to post a message back to the main thread
to request cleanup. This complexity is necessary so that we can
unregister the threadpool waits without being on the threadpool, which
we need to do synchronously so that we can be sure that no further
callbacks will execute (and expect to have the client data around
still).
In a followup, I will readd support for DumpWithoutCrashing -- I don't
think it will be too difficult now that we have an orderly way to
clean up client records in the server.
R=cpu@chromium.org, mark@chromium.org, jschuh@chromium.org
BUG=crashpad:1,crashpad:45
Review URL: https://codereview.chromium.org/1301853002 .
2015-09-03 11:06:17 -07:00
|
|
|
} // namespace crashpad
|
|
|
|
|
|
|
|
#endif // CRASHPAD_UTIL_WIN_REGISTRATION_PROTOCOL_WIN_H_
|