From 0c322ecc3f711c34fbf85b2cbe69f38b8dbccf05 Mon Sep 17 00:00:00 2001 From: Mark Mentovai Date: Wed, 15 Feb 2017 19:54:19 -0500 Subject: [PATCH] Use zlib to gzip-compress uploads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds zlib to Crashpad. By default in standalone Crashpad builds, the system zlib will be used where available. A copy of Chromium’s zlib (currently a slightly patched 1.2.11) is checked out via DEPS into third_party for use on Windows, which does not have a system zlib. zlib is used to produce gzip streams for HTTP upload request bodies sent by crashpad_handler by default. The Content-Encoding: gzip header is set for these compressed request bodies. Compression can be disabled for upload to servers without corresponding decompression support by starting crashpad_handler with the --no-upload-gzip option. Most minidumps compress quite well with zlib. A size reduction of 90% is not uncommon. BUG=crashpad:157 TEST=crashpad_util_test GzipHTTPBodyStream.*:HTTPTransport.* Change-Id: I99b86db3952c3685cd78f5dc858a60b54399c513 Reviewed-on: https://chromium-review.googlesource.com/438585 Reviewed-by: Robert Sesek --- .gitignore | 1 + DEPS | 3 + handler/crash_report_upload_thread.cc | 15 +- handler/crash_report_upload_thread.h | 5 +- handler/crashpad_handler.md | 8 ++ handler/handler_main.cc | 11 +- third_party/zlib/README.crashpad | 18 +++ third_party/zlib/zlib.gyp | 137 ++++++++++++++++++ third_party/zlib/zlib_crashpad.h | 32 +++++ util/misc/zlib.cc | 37 +++++ util/misc/zlib.h | 42 ++++++ util/net/http_body_gzip.cc | 126 +++++++++++++++++ util/net/http_body_gzip.h | 67 +++++++++ util/net/http_body_gzip_test.cc | 178 ++++++++++++++++++++++++ util/net/http_headers.cc | 2 +- util/net/http_headers.h | 3 + util/net/http_multipart_builder.cc | 28 +++- util/net/http_multipart_builder.h | 17 ++- util/net/http_multipart_builder_test.cc | 3 + util/net/http_transport_test.cc | 16 ++- 
util/net/http_transport_test_server.py | 8 ++ util/util.gyp | 5 + util/util_test.gyp | 2 + 23 files changed, 748 insertions(+), 16 deletions(-) create mode 100644 third_party/zlib/README.crashpad create mode 100644 third_party/zlib/zlib.gyp create mode 100644 third_party/zlib/zlib_crashpad.h create mode 100644 util/misc/zlib.cc create mode 100644 util/misc/zlib.h create mode 100644 util/net/http_body_gzip.cc create mode 100644 util/net/http_body_gzip.h create mode 100644 util/net/http_body_gzip_test.cc diff --git a/.gitignore b/.gitignore index 65bb4418..43ab9c0a 100644 --- a/.gitignore +++ b/.gitignore @@ -14,5 +14,6 @@ /third_party/gyp/gyp /third_party/llvm /third_party/mini_chromium/mini_chromium +/third_party/zlib/zlib /xcodebuild tags diff --git a/DEPS b/DEPS index 0da988b7..4aa8dd5a 100644 --- a/DEPS +++ b/DEPS @@ -39,6 +39,9 @@ deps = { 'crashpad/third_party/mini_chromium/mini_chromium': Var('chromium_git') + '/chromium/mini_chromium@' + 'f65519e442d23498937251e680a3b113927613b0', + 'crashpad/third_party/zlib/zlib': + Var('chromium_git') + '/chromium/src/third_party/zlib@' + + '13dc246a58e4b72104d35f9b1809af95221ebda7', } hooks = [ diff --git a/handler/crash_report_upload_thread.cc b/handler/crash_report_upload_thread.cc index 5bd1fbf9..2c2b69b9 100644 --- a/handler/crash_report_upload_thread.cc +++ b/handler/crash_report_upload_thread.cc @@ -139,7 +139,8 @@ class CallRecordUploadAttempt { CrashReportUploadThread::CrashReportUploadThread(CrashReportDatabase* database, const std::string& url, - bool rate_limit) + bool rate_limit, + bool upload_gzip) : url_(url), // Check for pending reports every 15 minutes, even in the absence of a // signal from the handler thread. This allows for failed uploads to be @@ -147,7 +148,8 @@ CrashReportUploadThread::CrashReportUploadThread(CrashReportDatabase* database, // processes to be recognized. 
thread_(15 * 60, this), database_(database), - rate_limit_(rate_limit) { + rate_limit_(rate_limit), + upload_gzip_(upload_gzip) { } CrashReportUploadThread::~CrashReportUploadThread() { @@ -308,6 +310,7 @@ CrashReportUploadThread::UploadResult CrashReportUploadThread::UploadReport( } HTTPMultipartBuilder http_multipart_builder; + http_multipart_builder.SetGzipEnabled(upload_gzip_); const char kMinidumpKey[] = "upload_file_minidump"; @@ -332,9 +335,11 @@ CrashReportUploadThread::UploadResult CrashReportUploadThread::UploadReport( std::unique_ptr http_transport(HTTPTransport::Create()); http_transport->SetURL(url_); - HTTPHeaders::value_type content_type = - http_multipart_builder.GetContentType(); - http_transport->SetHeader(content_type.first, content_type.second); + HTTPHeaders content_headers; + http_multipart_builder.PopulateContentHeaders(&content_headers); + for (const auto& content_header : content_headers) { + http_transport->SetHeader(content_header.first, content_header.second); + } http_transport->SetBodyStream(http_multipart_builder.GetBodyStream()); // TODO(mark): The timeout should be configurable by the client. http_transport->SetTimeout(60.0); // 1 minute. diff --git a/handler/crash_report_upload_thread.h b/handler/crash_report_upload_thread.h index a9601d14..14debacd 100644 --- a/handler/crash_report_upload_thread.h +++ b/handler/crash_report_upload_thread.h @@ -45,9 +45,11 @@ class CrashReportUploadThread : public WorkerThread::Delegate { //! \param[in] url The URL of the server to upload crash reports to. //! \param[in] rate_limit Whether uploads should be throttled to a (currently //! hardcoded) rate. + //! \param[in] upload_gzip Whether uploads should use `gzip` compression. CrashReportUploadThread(CrashReportDatabase* database, const std::string& url, - bool rate_limit); + bool rate_limit, + bool upload_gzip); ~CrashReportUploadThread(); //! \brief Starts a dedicated upload thread, which executes ThreadMain(). 
@@ -139,6 +141,7 @@ class CrashReportUploadThread : public WorkerThread::Delegate { WorkerThread thread_; CrashReportDatabase* database_; // weak bool rate_limit_; + bool upload_gzip_; DISALLOW_COPY_AND_ASSIGN(CrashReportUploadThread); }; diff --git a/handler/crashpad_handler.md b/handler/crashpad_handler.md index 31c0b347..30dacfe7 100644 --- a/handler/crashpad_handler.md +++ b/handler/crashpad_handler.md @@ -144,6 +144,14 @@ establish the Crashpad client environment before running a program. throttled to one per hour. Using this option disables that behavior, and Crashpad will attempt to upload all captured reports. + * **--no-upload-gzip** + + Do not use `gzip` compression for uploaded crash reports. Normally, the + entire request body is compressed into a `gzip` stream and transmitted with + `Content-Encoding: gzip`. This option disables compression, and is intended + for use with collection servers that don’t accept uploads compressed in this + way. + * **--pipe-name**=_PIPE_ Listen on the given pipe name for connections from clients. 
_PIPE_ must be of diff --git a/handler/handler_main.cc b/handler/handler_main.cc index 9433a6af..97322576 100644 --- a/handler/handler_main.cc +++ b/handler/handler_main.cc @@ -93,6 +93,7 @@ void Usage(const base::FilePath& me) { #endif // OS_MACOSX " --metrics-dir=DIR store metrics files in DIR (only in Chromium)\n" " --no-rate-limit don't rate limit crash uploads\n" +" --no-upload-gzip don't use gzip compression when uploading\n" #if defined(OS_MACOSX) " --reset-own-crash-exception-port-to-system-default\n" " reset the server's exception handler to default\n" @@ -291,6 +292,7 @@ int HandlerMain(int argc, char* argv[]) { #endif // OS_MACOSX kOptionMetrics, kOptionNoRateLimit, + kOptionNoUploadGzip, #if defined(OS_MACOSX) kOptionResetOwnCrashExceptionPortToSystemDefault, #elif defined(OS_WIN) @@ -317,11 +319,13 @@ int HandlerMain(int argc, char* argv[]) { InitialClientData initial_client_data; #endif // OS_MACOSX bool rate_limit; + bool upload_gzip; } options = {}; #if defined(OS_MACOSX) options.handshake_fd = -1; #endif options.rate_limit = true; + options.upload_gzip = true; const option long_options[] = { {"annotation", required_argument, nullptr, kOptionAnnotation}, @@ -340,6 +344,7 @@ int HandlerMain(int argc, char* argv[]) { #endif // OS_MACOSX {"metrics-dir", required_argument, nullptr, kOptionMetrics}, {"no-rate-limit", no_argument, nullptr, kOptionNoRateLimit}, + {"no-upload-gzip", no_argument, nullptr, kOptionNoUploadGzip}, #if defined(OS_MACOSX) {"reset-own-crash-exception-port-to-system-default", no_argument, @@ -407,6 +412,10 @@ int HandlerMain(int argc, char* argv[]) { options.rate_limit = false; break; } + case kOptionNoUploadGzip: { + options.upload_gzip = false; + break; + } #if defined(OS_MACOSX) case kOptionResetOwnCrashExceptionPortToSystemDefault: { options.reset_own_crash_exception_port_to_system_default = true; @@ -555,7 +564,7 @@ int HandlerMain(int argc, char* argv[]) { // configurable database setting to control upload limiting. 
// See https://crashpad.chromium.org/bug/23. CrashReportUploadThread upload_thread( - database.get(), options.url, options.rate_limit); + database.get(), options.url, options.rate_limit, options.upload_gzip); upload_thread.Start(); PruneCrashReportThread prune_thread(database.get(), diff --git a/third_party/zlib/README.crashpad b/third_party/zlib/README.crashpad new file mode 100644 index 00000000..83f47e31 --- /dev/null +++ b/third_party/zlib/README.crashpad @@ -0,0 +1,18 @@ +Name: zlib +Short Name: zlib +URL: http://zlib.net/ +Revision: See zlib/README.chromium +License: zlib +License File: zlib/LICENSE +Security Critical: yes + +Description: +“A massively spiffy yet delicately unobtrusive compression library.” + +zlib is a free, general-purpose, legally unencumbered lossless data-compression +library. zlib implements the “deflate” compression algorithm described by RFC +1951, which combines the LZ77 (Lempel-Ziv) algorithm with Huffman coding. zlib +also implements the zlib (RFC 1950) and gzip (RFC 1952) wrapper formats. + +Local Modifications: +See zlib/README.chromium. diff --git a/third_party/zlib/zlib.gyp b/third_party/zlib/zlib.gyp new file mode 100644 index 00000000..f92cdf26 --- /dev/null +++ b/third_party/zlib/zlib.gyp @@ -0,0 +1,137 @@ +# Copyright 2017 The Crashpad Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +{ + 'variables': { + 'conditions': [ + # Use the system zlib by default where available, as it is on most + # platforms. Windows does not have a system zlib, so use “embedded” which + # directs the build to use the source code in the zlib subdirectory. + ['OS!="win"', { + 'zlib_source%': 'system', + }, { + 'zlib_source%': 'embedded', + }], + ], + }, + 'targets': [ + { + 'target_name': 'zlib', + 'conditions': [ + ['zlib_source=="system"', { + 'type': 'none', + 'direct_dependent_settings': { + 'defines': [ + 'CRASHPAD_ZLIB_SOURCE_SYSTEM', + ], + }, + 'link_settings': { + 'conditions': [ + ['OS=="mac"', { + 'libraries': [ + '$(SDKROOT)/usr/lib/libz.dylib', + ], + }, { + 'libraries': [ + '-lz', + ], + }], + ], + }, + }], + ['zlib_source=="embedded"', { + 'type': 'static_library', + 'include_dirs': [ + 'zlib', + ], + 'defines': [ + 'CRASHPAD_ZLIB_SOURCE_EMBEDDED', + 'HAVE_STDARG_H', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + 'zlib', + ], + 'defines': [ + 'CRASHPAD_ZLIB_SOURCE_EMBEDDED', + ], + }, + 'sources': [ + 'zlib/adler32.c', + 'zlib/compress.c', + 'zlib/crc32.c', + 'zlib/crc32.h', + 'zlib/crc_folding.c', + 'zlib/deflate.c', + 'zlib/deflate.h', + 'zlib/fill_window_sse.c', + 'zlib/gzclose.c', + 'zlib/gzguts.h', + 'zlib/gzlib.c', + 'zlib/gzread.c', + 'zlib/gzwrite.c', + 'zlib/infback.c', + 'zlib/inffast.c', + 'zlib/inffast.h', + 'zlib/inffixed.h', + 'zlib/inflate.c', + 'zlib/inflate.h', + 'zlib/inftrees.c', + 'zlib/inftrees.h', + 'zlib/names.h', + 'zlib/simd_stub.c', + 'zlib/trees.c', + 'zlib/trees.h', + 'zlib/uncompr.c', + 'zlib/x86.c', + 'zlib/x86.h', + 'zlib/zconf.h', + 'zlib/zlib.h', + 'zlib/zutil.c', + 'zlib/zutil.h', + 'zlib_crashpad.h', + ], + 'conditions': [ + ['target_arch=="x86" or target_arch=="amd64"', { + 'sources!': [ + 'zlib/simd_stub.c', + ], + }, { + 'sources!': [ + 'zlib/crc_folding.c', + 'zlib/fill_window_sse.c', + 'zlib/x86.c', + 'zlib/x86.h', + ], + }], + ['OS!="win"', { + 'defines': [ + 'HAVE_HIDDEN', + 'HAVE_UNISTD_H', + ], 
+ }, { + 'msvs_disabled_warnings': [ + 4131, # uses old-style declarator + 4244, # conversion from 't1' to 't2', possible loss of data + 4245, # conversion from 't1' to 't2', signed/unsigned mismatch + 4267, # conversion from 'size_t' to 't', possible loss of data + 4324, # structure was padded due to alignment specifier + ], + }], + ], + }], + ], + }, + ], +} diff --git a/third_party/zlib/zlib_crashpad.h b/third_party/zlib/zlib_crashpad.h new file mode 100644 index 00000000..2ab542e0 --- /dev/null +++ b/third_party/zlib/zlib_crashpad.h @@ -0,0 +1,32 @@ +// Copyright 2017 The Crashpad Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CRASHPAD_THIRD_PARTY_ZLIB_ZLIB_CRASHPAD_H_ +#define CRASHPAD_THIRD_PARTY_ZLIB_ZLIB_CRASHPAD_H_ + +// #include this file instead of the system version of <zlib.h> or equivalent +// available at any other location in the source tree. It will #include the +// proper <zlib.h> depending on how the build has been configured.
+ +#if defined(CRASHPAD_ZLIB_SOURCE_SYSTEM) +#include +#elif defined(CRASHPAD_ZLIB_SOURCE_EMBEDDED) +#include "third_party/zlib/zlib/zlib.h" +#elif defined(CRASHPAD_ZLIB_SOURCE_CHROMIUM) +#include "third_party/zlib/zlib.h" +#else +#error Unknown zlib source +#endif + +#endif // CRASHPAD_THIRD_PARTY_ZLIB_ZLIB_CRASHPAD_H_ diff --git a/util/misc/zlib.cc b/util/misc/zlib.cc new file mode 100644 index 00000000..b26f9c90 --- /dev/null +++ b/util/misc/zlib.cc @@ -0,0 +1,37 @@ +// Copyright 2017 The Crashpad Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "util/misc/zlib.h" + +#include "base/logging.h" +#include "base/strings/stringprintf.h" +#include "third_party/zlib/zlib_crashpad.h" + +namespace crashpad { + +int ZlibWindowBitsWithGzipWrapper(int window_bits) { + // See the documentation for deflateInit2() and inflateInit2() in <zlib.h>. 0 + // is only valid during decompression. + + DCHECK(window_bits == 0 || (window_bits >= 8 && window_bits <= 15)) + << window_bits; + + return 16 + window_bits; +} + +std::string ZlibErrorString(int zr) { + return base::StringPrintf("%s (%d)", zError(zr), zr); +} + +} // namespace crashpad diff --git a/util/misc/zlib.h b/util/misc/zlib.h new file mode 100644 index 00000000..e3da6438 --- /dev/null +++ b/util/misc/zlib.h @@ -0,0 +1,42 @@ +// Copyright 2017 The Crashpad Authors. All rights reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CRASHPAD_UTIL_MISC_ZLIB_H_ +#define CRASHPAD_UTIL_MISC_ZLIB_H_ + +#include + +namespace crashpad { + +//! \brief Obtain a \a window_bits parameter to pass to `deflateInit2()` or +//! `inflateInit2()` that specifies a `gzip` wrapper instead of the default +//! zlib wrapper. +//! +//! \param[in] window_bits A \a window_bits value that only specifies the base-2 logarithm +//! of the deflate sliding window size. +//! +//! \return \a window_bits adjusted to specify a `gzip` wrapper, to be passed to +//! `deflateInit2()` or `inflateInit2()`. +int ZlibWindowBitsWithGzipWrapper(int window_bits); + +//! \brief Formats a string for an error received from the zlib library. +//! +//! \param[in] zr A zlib result code, such as `Z_STREAM_ERROR`. +//! +//! \return A formatted string. +std::string ZlibErrorString(int zr); + +} // namespace crashpad + +#endif // CRASHPAD_UTIL_MISC_ZLIB_H_ diff --git a/util/net/http_body_gzip.cc b/util/net/http_body_gzip.cc new file mode 100644 index 00000000..70f4db35 --- /dev/null +++ b/util/net/http_body_gzip.cc @@ -0,0 +1,126 @@ +// Copyright 2017 The Crashpad Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "util/net/http_body_gzip.h" + +#include + +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "third_party/zlib/zlib_crashpad.h" +#include "util/misc/zlib.h" + +namespace crashpad { + +GzipHTTPBodyStream::GzipHTTPBodyStream(std::unique_ptr source) + : input_(), + source_(std::move(source)), + z_stream_(new z_stream()), + state_(State::kUninitialized) {} + +GzipHTTPBodyStream::~GzipHTTPBodyStream() { + DCHECK(state_ == State::kUninitialized || + state_ == State::kFinished || + state_ == State::kError); +} + +FileOperationResult GzipHTTPBodyStream::GetBytesBuffer(uint8_t* buffer, + size_t max_len) { + if (state_ == State::kError) { + return -1; + } + + if (state_ == State::kFinished) { + return 0; + } + + if (state_ == State::kUninitialized) { + z_stream_->zalloc = Z_NULL; + z_stream_->zfree = Z_NULL; + z_stream_->opaque = Z_NULL; + + // The default values for zlib’s internal MAX_WBITS and DEF_MEM_LEVEL. These + // are the values that deflateInit() would use, but they’re not exported + // from zlib. deflateInit2() is used instead of deflateInit() to get the + // gzip wrapper. 
+ const int kZlibMaxWindowBits = 15; + const int kZlibDefaultMemoryLevel = 8; + + int zr = deflateInit2(z_stream_.get(), + Z_DEFAULT_COMPRESSION, + Z_DEFLATED, + ZlibWindowBitsWithGzipWrapper(kZlibMaxWindowBits), + kZlibDefaultMemoryLevel, + Z_DEFAULT_STRATEGY); + if (zr != Z_OK) { + LOG(ERROR) << "deflateInit2: " << ZlibErrorString(zr); + state_ = State::kError; + return -1; + } + + state_ = State::kOperating; + } + + z_stream_->next_out = buffer; + z_stream_->avail_out = base::saturated_cast(max_len); + + while (state_ != State::kFinished && z_stream_->avail_out > 0) { + if (state_ != State::kInputEOF && z_stream_->avail_in == 0) { + FileOperationResult input_bytes = + source_->GetBytesBuffer(input_, sizeof(input_)); + if (input_bytes == -1) { + Done(State::kError); + return -1; + } + + if (input_bytes == 0) { + state_ = State::kInputEOF; + } + + z_stream_->next_in = input_; + z_stream_->avail_in = base::checked_cast(input_bytes); + } + + int zr = deflate(z_stream_.get(), + state_ == State::kInputEOF ? 
Z_FINISH : Z_NO_FLUSH); + if (state_ == State::kInputEOF && zr == Z_STREAM_END) { + Done(State::kFinished); + if (state_ == State::kError) { + return -1; + } + } else if (zr != Z_OK) { + LOG(ERROR) << "deflate: " << ZlibErrorString(zr); + Done(State::kError); + return -1; + } + } + + DCHECK_LE(z_stream_->avail_out, max_len); + return max_len - z_stream_->avail_out; +} + +void GzipHTTPBodyStream::Done(State state) { + DCHECK(state_ == State::kOperating || state_ == State::kInputEOF) << state_; + DCHECK(state == State::kFinished || state == State::kError) << state; + + int zr = deflateEnd(z_stream_.get()); + if (zr != Z_OK) { + LOG(ERROR) << "deflateEnd: " << ZlibErrorString(zr); + state_ = State::kError; + } else { + state_ = state; + } +} + +} // namespace crashpad diff --git a/util/net/http_body_gzip.h b/util/net/http_body_gzip.h new file mode 100644 index 00000000..da3a5f24 --- /dev/null +++ b/util/net/http_body_gzip.h @@ -0,0 +1,67 @@ +// Copyright 2017 The Crashpad Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CRASHPAD_UTIL_NET_HTTP_BODY_GZIP_H_ +#define CRASHPAD_UTIL_NET_HTTP_BODY_GZIP_H_ + +#include +#include + +#include + +#include "base/macros.h" +#include "util/file/file_io.h" +#include "util/net/http_body.h" + +extern "C" { +typedef struct z_stream_s z_stream; +} // extern "C" + +namespace crashpad { + +//! \brief An implementation of HTTPBodyStream that `gzip`-compresses another +//! HTTPBodyStream. 
+class GzipHTTPBodyStream : public HTTPBodyStream { + public: + explicit GzipHTTPBodyStream(std::unique_ptr source); + + ~GzipHTTPBodyStream() override; + + // HTTPBodyStream: + FileOperationResult GetBytesBuffer(uint8_t* buffer, size_t max_len) override; + + private: + enum State : int { + kUninitialized, + kOperating, + kInputEOF, + kFinished, + kError, + }; + + // Calls deflateEnd() and transitions state_ to state. If deflateEnd() fails, + // logs a message and transitions state_ to State::kError. + void Done(State state); + + uint8_t input_[4096]; + std::unique_ptr source_; + std::unique_ptr z_stream_; + State state_; + + DISALLOW_COPY_AND_ASSIGN(GzipHTTPBodyStream); +}; + +} // namespace crashpad + +#endif // CRASHPAD_UTIL_NET_HTTP_BODY_GZIP_H_ diff --git a/util/net/http_body_gzip_test.cc b/util/net/http_body_gzip_test.cc new file mode 100644 index 00000000..a7b97b93 --- /dev/null +++ b/util/net/http_body_gzip_test.cc @@ -0,0 +1,178 @@ +// Copyright 2017 The Crashpad Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "util/net/http_body_gzip.h" + +#include + +#include +#include +#include +#include + +#include "base/macros.h" +#include "base/rand_util.h" +#include "base/numerics/safe_conversions.h" +#include "gtest/gtest.h" +#include "third_party/zlib/zlib_crashpad.h" +#include "util/misc/zlib.h" +#include "util/net/http_body.h" + +namespace crashpad { +namespace test { +namespace { + +class ScopedZlibInflateStream { + public: + explicit ScopedZlibInflateStream(z_stream* zlib) : zlib_(zlib) {} + ~ScopedZlibInflateStream() { + int zr = inflateEnd(zlib_); + EXPECT_EQ(Z_OK, zr) << "inflateEnd: " << ZlibErrorString(zr); + } + + private: + z_stream* zlib_; // weak + DISALLOW_COPY_AND_ASSIGN(ScopedZlibInflateStream); +}; + +void GzipInflate(const std::string& compressed, + std::string* decompressed, + size_t buf_size) { + decompressed->clear(); + + // There’s got to be at least a small buffer. + buf_size = std::max(buf_size, static_cast(1)); + + std::unique_ptr buf(new uint8_t[buf_size]); + z_stream zlib = {}; + zlib.zalloc = Z_NULL; + zlib.zfree = Z_NULL; + zlib.opaque = Z_NULL; + zlib.next_in = reinterpret_cast(const_cast(&compressed[0])); + zlib.avail_in = base::checked_cast(compressed.size()); + zlib.next_out = buf.get(); + zlib.avail_out = base::checked_cast(buf_size); + + int zr = inflateInit2(&zlib, ZlibWindowBitsWithGzipWrapper(0)); + ASSERT_EQ(Z_OK, zr) << "inflateInit2: " << ZlibErrorString(zr); + ScopedZlibInflateStream zlib_inflate(&zlib); + + zr = inflate(&zlib, Z_FINISH); + ASSERT_EQ(Z_STREAM_END, zr) << "inflate: " << ZlibErrorString(zr); + + ASSERT_LE(zlib.avail_out, buf_size); + decompressed->assign(reinterpret_cast(buf.get()), + buf_size - zlib.avail_out); +} + +void TestGzipDeflateInflate(const std::string& string) { + std::unique_ptr string_stream( + new StringHTTPBodyStream(string)); + GzipHTTPBodyStream gzip_stream(std::move(string_stream)); + + // The minimum size of a gzip wrapper per RFC 1952: a 10-byte header and an + // 8-byte trailer. 
+ const size_t kGzipHeaderSize = 18; + + // Per http://www.zlib.net/zlib_tech.html, in the worst case, zlib will store + // uncompressed data as-is, at an overhead of 5 bytes per 16384-byte block. + // Zero-length input will “compress” to a 2-byte zlib stream. Add the overhead + // of the gzip wrapper, assuming no optional fields are present. + size_t buf_size = + string.size() + kGzipHeaderSize + + (string.empty() ? 2 : (((string.size() + 16383) / 16384) * 5)); + std::unique_ptr buf(new uint8_t[buf_size]); + FileOperationResult compressed_bytes = + gzip_stream.GetBytesBuffer(buf.get(), buf_size); + ASSERT_NE(compressed_bytes, -1); + ASSERT_LE(static_cast(compressed_bytes), buf_size); + + // Make sure that the stream is really at EOF. + uint8_t eof_buf[16]; + ASSERT_EQ(0, gzip_stream.GetBytesBuffer(eof_buf, sizeof(eof_buf))); + + std::string compressed(reinterpret_cast(buf.get()), compressed_bytes); + + ASSERT_GE(compressed.size(), kGzipHeaderSize); + EXPECT_EQ('\37', compressed[0]); + EXPECT_EQ('\213', compressed[1]); + EXPECT_EQ(Z_DEFLATED, compressed[2]); + + std::string decompressed; + ASSERT_NO_FATAL_FAILURE( + GzipInflate(compressed, &decompressed, string.size())); + + EXPECT_EQ(string, decompressed); + + // In block mode, compression should be identical. 
+ string_stream.reset(new StringHTTPBodyStream(string)); + GzipHTTPBodyStream block_gzip_stream(std::move(string_stream)); + uint8_t block_buf[4096]; + std::string block_compressed; + FileOperationResult block_compressed_bytes; + while ((block_compressed_bytes = block_gzip_stream.GetBytesBuffer( + block_buf, sizeof(block_buf))) > 0) { + block_compressed.append(reinterpret_cast(block_buf), + block_compressed_bytes); + } + ASSERT_EQ(0, block_compressed_bytes); + EXPECT_EQ(compressed, block_compressed); +} + +std::string MakeString(size_t size) { + std::string string; + for (size_t i = 0; i < size; ++i) { + string.append(1, (i % 256) ^ ((i >> 8) % 256)); + } + return string; +} + +constexpr size_t kFourKBytes = 4096; +constexpr size_t kManyBytes = 375017; + +TEST(GzipHTTPBodyStream, Empty) { + TestGzipDeflateInflate(std::string()); +} + +TEST(GzipHTTPBodyStream, OneByte) { + TestGzipDeflateInflate(std::string("Z")); +} + +TEST(GzipHTTPBodyStream, FourKBytes_NUL) { + TestGzipDeflateInflate(std::string(kFourKBytes, '\0')); +} + +TEST(GzipHTTPBodyStream, ManyBytes_NUL) { + TestGzipDeflateInflate(std::string(kManyBytes, '\0')); +} + +TEST(GzipHTTPBodyStream, FourKBytes_Deterministic) { + TestGzipDeflateInflate(MakeString(kFourKBytes)); +} + +TEST(GzipHTTPBodyStream, ManyBytes_Deterministic) { + TestGzipDeflateInflate(MakeString(kManyBytes)); +} + +TEST(GzipHTTPBodyStream, FourKBytes_Random) { + TestGzipDeflateInflate(base::RandBytesAsString(kFourKBytes)); +} + +TEST(GzipHTTPBodyStream, ManyBytes_Random) { + TestGzipDeflateInflate(base::RandBytesAsString(kManyBytes)); +} + +} // namespace +} // namespace test +} // namespace crashpad diff --git a/util/net/http_headers.cc b/util/net/http_headers.cc index 09d61b3e..37d84c67 100644 --- a/util/net/http_headers.cc +++ b/util/net/http_headers.cc @@ -17,7 +17,7 @@ namespace crashpad { const char kContentType[] = "Content-Type"; - const char kContentLength[] = "Content-Length"; +const char kContentEncoding[] = "Content-Encoding"; 
} // namespace crashpad diff --git a/util/net/http_headers.h b/util/net/http_headers.h index 3633cb2d..851ff31c 100644 --- a/util/net/http_headers.h +++ b/util/net/http_headers.h @@ -29,6 +29,9 @@ extern const char kContentType[]; //! \brief The header name `"Content-Length"`. extern const char kContentLength[]; +//! \brief The header name `"Content-Encoding"`. +extern const char kContentEncoding[]; + } // namespace crashpad #endif // CRASHPAD_UTIL_NET_HTTP_HEADERS_H_ diff --git a/util/net/http_multipart_builder.cc b/util/net/http_multipart_builder.cc index 46f6f090..640d540a 100644 --- a/util/net/http_multipart_builder.cc +++ b/util/net/http_multipart_builder.cc @@ -23,6 +23,7 @@ #include "base/rand_util.h" #include "base/strings/stringprintf.h" #include "util/net/http_body.h" +#include "util/net/http_body_gzip.h" namespace crashpad { @@ -116,12 +117,18 @@ void AssertSafeMIMEType(const std::string& string) { } // namespace HTTPMultipartBuilder::HTTPMultipartBuilder() - : boundary_(GenerateBoundaryString()), form_data_(), file_attachments_() { -} + : boundary_(GenerateBoundaryString()), + form_data_(), + file_attachments_(), + gzip_enabled_(false) {} HTTPMultipartBuilder::~HTTPMultipartBuilder() { } +void HTTPMultipartBuilder::SetGzipEnabled(bool gzip_enabled) { + gzip_enabled_ = gzip_enabled; +} + void HTTPMultipartBuilder::SetFormData(const std::string& key, const std::string& value) { EraseKey(key); @@ -179,13 +186,24 @@ std::unique_ptr HTTPMultipartBuilder::GetBodyStream() { streams.push_back( new StringHTTPBodyStream("--" + boundary_ + "--" + kCRLF)); - return std::unique_ptr(new CompositeHTTPBodyStream(streams)); + auto composite = + std::unique_ptr(new CompositeHTTPBodyStream(streams)); + if (gzip_enabled_) { + return std::unique_ptr( + new GzipHTTPBodyStream(std::move(composite))); + } + return composite; } -HTTPHeaders::value_type HTTPMultipartBuilder::GetContentType() const { +void HTTPMultipartBuilder::PopulateContentHeaders( + HTTPHeaders* http_headers) 
const { std::string content_type = base::StringPrintf("multipart/form-data; boundary=%s", boundary_.c_str()); - return std::make_pair(kContentType, content_type); + (*http_headers)[kContentType] = content_type; + + if (gzip_enabled_) { + (*http_headers)[kContentEncoding] = "gzip"; + } } void HTTPMultipartBuilder::EraseKey(const std::string& key) { diff --git a/util/net/http_multipart_builder.h b/util/net/http_multipart_builder.h index 65602c71..e0c98fa4 100644 --- a/util/net/http_multipart_builder.h +++ b/util/net/http_multipart_builder.h @@ -34,6 +34,15 @@ class HTTPMultipartBuilder { HTTPMultipartBuilder(); ~HTTPMultipartBuilder(); + //! \brief Enables or disables `gzip` compression. + //! + //! \param[in] gzip_enabled Whether to enable or disable `gzip` compression. + //! + //! When `gzip` compression is enabled, the body stream returned by + //! GetBodyStream() will be `gzip`-compressed, and the content headers set by + //! PopulateContentHeaders() will contain `Content-Encoding: gzip`. + void SetGzipEnabled(bool gzip_enabled); + //! \brief Sets a `Content-Disposition: form-data` key-value pair. //! //! \param[in] key The key of the form data, specified as the `name` in the @@ -64,8 +73,11 @@ class HTTPMultipartBuilder { //! \return A caller-owned HTTPBodyStream object. std::unique_ptr<HTTPBodyStream> GetBodyStream(); - //! \brief Gets the header pair for `"Content-Type"`. - HTTPHeaders::value_type GetContentType() const; + //! \brief Adds the appropriate content headers to \a http_headers. + //! + //! Any headers that this method adds will replace existing headers by the + //! same name in \a http_headers.
+ void PopulateContentHeaders(HTTPHeaders* http_headers) const; private: struct FileAttachment { @@ -81,6 +93,7 @@ class HTTPMultipartBuilder { std::string boundary_; std::map<std::string, std::string> form_data_; std::map<std::string, FileAttachment> file_attachments_; + bool gzip_enabled_; DISALLOW_COPY_AND_ASSIGN(HTTPMultipartBuilder); }; diff --git a/util/net/http_multipart_builder_test.cc b/util/net/http_multipart_builder_test.cc index d019d058..fa20abe4 100644 --- a/util/net/http_multipart_builder_test.cc +++ b/util/net/http_multipart_builder_test.cc @@ -71,6 +71,7 @@ TEST(HTTPMultipartBuilder, ThreeStringFields) { ASSERT_TRUE(body.get()); std::string contents = ReadStreamToString(body.get()); auto lines = SplitCRLF(contents); + ASSERT_EQ(13u, lines.size()); auto lines_it = lines.begin(); // The first line is the boundary. All subsequent boundaries must match this. @@ -164,6 +165,7 @@ TEST(HTTPMultipartBuilder, OverwriteFormDataWithEscapedKey) { ASSERT_TRUE(body.get()); std::string contents = ReadStreamToString(body.get()); auto lines = SplitCRLF(contents); + ASSERT_EQ(5u, lines.size()); auto lines_it = lines.begin(); const std::string& boundary = *lines_it++; @@ -253,6 +255,7 @@ TEST(HTTPMultipartBuilder, SharedFormDataAndAttachmentKeyNamespace) { ASSERT_TRUE(body.get()); std::string contents = ReadStreamToString(body.get()); auto lines = SplitCRLF(contents); + ASSERT_EQ(9u, lines.size()); auto lines_it = lines.begin(); const std::string& boundary = *lines_it++; diff --git a/util/net/http_transport_test.cc b/util/net/http_transport_test.cc index e8495308..8dfa68a4 100644 --- a/util/net/http_transport_test.cc +++ b/util/net/http_transport_test.cc @@ -221,7 +221,21 @@ TEST(HTTPTransport, ValidFormData) { builder.SetFormData("key1", "test"); builder.SetFormData("key2", "--abcdefg123"); HTTPHeaders headers; - EXPECT_TRUE(headers.insert(builder.GetContentType()).second); + builder.PopulateContentHeaders(&headers); + + HTTPTransportTestFixture test( + headers, builder.GetBodyStream(), 200, &ValidFormData); + test.Run(); +} + +TEST(HTTPTransport,
ValidFormData_Gzip) { + HTTPMultipartBuilder builder; + builder.SetGzipEnabled(true); + builder.SetFormData("key1", "test"); + builder.SetFormData("key2", "--abcdefg123"); + + HTTPHeaders headers; + builder.PopulateContentHeaders(&headers); HTTPTransportTestFixture test(headers, builder.GetBodyStream(), 200, &ValidFormData); diff --git a/util/net/http_transport_test_server.py b/util/net/http_transport_test_server.py index e79a428e..7ea15719 100755 --- a/util/net/http_transport_test_server.py +++ b/util/net/http_transport_test_server.py @@ -33,6 +33,7 @@ This could easily have been written in C++ instead. import BaseHTTPServer import struct import sys +import zlib class BufferedReadFile(object): """A File-like object that stores all read contents into a buffer.""" @@ -88,6 +89,13 @@ class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): length = int(self.headers.get('Content-Length', -1)) body = self.rfile.read(length) + if self.headers.get('Content-Encoding', '').lower() == 'gzip': + # 15 is the value of |wbits|, which should be at the maximum possible + # value to ensure that any gzip stream can be decoded. The offset of 16 + # specifies that the stream to decompress will be formatted with a gzip + # wrapper. 
+ body = zlib.decompress(body, 16 + 15) + RequestHandler.raw_request += body self.send_response(self.response_code) diff --git a/util/util.gyp b/util/util.gyp index abf0bfdc..b2354d28 100644 --- a/util/util.gyp +++ b/util/util.gyp @@ -23,6 +23,7 @@ 'dependencies': [ '../compat/compat.gyp:crashpad_compat', '../third_party/mini_chromium/mini_chromium.gyp:base', + '../third_party/zlib/zlib.gyp:zlib', ], 'include_dirs': [ '..', @@ -106,8 +107,12 @@ 'misc/tri_state.h', 'misc/uuid.cc', 'misc/uuid.h', + 'misc/zlib.cc', + 'misc/zlib.h', 'net/http_body.cc', 'net/http_body.h', + 'net/http_body_gzip.cc', + 'net/http_body_gzip.h', 'net/http_headers.cc', 'net/http_headers.h', 'net/http_multipart_builder.cc', diff --git a/util/util_test.gyp b/util/util_test.gyp index e6bf5635..7636941b 100644 --- a/util/util_test.gyp +++ b/util/util_test.gyp @@ -29,6 +29,7 @@ '../third_party/gtest/gmock.gyp:gmock_main', '../third_party/gtest/gtest.gyp:gtest', '../third_party/mini_chromium/mini_chromium.gyp:base', + '../third_party/zlib/zlib.gyp:zlib', ], 'include_dirs': [ '..', @@ -62,6 +63,7 @@ 'misc/scoped_forbid_return_test.cc', 'misc/random_string_test.cc', 'misc/uuid_test.cc', + 'net/http_body_gzip_test.cc', 'net/http_body_test.cc', 'net/http_body_test_util.cc', 'net/http_body_test_util.h',