From d2675ce50b7d2374ce0dd8e1c8ab79beefddad29 Mon Sep 17 00:00:00 2001
From: Mark Mentovai
Date: Fri, 1 Aug 2014 14:44:57 -0400
Subject: [PATCH] Introduce MinidumpStringWriter and its test.

MinidumpStringWriter is responsible for writing variable-length UTF-16
(MINIDUMP_STRING) and UTF-8 (MinidumpUTF8String) strings to minidump
files.

This change depends on https://codereview.chromium.org/430003003/ and
https://codereview.chromium.org/435013002/.

TEST=minidump_test MinidumpStringWriter.*
R=rsesek@chromium.org

Review URL: https://codereview.chromium.org/431363002
---
 minidump/minidump.gyp                   |   5 +
 minidump/minidump_string_writer.cc      |  90 +++++++++++
 minidump/minidump_string_writer.h       | 123 +++++++++++++++
 minidump/minidump_string_writer_test.cc | 202 ++++++++++++++++++++++++
 minidump/minidump_writer_util.cc        |  61 +++++++
 minidump/minidump_writer_util.h         |  90 +++++++++++
 6 files changed, 571 insertions(+)
 create mode 100644 minidump/minidump_string_writer.cc
 create mode 100644 minidump/minidump_string_writer.h
 create mode 100644 minidump/minidump_string_writer_test.cc
 create mode 100644 minidump/minidump_writer_util.cc
 create mode 100644 minidump/minidump_writer_util.h

diff --git a/minidump/minidump.gyp b/minidump/minidump.gyp
index 7f4bc5d1..b1b21ecb 100644
--- a/minidump/minidump.gyp
+++ b/minidump/minidump.gyp
@@ -30,8 +30,12 @@
       ],
       'sources': [
         'minidump_extensions.h',
+        'minidump_string_writer.cc',
+        'minidump_string_writer.h',
         'minidump_writable.cc',
         'minidump_writable.h',
+        'minidump_writer_util.cc',
+        'minidump_writer_util.h',
       ],
     },
     {
@@ -47,6 +51,7 @@
       ],
       'sources': [
         '../third_party/gtest/gtest/src/gtest_main.cc',
+        'minidump_string_writer_test.cc',
         'minidump_writable_test.cc',
       ],
     },
diff --git a/minidump/minidump_string_writer.cc b/minidump/minidump_string_writer.cc
new file mode 100644
index 00000000..4c57a43a
--- /dev/null
+++ b/minidump/minidump_string_writer.cc
@@ -0,0 +1,90 @@
+// Copyright 2014 The Crashpad Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "minidump/minidump_string_writer.h"
+
+#include "base/logging.h"
+#include "minidump/minidump_writer_util.h"
+#include "util/numeric/safe_assignment.h"
+
+namespace crashpad {
+namespace internal {
+
+template <typename Traits>
+MinidumpStringWriter<Traits>::MinidumpStringWriter()
+    : MinidumpWritable(), string_base_(), string_() {
+}
+
+template <typename Traits>
+MinidumpStringWriter<Traits>::~MinidumpStringWriter() {
+}
+
+template <typename Traits>
+bool MinidumpStringWriter<Traits>::Freeze() {
+  DCHECK_EQ(state(), kStateMutable);
+
+  if (!MinidumpWritable::Freeze()) {
+    return false;
+  }
+  // Length is a byte count that excludes the NUL terminator.
+  size_t string_bytes = string_.size() * sizeof(string_[0]);
+  if (!AssignIfInRange(&string_base_.Length, string_bytes)) {
+    LOG(ERROR) << "string_bytes " << string_bytes << " out of range";
+    return false;
+  }
+
+  return true;
+}
+
+template <typename Traits>
+size_t MinidumpStringWriter<Traits>::SizeOfObject() {
+  DCHECK_GE(state(), kStateFrozen);
+
+  // Include the NUL terminator.
+  return sizeof(string_base_) + (string_.size() + 1) * sizeof(string_[0]);
+}
+
+template <typename Traits>
+bool MinidumpStringWriter<Traits>::WriteObject(
+    FileWriterInterface* file_writer) {
+  DCHECK_EQ(state(), kStateWritable);
+
+  // The string’s length is stored in string_base_, and its data is stored in
+  // string_. Write them both.
+  WritableIoVec iov;
+  iov.iov_base = &string_base_;
+  iov.iov_len = sizeof(string_base_);
+  std::vector<WritableIoVec> iovecs(1, iov);
+
+  // Include the NUL terminator.
+  iov.iov_base = &string_[0];
+  iov.iov_len = (string_.size() + 1) * sizeof(string_[0]);
+  iovecs.push_back(iov);
+
+  return file_writer->WriteIoVec(&iovecs);
+}
+
+// Explicit template instantiation of the forms of MinidumpStringWriter<> used
+// as base classes.
+template class MinidumpStringWriter<MinidumpStringWriterUTF16Traits>;
+template class MinidumpStringWriter<MinidumpStringWriterUTF8Traits>;
+
+void MinidumpUTF16StringWriter::SetUTF8(const std::string& string_utf8) {
+  DCHECK_EQ(state(), kStateMutable);
+
+  set_string(MinidumpWriterUtil::ConvertUTF8ToUTF16(string_utf8));
+}
+
+}  // namespace internal
+}  // namespace crashpad
diff --git a/minidump/minidump_string_writer.h b/minidump/minidump_string_writer.h
new file mode 100644
index 00000000..9f5fceb8
--- /dev/null
+++ b/minidump/minidump_string_writer.h
@@ -0,0 +1,123 @@
+// Copyright 2014 The Crashpad Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CRASHPAD_MINIDUMP_MINIDUMP_STRING_WRITER_H_
+#define CRASHPAD_MINIDUMP_MINIDUMP_STRING_WRITER_H_
+
+#include <dbghelp.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/strings/string16.h"
+#include "minidump/minidump_extensions.h"
+#include "minidump/minidump_writable.h"
+#include "util/file/file_writer.h"
+
+namespace crashpad {
+namespace internal {
+
+//! \cond
+
+struct MinidumpStringWriterUTF16Traits {
+  typedef string16 StringType;
+  typedef MINIDUMP_STRING MinidumpStringType;
+};
+
+struct MinidumpStringWriterUTF8Traits {
+  typedef std::string StringType;
+  typedef MinidumpUTF8String MinidumpStringType;
+};
+
+//! \endcond
+
+//! \brief Writes a variable-length string to a minidump file in accordance with
+//!     the string type’s characteristics.
+//!
+//! MinidumpStringWriter objects should not be instantiated directly. To write
+//! strings to a minidump file, use the MinidumpUTF16StringWriter and
+//! MinidumpUTF8StringWriter subclasses instead.
+template <typename Traits>
+class MinidumpStringWriter : public MinidumpWritable {
+ public:
+  MinidumpStringWriter();
+  ~MinidumpStringWriter();
+
+ protected:
+  typedef typename Traits::MinidumpStringType MinidumpStringType;
+  typedef typename Traits::StringType StringType;
+
+  virtual bool Freeze() override;
+  virtual size_t SizeOfObject() override;
+  virtual bool WriteObject(FileWriterInterface* file_writer) override;
+
+  //! \brief Sets the string to be written.
+  //!
+  //! \note Valid in #kStateMutable.
+  void set_string(const StringType& string) { string_.assign(string); }
+
+ private:
+  MinidumpStringType string_base_;
+  StringType string_;
+
+  DISALLOW_COPY_AND_ASSIGN(MinidumpStringWriter);
+};
+
+//! \brief Writes a variable-length UTF-16-encoded MINIDUMP_STRING to a minidump
+//!     file.
+//!
+//! MinidumpUTF16StringWriter objects should not be instantiated directly
+//! outside of the MinidumpWritable family of classes.
+class MinidumpUTF16StringWriter final
+    : public MinidumpStringWriter<MinidumpStringWriterUTF16Traits> {
+ public:
+  MinidumpUTF16StringWriter() : MinidumpStringWriter() {}
+  ~MinidumpUTF16StringWriter() {}
+
+  //! \brief Converts a UTF-8 string to UTF-16 and sets it as the string to be
+  //!     written.
+  //!
+  //! \note Valid in #kStateMutable.
+  void SetUTF8(const std::string& string_utf8);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MinidumpUTF16StringWriter);
+};
+
+//! \brief Writes a variable-length UTF-8-encoded MinidumpUTF8String to a
+//!     minidump file.
+//!
+//! MinidumpUTF8StringWriter objects should not be instantiated directly outside
+//! of the MinidumpWritable family of classes.
+class MinidumpUTF8StringWriter final
+    : public MinidumpStringWriter<MinidumpStringWriterUTF8Traits> {
+ public:
+  MinidumpUTF8StringWriter() : MinidumpStringWriter() {}
+  ~MinidumpUTF8StringWriter() {}
+
+  //! \brief Sets the string to be written.
+  //!
+  //! \note Valid in #kStateMutable.
+  void SetUTF8(const std::string& string_utf8) { set_string(string_utf8); }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MinidumpUTF8StringWriter);
+};
+
+}  // namespace internal
+}  // namespace crashpad
+
+#endif  // CRASHPAD_MINIDUMP_MINIDUMP_STRING_WRITER_H_
diff --git a/minidump/minidump_string_writer_test.cc b/minidump/minidump_string_writer_test.cc
new file mode 100644
index 00000000..3d114f9f
--- /dev/null
+++ b/minidump/minidump_string_writer_test.cc
@@ -0,0 +1,202 @@
+// Copyright 2014 The Crashpad Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "minidump/minidump_string_writer.h"
+
+#include <dbghelp.h>
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/strings/string16.h"
+#include "base/strings/stringprintf.h"
+#include "gtest/gtest.h"
+#include "util/file/string_file_writer.h"
+
+namespace {
+
+using namespace crashpad;
+using namespace testing;
+
+const MINIDUMP_STRING* MinidumpStringCast(const StringFileWriter& file_writer) {
+  return reinterpret_cast<const MINIDUMP_STRING*>(&file_writer.string()[0]);
+}
+
+TEST(MinidumpStringWriter, MinidumpUTF16StringWriter) {
+  StringFileWriter file_writer;
+
+  {
+    SCOPED_TRACE("unset");
+    file_writer.Reset();
+    crashpad::internal::MinidumpUTF16StringWriter string_writer;
+    EXPECT_TRUE(string_writer.WriteEverything(&file_writer));
+    ASSERT_EQ(6u, file_writer.string().size());
+    const MINIDUMP_STRING* minidump_string = MinidumpStringCast(file_writer);
+    EXPECT_EQ(0u, minidump_string->Length);
+    EXPECT_EQ(0, minidump_string->Buffer[0]);
+  }
+
+  const struct {
+    size_t input_length;
+    const char* input_string;
+    size_t output_length;
+    const char16 output_string[10];
+  } kTestData[] = {
+      {0, "", 0, {}},
+      {1, "a", 1, {'a'}},
+      {2, "\0b", 2, {0, 'b'}},
+      {3, "cde", 3, {'c', 'd', 'e'}},
+      {9, "Hi world!", 9, {'H', 'i', ' ', 'w', 'o', 'r', 'l', 'd', '!'}},
+      {7, "ret\nurn", 7, {'r', 'e', 't', '\n', 'u', 'r', 'n'}},
+      {2, "\303\251", 1, {0x00e9}},  // é
+
+      // oóöőo
+      {8, "o\303\263\303\266\305\221o", 5, {'o', 0x00f3, 0x00f6, 0x151, 'o'}},
+      {4, "\360\220\204\202", 2, {0xd800, 0xdd02}},  // 𐄂 (non-BMP)
+  };
+
+  for (size_t index = 0; index < arraysize(kTestData); ++index) {
+    SCOPED_TRACE(base::StringPrintf(
+        "index %zu, input %s", index, kTestData[index].input_string));
+
+    // Make sure that the expected output string with its NUL terminator fits in
+    // the space provided.
+    ASSERT_EQ(
+        0,
+        kTestData[index]
+            .output_string[arraysize(kTestData[index].output_string) - 1]);
+
+    file_writer.Reset();
+    crashpad::internal::MinidumpUTF16StringWriter string_writer;
+    string_writer.SetUTF8(std::string(kTestData[index].input_string,
+                                      kTestData[index].input_length));
+    EXPECT_TRUE(string_writer.WriteEverything(&file_writer));
+
+    const size_t expected_utf16_units_with_nul =
+        kTestData[index].output_length + 1;
+    const size_t expected_utf16_bytes =
+        expected_utf16_units_with_nul * sizeof(MINIDUMP_STRING::Buffer[0]);
+    ASSERT_EQ(sizeof(MINIDUMP_STRING) + expected_utf16_bytes,
+              file_writer.string().size());
+    const MINIDUMP_STRING* minidump_string = MinidumpStringCast(file_writer);
+    EXPECT_EQ(
+        kTestData[index].output_length * sizeof(minidump_string->Buffer[0]),
+        minidump_string->Length);
+    EXPECT_EQ(0,
+              base::c16memcmp(kTestData[index].output_string,
+                              minidump_string->Buffer,
+                              expected_utf16_units_with_nul));
+  }
+}
+
+TEST(MinidumpStringWriter, ConvertInvalidUTF8ToUTF16) {
+  StringFileWriter file_writer;
+  // Held as std::string so that bytes after an embedded NUL are not dropped.
+  const std::string kTestData[] = {
+      "\200",  // continuation byte
+      "\300",  // start byte followed by EOF
+      "\310\177",  // start byte without continuation
+      "\340\200",  // EOF in middle of 3-byte sequence
+      "\340\200\115",  // invalid 3-byte sequence
+      std::string("\303\0\251", 3),  // NUL in middle of valid sequence
+  };
+
+  for (size_t index = 0; index < arraysize(kTestData); ++index) {
+    SCOPED_TRACE(base::StringPrintf(
+        "index %zu, input %s", index, kTestData[index].c_str()));
+    file_writer.Reset();
+    crashpad::internal::MinidumpUTF16StringWriter string_writer;
+    string_writer.SetUTF8(kTestData[index]);
+    EXPECT_TRUE(string_writer.WriteEverything(&file_writer));
+
+    // The requirements for conversion of invalid UTF-8 input are lax. Make sure
+    // that at least enough data was written for a string that has one unit and
+    // a NUL terminator, make sure that the length field matches the length of
+    // data written, make sure the data is NUL-terminated, and make sure that at
+    // least one U+FFFD replacement character was written.
+    ASSERT_GE(file_writer.string().size(),
+              sizeof(MINIDUMP_STRING) + 2 * sizeof(MINIDUMP_STRING::Buffer[0]));
+    const MINIDUMP_STRING* minidump_string = MinidumpStringCast(file_writer);
+    EXPECT_EQ(file_writer.string().size() - sizeof(MINIDUMP_STRING) -
+                  sizeof(MINIDUMP_STRING::Buffer[0]),
+              minidump_string->Length);
+    size_t out_units =
+        minidump_string->Length / sizeof(minidump_string->Buffer[0]);
+    EXPECT_EQ(0, minidump_string->Buffer[out_units]);
+    EXPECT_NE(reinterpret_cast<char16*>(NULL),
+              base::c16memchr(minidump_string->Buffer, 0xfffd, out_units));
+  }
+}
+
+const MinidumpUTF8String* MinidumpUTF8StringCast(
+    const StringFileWriter& file_writer) {
+  return reinterpret_cast<const MinidumpUTF8String*>(&file_writer.string()[0]);
+}
+
+TEST(MinidumpStringWriter, MinidumpUTF8StringWriter) {
+  StringFileWriter file_writer;
+
+  {
+    SCOPED_TRACE("unset");
+    file_writer.Reset();
+    crashpad::internal::MinidumpUTF8StringWriter string_writer;
+    EXPECT_TRUE(string_writer.WriteEverything(&file_writer));
+    ASSERT_EQ(5u, file_writer.string().size());
+    const MinidumpUTF8String* minidump_string =
+        MinidumpUTF8StringCast(file_writer);
+    EXPECT_EQ(0u, minidump_string->Length);
+    EXPECT_EQ(0, minidump_string->Buffer[0]);
+  }
+
+  const struct {
+    size_t length;
+    const char* string;
+  } kTestData[] = {
+      {0, ""},
+      {1, "a"},
+      {2, "\0b"},
+      {3, "cde"},
+      {9, "Hi world!"},
+      {7, "ret\nurn"},
+      {2, "\303\251"},  // é
+
+      // oóöőo
+      {8, "o\303\263\303\266\305\221o"},
+      {4, "\360\220\204\202"},  // 𐄂 (non-BMP)
+  };
+
+  for (size_t index = 0; index < arraysize(kTestData); ++index) {
+    SCOPED_TRACE(base::StringPrintf(
+        "index %zu, input %s", index, kTestData[index].string));
+
+    file_writer.Reset();
+    crashpad::internal::MinidumpUTF8StringWriter string_writer;
+    string_writer.SetUTF8(
+        std::string(kTestData[index].string, kTestData[index].length));
+    EXPECT_TRUE(string_writer.WriteEverything(&file_writer));
+
+    const size_t expected_utf8_bytes_with_nul = kTestData[index].length + 1;
+    ASSERT_EQ(sizeof(MinidumpUTF8String) + expected_utf8_bytes_with_nul,
+              file_writer.string().size());
+    const MinidumpUTF8String* minidump_string =
+        MinidumpUTF8StringCast(file_writer);
+    EXPECT_EQ(kTestData[index].length, minidump_string->Length);
+    EXPECT_EQ(0,
+              memcmp(kTestData[index].string,
+                     minidump_string->Buffer,
+                     expected_utf8_bytes_with_nul));
+  }
+}
+
+}  // namespace
diff --git a/minidump/minidump_writer_util.cc b/minidump/minidump_writer_util.cc
new file mode 100644
index 00000000..6afcb1f2
--- /dev/null
+++ b/minidump/minidump_writer_util.cc
@@ -0,0 +1,61 @@
+// Copyright 2014 The Crashpad Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "minidump/minidump_writer_util.h"
+
+#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
+#include "base/strings/utf_string_conversions.h"
+#include "util/stdlib/strlcpy.h"
+
+namespace crashpad {
+namespace internal {
+
+// static
+void MinidumpWriterUtil::AssignTimeT(uint32_t* destination, time_t source) {
+  if (!base::IsValueInRangeForNumericType<uint32_t>(source)) {
+    LOG(WARNING) << "timestamp " << source << " out of range";
+  }
+  // Out-of-range values are deliberately truncated rather than rejected.
+  *destination = static_cast<uint32_t>(source);
+}
+
+// static
+string16 MinidumpWriterUtil::ConvertUTF8ToUTF16(const std::string& utf8) {
+  string16 utf16;
+  if (!base::UTF8ToUTF16(utf8.data(), utf8.length(), &utf16)) {
+    LOG(WARNING) << "string " << utf8
+                 << " cannot be converted to UTF-16 losslessly";
+  }
+  return utf16;
+}
+
+// static
+void MinidumpWriterUtil::AssignUTF8ToUTF16(char16* destination,
+                                           size_t destination_size,
+                                           const std::string& source) {
+  // Clamp so that a zero-sized buffer cannot wrap the unsigned subtraction.
+  const size_t max_units = destination_size > 0 ? destination_size - 1 : 0;
+  string16 source_utf16 = ConvertUTF8ToUTF16(source);
+  if (source_utf16.size() > max_units) {
+    LOG(WARNING) << "string " << source << " UTF-16 length "
+                 << source_utf16.size()
+                 << " will be truncated to UTF-16 length " << max_units;
+  }
+  source_utf16.resize(max_units);
+  c16lcpy(destination, source_utf16.c_str(), destination_size);
+}
+
+}  // namespace internal
+}  // namespace crashpad
diff --git a/minidump/minidump_writer_util.h b/minidump/minidump_writer_util.h
new file mode 100644
index 00000000..deb3648d
--- /dev/null
+++ b/minidump/minidump_writer_util.h
@@ -0,0 +1,90 @@
+// Copyright 2014 The Crashpad Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CRASHPAD_MINIDUMP_MINIDUMP_WRITER_UTIL_H_
+#define CRASHPAD_MINIDUMP_MINIDUMP_WRITER_UTIL_H_
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <time.h>
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/strings/string16.h"
+
+namespace crashpad {
+namespace internal {
+
+//! \brief A collection of utility functions used by the MinidumpWritable family
+//!     of classes.
+class MinidumpWriterUtil final {
+ public:
+  //! \brief Assigns a `time_t` value, logging a warning if the result overflows
+  //!     the destination buffer and will be truncated.
+  //!
+  //! \param[out] destination A pointer to the variable to be assigned to.
+  //! \param[in] source The value to assign.
+  //!
+  //! The minidump format uses `uint32_t` for many timestamp values, but
+  //! `time_t` may be wider than this. These year 2038 bugs are a limitation of
+  //! the minidump format. An out-of-range error will be noted with a warning,
+  //! but is not considered fatal. \a source will be truncated and assigned to
+  //! \a destination in this case.
+  //!
+  //! For `time_t` values with nonfatal overflow semantics, this function is
+  //! used in preference to AssignIfInRange(), which fails without performing an
+  //! assignment when an out-of-range condition is detected.
+  static void AssignTimeT(uint32_t* destination, time_t source);
+
+  //! \brief Converts a UTF-8 string to UTF-16 and returns it. If the string
+  //!     cannot be converted losslessly, indicating that the input is not
+  //!     well-formed UTF-8, a warning is logged.
+  //!
+  //! \param[in] utf8 The UTF-8-encoded string to convert.
+  //!
+  //! \return The \a utf8 string, converted to UTF-16 encoding. If the
+  //!     conversion is lossy, U+FFFD “replacement characters” will be
+  //!     introduced.
+  static string16 ConvertUTF8ToUTF16(const std::string& utf8);
+
+  //! \brief Converts a UTF-8 string to UTF-16 and places it into a buffer of
+  //!     fixed size, taking care to `NUL`-terminate the buffer and not to
+  //!     overflow it. If the string will be truncated or if it cannot be
+  //!     converted losslessly, a warning is logged.
+  //!
+  //! Any unused portion of the \a destination buffer that is not written to by
+  //! the converted string will be overwritten with `NUL` UTF-16 code units,
+  //! thus, this function always writes \a destination_size `char16` units.
+  //!
+  //! If the conversion is lossy, U+FFFD “replacement characters” will be
+  //! introduced.
+  //!
+  //! \param[out] destination A pointer to the destination buffer, where the
+  //!     UTF-16-encoded string will be written.
+  //! \param[in] destination_size The size of \a destination in `char16` units,
+  //!     including space used by a `NUL` terminator. Must be at least 1.
+  //! \param[in] source The UTF-8-encoded input string.
+  static void AssignUTF8ToUTF16(char16* destination,
+                                size_t destination_size,
+                                const std::string& source);
+
+ private:
+  DISALLOW_IMPLICIT_CONSTRUCTORS(MinidumpWriterUtil);
+};
+
+}  // namespace internal
+}  // namespace crashpad
+
+#endif  // CRASHPAD_MINIDUMP_MINIDUMP_WRITER_UTIL_H_