crashpad/snapshot/mac/mach_o_image_reader.cc
Mark Mentovai 6278690abe Update copyright boilerplate, 2022 edition (Crashpad)
sed -i '' -E -e 's/Copyright (.+) The Crashpad Authors\. All rights reserved\.$/Copyright \1 The Crashpad Authors/' $(git grep -El 'Copyright (.+) The Crashpad Authors\. All rights reserved\.$')

Bug: chromium:1098010
Change-Id: I8d6138469ddbe3d281a5d83f64cf918ec2491611
Reviewed-on: https://chromium-review.googlesource.com/c/crashpad/crashpad/+/3878262
Reviewed-by: Joshua Peraza <jperaza@chromium.org>
Commit-Queue: Mark Mentovai <mark@chromium.org>
2022-09-06 23:54:07 +00:00

734 lines
25 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2014 The Crashpad Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "snapshot/mac/mach_o_image_reader.h"
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
#include <string.h>
#include <iterator>
#include <limits>
#include <utility>
#include "base/logging.h"
#include "base/strings/stringprintf.h"
#include "client/crashpad_info.h"
#include "snapshot/mac/mach_o_image_segment_reader.h"
#include "snapshot/mac/mach_o_image_symbol_table_reader.h"
#include "snapshot/mac/process_reader_mac.h"
#include "util/mac/checked_mach_address_range.h"
#include "util/misc/implicit_cast.h"
namespace {
constexpr uint32_t kInvalidSegmentIndex = std::numeric_limits<uint32_t>::max();
} // namespace
namespace crashpad {
MachOImageReader::MachOImageReader()
: segments_(),
segment_map_(),
module_name_(),
module_info_(),
dylinker_name_(),
uuid_(),
address_(0),
size_(0),
slide_(0),
source_version_(0),
symtab_command_(),
dysymtab_command_(),
symbol_table_(),
id_dylib_command_(),
process_reader_(nullptr),
file_type_(0),
initialized_(),
symbol_table_initialized_() {
}
MachOImageReader::~MachOImageReader() {
}
bool MachOImageReader::Initialize(ProcessReaderMac* process_reader,
mach_vm_address_t address,
const std::string& name) {
INITIALIZATION_STATE_SET_INITIALIZING(initialized_);
process_reader_ = process_reader;
address_ = address;
module_name_ = name;
module_info_ =
base::StringPrintf(", module %s, address 0x%llx", name.c_str(), address);
process_types::mach_header mach_header;
if (!mach_header.Read(process_reader, address)) {
LOG(WARNING) << "could not read mach_header" << module_info_;
return false;
}
const bool is_64_bit = process_reader->Is64Bit();
const uint32_t kExpectedMagic = is_64_bit ? MH_MAGIC_64 : MH_MAGIC;
if (mach_header.magic != kExpectedMagic) {
LOG(WARNING) << base::StringPrintf("unexpected mach_header::magic 0x%08x",
mach_header.magic) << module_info_;
return false;
}
switch (mach_header.filetype) {
case MH_EXECUTE:
case MH_DYLIB:
case MH_DYLINKER:
case MH_BUNDLE:
file_type_ = mach_header.filetype;
break;
default:
LOG(WARNING) << base::StringPrintf(
"unexpected mach_header::filetype 0x%08x",
mach_header.filetype) << module_info_;
return false;
}
const uint32_t kExpectedSegmentCommand =
is_64_bit ? LC_SEGMENT_64 : LC_SEGMENT;
const uint32_t kUnexpectedSegmentCommand =
is_64_bit ? LC_SEGMENT : LC_SEGMENT_64;
const struct {
// Which method to call when encountering a load command matching |command|.
bool (MachOImageReader::*function)(mach_vm_address_t, const std::string&);
// The minimum size that may be allotted to store the load command.
size_t size;
// The load command to match.
uint32_t command;
// True if the load command must not appear more than one time.
bool singleton;
} kLoadCommandReaders[] = {
{
&MachOImageReader::ReadSegmentCommand,
process_types::segment_command::ExpectedSize(process_reader),
kExpectedSegmentCommand,
false,
},
{
&MachOImageReader::ReadSymTabCommand,
process_types::symtab_command::ExpectedSize(process_reader),
LC_SYMTAB,
true,
},
{
&MachOImageReader::ReadDySymTabCommand,
process_types::symtab_command::ExpectedSize(process_reader),
LC_DYSYMTAB,
true,
},
{
&MachOImageReader::ReadIdDylibCommand,
process_types::dylib_command::ExpectedSize(process_reader),
LC_ID_DYLIB,
true,
},
{
&MachOImageReader::ReadDylinkerCommand,
process_types::dylinker_command::ExpectedSize(process_reader),
LC_LOAD_DYLINKER,
true,
},
{
&MachOImageReader::ReadDylinkerCommand,
process_types::dylinker_command::ExpectedSize(process_reader),
LC_ID_DYLINKER,
true,
},
{
&MachOImageReader::ReadUUIDCommand,
process_types::uuid_command::ExpectedSize(process_reader),
LC_UUID,
true,
},
{
&MachOImageReader::ReadSourceVersionCommand,
process_types::source_version_command::ExpectedSize(process_reader),
LC_SOURCE_VERSION,
true,
},
// When reading a 64-bit process, no 32-bit segment commands should be
// present, and vice-versa.
{
&MachOImageReader::ReadUnexpectedCommand,
process_types::load_command::ExpectedSize(process_reader),
kUnexpectedSegmentCommand,
false,
},
};
// This vector is parallel to the kLoadCommandReaders array, and tracks
// whether a singleton load command matching the |command| field has been
// found yet.
std::vector<uint32_t> singleton_indices(std::size(kLoadCommandReaders),
kInvalidSegmentIndex);
size_t offset = mach_header.Size();
const mach_vm_address_t kLoadCommandAddressLimit =
address + offset + mach_header.sizeofcmds;
for (uint32_t load_command_index = 0;
load_command_index < mach_header.ncmds;
++load_command_index) {
mach_vm_address_t load_command_address = address + offset;
std::string load_command_info = base::StringPrintf(", load command %u/%u%s",
load_command_index,
mach_header.ncmds,
module_info_.c_str());
process_types::load_command load_command;
// Make sure that the basic load command structure doesnt overflow the
// space allotted for load commands.
if (load_command_address + load_command.ExpectedSize(process_reader) >
kLoadCommandAddressLimit) {
LOG(WARNING) << base::StringPrintf(
"load_command at 0x%llx exceeds sizeofcmds 0x%x",
load_command_address,
mach_header.sizeofcmds) << load_command_info;
return false;
}
if (!load_command.Read(process_reader, load_command_address)) {
LOG(WARNING) << "could not read load_command" << load_command_info;
return false;
}
load_command_info = base::StringPrintf(", load command 0x%x %u/%u%s",
load_command.cmd,
load_command_index,
mach_header.ncmds,
module_info_.c_str());
// Now that the load commands stated size is known, make sure that it
// doesnt overflow the space allotted for load commands.
if (load_command_address + load_command.cmdsize >
kLoadCommandAddressLimit) {
LOG(WARNING)
<< base::StringPrintf(
"load_command at 0x%llx cmdsize 0x%x exceeds sizeofcmds 0x%x",
load_command_address,
load_command.cmdsize,
mach_header.sizeofcmds) << load_command_info;
return false;
}
for (size_t reader_index = 0; reader_index < std::size(kLoadCommandReaders);
++reader_index) {
if (load_command.cmd != kLoadCommandReaders[reader_index].command) {
continue;
}
if (load_command.cmdsize < kLoadCommandReaders[reader_index].size) {
LOG(WARNING) << base::StringPrintf(
"load command cmdsize 0x%x insufficient for 0x%zx",
load_command.cmdsize,
kLoadCommandReaders[reader_index].size)
<< load_command_info;
return false;
}
if (kLoadCommandReaders[reader_index].singleton) {
if (singleton_indices[reader_index] != kInvalidSegmentIndex) {
LOG(WARNING) << "duplicate load command at "
<< singleton_indices[reader_index] << load_command_info;
return false;
}
singleton_indices[reader_index] = load_command_index;
}
if (!((this)->*(kLoadCommandReaders[reader_index].function))(
load_command_address, load_command_info)) {
return false;
}
break;
}
offset += load_command.cmdsize;
}
// Now that the slide is known, push it into the segments.
for (const auto& segment : segments_) {
segment->SetSlide(slide_);
// This was already checked for the unslid values while the segments were
// read, but now its possible to check the slid values too. The individual
// sections dont need to be checked because they were verified to be
// contained within their respective segments when the segments were read.
mach_vm_address_t slid_segment_address = segment->Address();
mach_vm_size_t slid_segment_size = segment->Size();
CheckedMachAddressRange slid_segment_range(
process_reader_->Is64Bit(), slid_segment_address, slid_segment_size);
if (!slid_segment_range.IsValid()) {
LOG(WARNING) << base::StringPrintf(
"invalid slid segment range 0x%llx + 0x%llx, "
"segment ",
slid_segment_address,
slid_segment_size) << segment->Name() << module_info_;
return false;
}
}
if (!segment_map_.count(SEG_TEXT)) {
// The __TEXT segment is required. Even a module with no executable code
// will have a __TEXT segment encompassing the Mach-O header and load
// commands. Without a __TEXT segment, |size_| will not have been computed.
LOG(WARNING) << "no " SEG_TEXT " segment" << module_info_;
return false;
}
if (mach_header.filetype == MH_DYLIB && !id_dylib_command_) {
// This doesnt render a module unusable, its just weird and worth noting.
LOG(INFO) << "no LC_ID_DYLIB" << module_info_;
}
INITIALIZATION_STATE_SET_VALID(initialized_);
return true;
}
const MachOImageSegmentReader* MachOImageReader::GetSegmentByName(
const std::string& segment_name) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
const auto& iterator = segment_map_.find(segment_name);
if (iterator == segment_map_.end()) {
return nullptr;
}
return segments_[iterator->second].get();
}
const process_types::section* MachOImageReader::GetSectionByName(
const std::string& segment_name,
const std::string& section_name,
mach_vm_address_t* address) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
const MachOImageSegmentReader* segment = GetSegmentByName(segment_name);
if (!segment) {
return nullptr;
}
return segment->GetSectionByName(section_name, address);
}
const process_types::section* MachOImageReader::GetSectionAtIndex(
size_t index,
const MachOImageSegmentReader** containing_segment,
mach_vm_address_t* address) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
static_assert(NO_SECT == 0, "NO_SECT must be zero");
if (index == NO_SECT) {
LOG(WARNING) << "section index " << index << " out of range";
return nullptr;
}
// Switch to a more comfortable 0-based index.
size_t local_index = index - 1;
for (const auto& segment : segments_) {
size_t nsects = segment->nsects();
if (local_index < nsects) {
const process_types::section* section =
segment->GetSectionAtIndex(local_index, address);
if (containing_segment) {
*containing_segment = segment.get();
}
return section;
}
local_index -= nsects;
}
LOG(WARNING) << "section index " << index << " out of range";
return nullptr;
}
bool MachOImageReader::LookUpExternalDefinedSymbol(
const std::string& name,
mach_vm_address_t* value) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
if (symbol_table_initialized_.is_uninitialized()) {
InitializeSymbolTable();
}
if (!symbol_table_initialized_.is_valid() || !symbol_table_) {
return false;
}
const MachOImageSymbolTableReader::SymbolInformation* symbol_info =
symbol_table_->LookUpExternalDefinedSymbol(name);
if (!symbol_info) {
return false;
}
if (symbol_info->section == NO_SECT) {
// This is an absolute (N_ABS) symbol, which requires no further validation
// or processing.
*value = symbol_info->value;
return true;
}
// This is a symbol defined in a particular section, so make sure that its
// valid for that section and fix it up for any “slide” as needed.
mach_vm_address_t section_address;
const MachOImageSegmentReader* segment;
const process_types::section* section =
GetSectionAtIndex(symbol_info->section, &segment, &section_address);
if (!section) {
return false;
}
mach_vm_address_t slid_value =
symbol_info->value + (segment->SegmentSlides() ? slide_ : 0);
// The __mh_execute_header (_MH_EXECUTE_SYM) symbol is weird. In
// position-independent executables, it shows up in the symbol table as a
// symbol in section 1, although its not really in that section. It points to
// the mach_header[_64], which is the beginning of the __TEXT segment, and the
// __text section normally begins after the load commands in the __TEXT
// segment. The range check below will fail for this symbol, because its not
// really in the section it claims to be in. See Xcode 5.1
// ld64-236.3/src/ld/OutputFile.cpp ld::tool::OutputFile::buildSymbolTable().
// There, ld takes symbols that refer to anything in the mach_header[_64] and
// marks them as being in section 1. Here, section 1 is treated in this same
// special way as long as its in the __TEXT segment that begins at the start
// of the image, which is normally the case, and as long as the symbols value
// is the base of the image.
//
// This only happens for PIE executables, because __mh_execute_header needs
// to slide. In non-PIE executables, __mh_execute_header is an absolute
// symbol.
CheckedMachAddressRange section_range(
process_reader_->Is64Bit(), section_address, section->size);
if (!section_range.ContainsValue(slid_value) &&
!(symbol_info->section == 1 && segment->Name() == SEG_TEXT &&
slid_value == Address())) {
std::string section_name_full =
MachOImageSegmentReader::SegmentAndSectionNameString(section->segname,
section->sectname);
LOG(WARNING) << base::StringPrintf(
"symbol %s (0x%llx) outside of section %s (0x%llx + "
"0x%llx)",
name.c_str(),
slid_value,
section_name_full.c_str(),
section_address,
section->size) << module_info_;
return false;
}
*value = slid_value;
return true;
}
uint32_t MachOImageReader::DylibVersion() const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
DCHECK_EQ(FileType(), implicit_cast<uint32_t>(MH_DYLIB));
if (id_dylib_command_) {
return id_dylib_command_->dylib_current_version;
}
// In case this was a weird dylib without an LC_ID_DYLIB command.
return 0;
}
void MachOImageReader::UUID(crashpad::UUID* uuid) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
memcpy(uuid, &uuid_, sizeof(uuid_));
}
bool MachOImageReader::GetCrashpadInfo(
process_types::CrashpadInfo* crashpad_info) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
mach_vm_address_t crashpad_info_address;
const process_types::section* crashpad_info_section =
GetSectionByName(SEG_DATA, "crashpad_info", &crashpad_info_address);
if (!crashpad_info_section) {
return false;
}
if (crashpad_info_section->size <
crashpad_info->MinimumSize(process_reader_)) {
LOG(WARNING) << "small crashpad info section size "
<< crashpad_info_section->size << module_info_;
return false;
}
// This Read() will zero out anything beyond the structures declared size.
if (!crashpad_info->Read(process_reader_, crashpad_info_address)) {
LOG(WARNING) << "could not read crashpad info" << module_info_;
return false;
}
if (crashpad_info->signature != CrashpadInfo::kSignature ||
crashpad_info->version != 1) {
LOG(WARNING) << base::StringPrintf(
"unexpected crashpad info signature 0x%x, version %u%s",
crashpad_info->signature,
crashpad_info->version,
module_info_.c_str());
return false;
}
// Dont require strict equality, to leave wiggle room for sloppy linkers.
if (crashpad_info->size > crashpad_info_section->size) {
LOG(WARNING) << "crashpad info struct size " << crashpad_info->size
<< " large for section size " << crashpad_info_section->size
<< module_info_;
return false;
}
if (crashpad_info->size > crashpad_info->ExpectedSize(process_reader_)) {
// This isnt strictly a problem, because unknown fields will simply be
// ignored, but it may be of diagnostic interest.
LOG(INFO) << "large crashpad info size " << crashpad_info->size
<< module_info_;
}
return true;
}
template <typename T>
bool MachOImageReader::ReadLoadCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info,
uint32_t expected_load_command_id,
T* load_command) {
if (!load_command->Read(process_reader_, load_command_address)) {
LOG(WARNING) << "could not read load command" << load_command_info;
return false;
}
DCHECK_GE(load_command->cmdsize, load_command->Size());
DCHECK_EQ(load_command->cmd, expected_load_command_id);
return true;
}
bool MachOImageReader::ReadSegmentCommand(
mach_vm_address_t load_command_address,
const std::string& load_command_info) {
size_t segment_index = segments_.size();
segments_.push_back(std::make_unique<MachOImageSegmentReader>());
MachOImageSegmentReader* segment = segments_.back().get();
if (!segment->Initialize(process_reader_,
load_command_address,
load_command_info,
module_name_,
file_type_)) {
segments_.pop_back();
return false;
}
// At this point, the segment itself is considered valid, but if one of the
// next checks fails, it will render the module invalid. If any of the next
// checks fail, this method should return false, but it doesnt need to bother
// removing the segment from segments_. The segment will be properly released
// when the image is destroyed, and the image wont be usable because
// initialization wont have completed. Most importantly, leaving the segment
// in segments_ means that no other structures (such as perhaps segment_map_)
// become inconsistent or require cleanup.
const std::string segment_name = segment->Name();
const auto insert_result =
segment_map_.insert(std::make_pair(segment_name, segment_index));
if (!insert_result.second) {
LOG(WARNING) << base::StringPrintf("duplicate %s segment at %zu and %zu",
segment_name.c_str(),
insert_result.first->second,
segment_index) << load_command_info;
return false;
}
if (segment_name == SEG_TEXT) {
mach_vm_size_t vmsize = segment->vmsize();
if (vmsize == 0) {
LOG(WARNING) << "zero-sized " SEG_TEXT " segment" << load_command_info;
return false;
}
size_ = vmsize;
// The slide is computed as the difference between the __TEXT segments
// preferred and actual load addresses. This is the same way that dyld
// computes slide. See 10.9.2 dyld-239.4/src/dyldInitialization.cpp
// slideOfMainExecutable().
slide_ = address_ - segment->vmaddr();
}
return true;
}
bool MachOImageReader::ReadSymTabCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info) {
symtab_command_.reset(new process_types::symtab_command());
return ReadLoadCommand(load_command_address,
load_command_info,
LC_SYMTAB,
symtab_command_.get());
}
bool MachOImageReader::ReadDySymTabCommand(
mach_vm_address_t load_command_address,
const std::string& load_command_info) {
dysymtab_command_.reset(new process_types::dysymtab_command());
return ReadLoadCommand(load_command_address,
load_command_info,
LC_DYSYMTAB,
dysymtab_command_.get());
}
bool MachOImageReader::ReadIdDylibCommand(
mach_vm_address_t load_command_address,
const std::string& load_command_info) {
if (file_type_ != MH_DYLIB) {
LOG(WARNING) << base::StringPrintf(
"LC_ID_DYLIB inappropriate in non-dylib file type 0x%x",
file_type_) << load_command_info;
return false;
}
DCHECK(!id_dylib_command_);
id_dylib_command_.reset(new process_types::dylib_command());
return ReadLoadCommand(load_command_address,
load_command_info,
LC_ID_DYLIB,
id_dylib_command_.get());
}
bool MachOImageReader::ReadDylinkerCommand(
mach_vm_address_t load_command_address,
const std::string& load_command_info) {
if (file_type_ != MH_EXECUTE && file_type_ != MH_DYLINKER) {
LOG(WARNING) << base::StringPrintf(
"LC_LOAD_DYLINKER/LC_ID_DYLINKER inappropriate in file "
"type 0x%x",
file_type_) << load_command_info;
return false;
}
const uint32_t kExpectedCommand =
file_type_ == MH_DYLINKER ? LC_ID_DYLINKER : LC_LOAD_DYLINKER;
process_types::dylinker_command dylinker_command;
if (!ReadLoadCommand(load_command_address,
load_command_info,
kExpectedCommand,
&dylinker_command)) {
return false;
}
if (!process_reader_->Memory()->ReadCStringSizeLimited(
load_command_address + dylinker_command.name,
dylinker_command.cmdsize - dylinker_command.name,
&dylinker_name_)) {
LOG(WARNING) << "could not read dylinker_command name" << load_command_info;
return false;
}
return true;
}
bool MachOImageReader::ReadUUIDCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info) {
process_types::uuid_command uuid_command;
if (!ReadLoadCommand(
load_command_address, load_command_info, LC_UUID, &uuid_command)) {
return false;
}
uuid_.InitializeFromBytes(uuid_command.uuid);
return true;
}
bool MachOImageReader::ReadSourceVersionCommand(
mach_vm_address_t load_command_address,
const std::string& load_command_info) {
process_types::source_version_command source_version_command;
if (!ReadLoadCommand(load_command_address,
load_command_info,
LC_SOURCE_VERSION,
&source_version_command)) {
return false;
}
source_version_ = source_version_command.version;
return true;
}
bool MachOImageReader::ReadUnexpectedCommand(
mach_vm_address_t load_command_address,
const std::string& load_command_info) {
LOG(WARNING) << "unexpected load command" << load_command_info;
return false;
}
void MachOImageReader::InitializeSymbolTable() const {
DCHECK(symbol_table_initialized_.is_uninitialized());
symbol_table_initialized_.set_invalid();
if (!symtab_command_) {
// Its technically valid for there to be no LC_SYMTAB, and in that case,
// any symbol lookups should fail. Mark the symbol table as valid, and
// LookUpExternalDefinedSymbol() will understand what it means when this is
// valid but symbol_table_ is not present.
symbol_table_initialized_.set_valid();
return;
}
// Find the __LINKEDIT segment. Technically, the symbol table can be in any
// mapped segment, but by convention, its in the one named __LINKEDIT.
const MachOImageSegmentReader* linkedit_segment =
GetSegmentByName(SEG_LINKEDIT);
if (!linkedit_segment) {
LOG(WARNING) << "no " SEG_LINKEDIT " segment";
return;
}
symbol_table_.reset(new MachOImageSymbolTableReader());
if (!symbol_table_->Initialize(process_reader_,
symtab_command_.get(),
dysymtab_command_.get(),
linkedit_segment,
module_info_)) {
symbol_table_.reset();
return;
}
symbol_table_initialized_.set_valid();
}
} // namespace crashpad