crashpad/snapshot/mac/mach_o_image_reader.cc

735 lines
25 KiB
C++
Raw Normal View History

// Copyright 2014 The Crashpad Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "snapshot/mac/mach_o_image_reader.h"
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
#include <string.h>
#include <limits>
#include <utility>
#include "base/logging.h"
#include "base/stl_util.h"
#include "base/strings/stringprintf.h"
#include "client/crashpad_info.h"
#include "snapshot/mac/mach_o_image_segment_reader.h"
#include "snapshot/mac/mach_o_image_symbol_table_reader.h"
#include "snapshot/mac/process_reader_mac.h"
#include "util/mac/checked_mach_address_range.h"
#include "util/misc/implicit_cast.h"
namespace {
constexpr uint32_t kInvalidSegmentIndex = std::numeric_limits<uint32_t>::max();
} // namespace
namespace crashpad {
MachOImageReader::MachOImageReader()
: segments_(),
segment_map_(),
module_name_(),
module_info_(),
dylinker_name_(),
uuid_(),
address_(0),
size_(0),
slide_(0),
source_version_(0),
symtab_command_(),
dysymtab_command_(),
symbol_table_(),
id_dylib_command_(),
process_reader_(nullptr),
file_type_(0),
initialized_(),
symbol_table_initialized_() {
}
MachOImageReader::~MachOImageReader() {
}
bool MachOImageReader::Initialize(ProcessReaderMac* process_reader,
mach_vm_address_t address,
const std::string& name) {
INITIALIZATION_STATE_SET_INITIALIZING(initialized_);
process_reader_ = process_reader;
address_ = address;
module_name_ = name;
module_info_ =
base::StringPrintf(", module %s, address 0x%llx", name.c_str(), address);
process_types::mach_header mach_header;
if (!mach_header.Read(process_reader, address)) {
LOG(WARNING) << "could not read mach_header" << module_info_;
return false;
}
const bool is_64_bit = process_reader->Is64Bit();
const uint32_t kExpectedMagic = is_64_bit ? MH_MAGIC_64 : MH_MAGIC;
if (mach_header.magic != kExpectedMagic) {
LOG(WARNING) << base::StringPrintf("unexpected mach_header::magic 0x%08x",
mach_header.magic) << module_info_;
return false;
}
switch (mach_header.filetype) {
case MH_EXECUTE:
case MH_DYLIB:
case MH_DYLINKER:
case MH_BUNDLE:
file_type_ = mach_header.filetype;
break;
default:
LOG(WARNING) << base::StringPrintf(
"unexpected mach_header::filetype 0x%08x",
mach_header.filetype) << module_info_;
return false;
}
const uint32_t kExpectedSegmentCommand =
is_64_bit ? LC_SEGMENT_64 : LC_SEGMENT;
const uint32_t kUnexpectedSegmentCommand =
is_64_bit ? LC_SEGMENT : LC_SEGMENT_64;
const struct {
// Which method to call when encountering a load command matching |command|.
bool (MachOImageReader::*function)(mach_vm_address_t, const std::string&);
// The minimum size that may be allotted to store the load command.
size_t size;
// The load command to match.
uint32_t command;
// True if the load command must not appear more than one time.
bool singleton;
} kLoadCommandReaders[] = {
{
&MachOImageReader::ReadSegmentCommand,
process_types::segment_command::ExpectedSize(process_reader),
kExpectedSegmentCommand,
false,
},
{
&MachOImageReader::ReadSymTabCommand,
process_types::symtab_command::ExpectedSize(process_reader),
LC_SYMTAB,
true,
},
{
&MachOImageReader::ReadDySymTabCommand,
process_types::symtab_command::ExpectedSize(process_reader),
LC_DYSYMTAB,
true,
},
{
&MachOImageReader::ReadIdDylibCommand,
process_types::dylib_command::ExpectedSize(process_reader),
LC_ID_DYLIB,
true,
},
{
&MachOImageReader::ReadDylinkerCommand,
process_types::dylinker_command::ExpectedSize(process_reader),
LC_LOAD_DYLINKER,
true,
},
{
&MachOImageReader::ReadDylinkerCommand,
process_types::dylinker_command::ExpectedSize(process_reader),
LC_ID_DYLINKER,
true,
},
{
&MachOImageReader::ReadUUIDCommand,
process_types::uuid_command::ExpectedSize(process_reader),
LC_UUID,
true,
},
{
&MachOImageReader::ReadSourceVersionCommand,
process_types::source_version_command::ExpectedSize(process_reader),
LC_SOURCE_VERSION,
true,
},
// When reading a 64-bit process, no 32-bit segment commands should be
// present, and vice-versa.
{
&MachOImageReader::ReadUnexpectedCommand,
process_types::load_command::ExpectedSize(process_reader),
kUnexpectedSegmentCommand,
false,
},
};
// This vector is parallel to the kLoadCommandReaders array, and tracks
// whether a singleton load command matching the |command| field has been
// found yet.
std::vector<uint32_t> singleton_indices(base::size(kLoadCommandReaders),
kInvalidSegmentIndex);
size_t offset = mach_header.Size();
const mach_vm_address_t kLoadCommandAddressLimit =
address + offset + mach_header.sizeofcmds;
for (uint32_t load_command_index = 0;
load_command_index < mach_header.ncmds;
++load_command_index) {
mach_vm_address_t load_command_address = address + offset;
std::string load_command_info = base::StringPrintf(", load command %u/%u%s",
load_command_index,
mach_header.ncmds,
module_info_.c_str());
process_types::load_command load_command;
// Make sure that the basic load command structure doesnt overflow the
// space allotted for load commands.
if (load_command_address + load_command.ExpectedSize(process_reader) >
kLoadCommandAddressLimit) {
LOG(WARNING) << base::StringPrintf(
"load_command at 0x%llx exceeds sizeofcmds 0x%x",
load_command_address,
mach_header.sizeofcmds) << load_command_info;
return false;
}
if (!load_command.Read(process_reader, load_command_address)) {
LOG(WARNING) << "could not read load_command" << load_command_info;
return false;
}
load_command_info = base::StringPrintf(", load command 0x%x %u/%u%s",
load_command.cmd,
load_command_index,
mach_header.ncmds,
module_info_.c_str());
// Now that the load commands stated size is known, make sure that it
// doesnt overflow the space allotted for load commands.
if (load_command_address + load_command.cmdsize >
kLoadCommandAddressLimit) {
LOG(WARNING)
<< base::StringPrintf(
"load_command at 0x%llx cmdsize 0x%x exceeds sizeofcmds 0x%x",
load_command_address,
load_command.cmdsize,
mach_header.sizeofcmds) << load_command_info;
return false;
}
for (size_t reader_index = 0;
reader_index < base::size(kLoadCommandReaders);
++reader_index) {
if (load_command.cmd != kLoadCommandReaders[reader_index].command) {
continue;
}
if (load_command.cmdsize < kLoadCommandReaders[reader_index].size) {
LOG(WARNING) << base::StringPrintf(
"load command cmdsize 0x%x insufficient for 0x%zx",
load_command.cmdsize,
kLoadCommandReaders[reader_index].size)
<< load_command_info;
return false;
}
if (kLoadCommandReaders[reader_index].singleton) {
if (singleton_indices[reader_index] != kInvalidSegmentIndex) {
LOG(WARNING) << "duplicate load command at "
<< singleton_indices[reader_index] << load_command_info;
return false;
}
singleton_indices[reader_index] = load_command_index;
}
if (!((this)->*(kLoadCommandReaders[reader_index].function))(
load_command_address, load_command_info)) {
return false;
}
break;
}
offset += load_command.cmdsize;
}
// Now that the slide is known, push it into the segments.
for (const auto& segment : segments_) {
segment->SetSlide(slide_);
// This was already checked for the unslid values while the segments were
// read, but now its possible to check the slid values too. The individual
// sections dont need to be checked because they were verified to be
// contained within their respective segments when the segments were read.
mach_vm_address_t slid_segment_address = segment->Address();
mach_vm_size_t slid_segment_size = segment->Size();
CheckedMachAddressRange slid_segment_range(
process_reader_->Is64Bit(), slid_segment_address, slid_segment_size);
if (!slid_segment_range.IsValid()) {
LOG(WARNING) << base::StringPrintf(
"invalid slid segment range 0x%llx + 0x%llx, "
"segment ",
slid_segment_address,
slid_segment_size) << segment->Name() << module_info_;
return false;
}
}
if (!segment_map_.count(SEG_TEXT)) {
// The __TEXT segment is required. Even a module with no executable code
// will have a __TEXT segment encompassing the Mach-O header and load
// commands. Without a __TEXT segment, |size_| will not have been computed.
LOG(WARNING) << "no " SEG_TEXT " segment" << module_info_;
return false;
}
if (mach_header.filetype == MH_DYLIB && !id_dylib_command_) {
// This doesnt render a module unusable, its just weird and worth noting.
LOG(INFO) << "no LC_ID_DYLIB" << module_info_;
}
INITIALIZATION_STATE_SET_VALID(initialized_);
return true;
}
const MachOImageSegmentReader* MachOImageReader::GetSegmentByName(
const std::string& segment_name) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
const auto& iterator = segment_map_.find(segment_name);
if (iterator == segment_map_.end()) {
return nullptr;
}
return segments_[iterator->second].get();
}
const process_types::section* MachOImageReader::GetSectionByName(
const std::string& segment_name,
const std::string& section_name,
mach_vm_address_t* address) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
const MachOImageSegmentReader* segment = GetSegmentByName(segment_name);
if (!segment) {
return nullptr;
}
return segment->GetSectionByName(section_name, address);
}
const process_types::section* MachOImageReader::GetSectionAtIndex(
size_t index,
const MachOImageSegmentReader** containing_segment,
mach_vm_address_t* address) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
static_assert(NO_SECT == 0, "NO_SECT must be zero");
if (index == NO_SECT) {
LOG(WARNING) << "section index " << index << " out of range";
return nullptr;
}
// Switch to a more comfortable 0-based index.
size_t local_index = index - 1;
for (const auto& segment : segments_) {
size_t nsects = segment->nsects();
if (local_index < nsects) {
const process_types::section* section =
segment->GetSectionAtIndex(local_index, address);
if (containing_segment) {
*containing_segment = segment.get();
}
return section;
}
local_index -= nsects;
}
LOG(WARNING) << "section index " << index << " out of range";
return nullptr;
}
bool MachOImageReader::LookUpExternalDefinedSymbol(
const std::string& name,
mach_vm_address_t* value) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
if (symbol_table_initialized_.is_uninitialized()) {
InitializeSymbolTable();
}
if (!symbol_table_initialized_.is_valid() || !symbol_table_) {
return false;
}
const MachOImageSymbolTableReader::SymbolInformation* symbol_info =
symbol_table_->LookUpExternalDefinedSymbol(name);
if (!symbol_info) {
return false;
}
if (symbol_info->section == NO_SECT) {
// This is an absolute (N_ABS) symbol, which requires no further validation
// or processing.
*value = symbol_info->value;
return true;
}
// This is a symbol defined in a particular section, so make sure that its
// valid for that section and fix it up for any “slide” as needed.
mach_vm_address_t section_address;
const MachOImageSegmentReader* segment;
const process_types::section* section =
GetSectionAtIndex(symbol_info->section, &segment, &section_address);
if (!section) {
return false;
}
mach_vm_address_t slid_value =
symbol_info->value + (segment->SegmentSlides() ? slide_ : 0);
// The __mh_execute_header (_MH_EXECUTE_SYM) symbol is weird. In
// position-independent executables, it shows up in the symbol table as a
// symbol in section 1, although its not really in that section. It points to
// the mach_header[_64], which is the beginning of the __TEXT segment, and the
// __text section normally begins after the load commands in the __TEXT
// segment. The range check below will fail for this symbol, because its not
// really in the section it claims to be in. See Xcode 5.1
// ld64-236.3/src/ld/OutputFile.cpp ld::tool::OutputFile::buildSymbolTable().
// There, ld takes symbols that refer to anything in the mach_header[_64] and
// marks them as being in section 1. Here, section 1 is treated in this same
// special way as long as its in the __TEXT segment that begins at the start
// of the image, which is normally the case, and as long as the symbols value
// is the base of the image.
//
// This only happens for PIE executables, because __mh_execute_header needs
// to slide. In non-PIE executables, __mh_execute_header is an absolute
// symbol.
CheckedMachAddressRange section_range(
process_reader_->Is64Bit(), section_address, section->size);
if (!section_range.ContainsValue(slid_value) &&
!(symbol_info->section == 1 && segment->Name() == SEG_TEXT &&
slid_value == Address())) {
std::string section_name_full =
MachOImageSegmentReader::SegmentAndSectionNameString(section->segname,
section->sectname);
LOG(WARNING) << base::StringPrintf(
"symbol %s (0x%llx) outside of section %s (0x%llx + "
"0x%llx)",
name.c_str(),
slid_value,
section_name_full.c_str(),
section_address,
section->size) << module_info_;
return false;
}
*value = slid_value;
return true;
}
uint32_t MachOImageReader::DylibVersion() const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
DCHECK_EQ(FileType(), implicit_cast<uint32_t>(MH_DYLIB));
if (id_dylib_command_) {
return id_dylib_command_->dylib_current_version;
}
// In case this was a weird dylib without an LC_ID_DYLIB command.
return 0;
}
void MachOImageReader::UUID(crashpad::UUID* uuid) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
memcpy(uuid, &uuid_, sizeof(uuid_));
}
bool MachOImageReader::GetCrashpadInfo(
process_types::CrashpadInfo* crashpad_info) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
mach_vm_address_t crashpad_info_address;
const process_types::section* crashpad_info_section =
GetSectionByName(SEG_DATA, "crashpad_info", &crashpad_info_address);
if (!crashpad_info_section) {
return false;
}
if (crashpad_info_section->size <
crashpad_info->MinimumSize(process_reader_)) {
LOG(WARNING) << "small crashpad info section size "
<< crashpad_info_section->size << module_info_;
return false;
}
// This Read() will zero out anything beyond the structures declared size.
if (!crashpad_info->Read(process_reader_, crashpad_info_address)) {
LOG(WARNING) << "could not read crashpad info" << module_info_;
return false;
}
if (crashpad_info->signature != CrashpadInfo::kSignature ||
crashpad_info->version != 1) {
LOG(WARNING) << base::StringPrintf(
"unexpected crashpad info signature 0x%x, version %u%s",
crashpad_info->signature,
crashpad_info->version,
module_info_.c_str());
return false;
}
// Dont require strict equality, to leave wiggle room for sloppy linkers.
if (crashpad_info->size > crashpad_info_section->size) {
LOG(WARNING) << "crashpad info struct size " << crashpad_info->size
<< " large for section size " << crashpad_info_section->size
<< module_info_;
return false;
}
if (crashpad_info->size > crashpad_info->ExpectedSize(process_reader_)) {
// This isnt strictly a problem, because unknown fields will simply be
// ignored, but it may be of diagnostic interest.
LOG(INFO) << "large crashpad info size " << crashpad_info->size
<< module_info_;
}
return true;
}
template <typename T>
bool MachOImageReader::ReadLoadCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info,
uint32_t expected_load_command_id,
T* load_command) {
if (!load_command->Read(process_reader_, load_command_address)) {
LOG(WARNING) << "could not read load command" << load_command_info;
return false;
}
DCHECK_GE(load_command->cmdsize, load_command->Size());
DCHECK_EQ(load_command->cmd, expected_load_command_id);
return true;
}
bool MachOImageReader::ReadSegmentCommand(
mach_vm_address_t load_command_address,
const std::string& load_command_info) {
size_t segment_index = segments_.size();
segments_.push_back(std::make_unique<MachOImageSegmentReader>());
MachOImageSegmentReader* segment = segments_.back().get();
if (!segment->Initialize(process_reader_,
load_command_address,
load_command_info,
module_name_,
file_type_)) {
segments_.pop_back();
return false;
}
// At this point, the segment itself is considered valid, but if one of the
// next checks fails, it will render the module invalid. If any of the next
// checks fail, this method should return false, but it doesnt need to bother
// removing the segment from segments_. The segment will be properly released
// when the image is destroyed, and the image wont be usable because
// initialization wont have completed. Most importantly, leaving the segment
// in segments_ means that no other structures (such as perhaps segment_map_)
// become inconsistent or require cleanup.
const std::string segment_name = segment->Name();
const auto insert_result =
segment_map_.insert(std::make_pair(segment_name, segment_index));
if (!insert_result.second) {
LOG(WARNING) << base::StringPrintf("duplicate %s segment at %zu and %zu",
segment_name.c_str(),
insert_result.first->second,
segment_index) << load_command_info;
return false;
}
if (segment_name == SEG_TEXT) {
mac: Accept modules in 10.13’s dyld shared cache In 10.13, modules loaded from the dyld shared cache appear with __TEXT segments that have a nonzero “fileoff” (file offset). Previously, the fileoff was always 0. Previously, the fileoff for segments in the dyld shared cache was the actual offset into the shared cache (not 0), but special consideration was given to __TEXT segments which were forced to 0. See 10.12.4 dyld-433.5/interlinked-dylibs/OptimizerLinkedit.cpp LinkeditOptimizer<>::updateLoadCommands(). Note the comment there where the __TEXT segment’s apparent fileoff is set to 0: // HACK until lldb fixed in: <rdar://problem/20357466> // DynamicLoaderMacOSXDYLD fixes for Monarch dyld shared cache Refer also to the lldb commit that references the above, http://llvm.org/viewvc/llvm-project?view=revision&revision=233714. Evidently, update_dyld_shared_cache has been revised to no longer apply this hack in 10.13. Crashpad’s sanity check for __TEXT segments having a fileoff of 0 is no longer valid, and causes it to reject modules loaded from the dyld shared cache. Since this was just a sanity check, remove it entirely. This caused module information for modules loaded from the dyld shared cache to be missing from minidumps produced on 10.13, which in turn prevented symbolization in frames belonging to most system libraries. For reasons not yet understood, I don’t see this problem in Chrome on 10.13db1 17A264c on a test virtual machine (HFS+ filesystem), although I do see it on actual hardware (APFS filesystem), and I do see it in Crashpad’s tests and reduced testcases on both as well. Bug: crashpad:185, crashpad:189 Test: crashpad_snapshot_test MachOImageReader.Self_DyldImages:ProcessReader.SelfModules:ProcessReader.ChildModules:ProcessTypes.DyldImagesSelf Change-Id: I8b0a22c55c33ce920804a879f6fab67272f3556e Reviewed-on: https://chromium-review.googlesource.com/535576 Reviewed-by: Robert Sesek <rsesek@chromium.org>
2017-06-14 15:01:53 -04:00
mach_vm_size_t vmsize = segment->vmsize();
if (vmsize == 0) {
LOG(WARNING) << "zero-sized " SEG_TEXT " segment" << load_command_info;
return false;
}
size_ = vmsize;
// The slide is computed as the difference between the __TEXT segments
// preferred and actual load addresses. This is the same way that dyld
// computes slide. See 10.9.2 dyld-239.4/src/dyldInitialization.cpp
// slideOfMainExecutable().
slide_ = address_ - segment->vmaddr();
}
return true;
}
bool MachOImageReader::ReadSymTabCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info) {
symtab_command_.reset(new process_types::symtab_command());
return ReadLoadCommand(load_command_address,
load_command_info,
LC_SYMTAB,
symtab_command_.get());
}
bool MachOImageReader::ReadDySymTabCommand(
mach_vm_address_t load_command_address,
const std::string& load_command_info) {
dysymtab_command_.reset(new process_types::dysymtab_command());
return ReadLoadCommand(load_command_address,
load_command_info,
LC_DYSYMTAB,
dysymtab_command_.get());
}
bool MachOImageReader::ReadIdDylibCommand(
mach_vm_address_t load_command_address,
const std::string& load_command_info) {
if (file_type_ != MH_DYLIB) {
LOG(WARNING) << base::StringPrintf(
"LC_ID_DYLIB inappropriate in non-dylib file type 0x%x",
file_type_) << load_command_info;
return false;
}
DCHECK(!id_dylib_command_);
id_dylib_command_.reset(new process_types::dylib_command());
return ReadLoadCommand(load_command_address,
load_command_info,
LC_ID_DYLIB,
id_dylib_command_.get());
}
bool MachOImageReader::ReadDylinkerCommand(
mach_vm_address_t load_command_address,
const std::string& load_command_info) {
if (file_type_ != MH_EXECUTE && file_type_ != MH_DYLINKER) {
LOG(WARNING) << base::StringPrintf(
"LC_LOAD_DYLINKER/LC_ID_DYLINKER inappropriate in file "
"type 0x%x",
file_type_) << load_command_info;
return false;
}
const uint32_t kExpectedCommand =
file_type_ == MH_DYLINKER ? LC_ID_DYLINKER : LC_LOAD_DYLINKER;
process_types::dylinker_command dylinker_command;
if (!ReadLoadCommand(load_command_address,
load_command_info,
kExpectedCommand,
&dylinker_command)) {
return false;
}
if (!process_reader_->Memory()->ReadCStringSizeLimited(
load_command_address + dylinker_command.name,
dylinker_command.cmdsize - dylinker_command.name,
&dylinker_name_)) {
LOG(WARNING) << "could not read dylinker_command name" << load_command_info;
return false;
}
return true;
}
bool MachOImageReader::ReadUUIDCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info) {
process_types::uuid_command uuid_command;
if (!ReadLoadCommand(
load_command_address, load_command_info, LC_UUID, &uuid_command)) {
return false;
}
uuid_.InitializeFromBytes(uuid_command.uuid);
return true;
}
bool MachOImageReader::ReadSourceVersionCommand(
mach_vm_address_t load_command_address,
const std::string& load_command_info) {
process_types::source_version_command source_version_command;
if (!ReadLoadCommand(load_command_address,
load_command_info,
LC_SOURCE_VERSION,
&source_version_command)) {
return false;
}
source_version_ = source_version_command.version;
return true;
}
bool MachOImageReader::ReadUnexpectedCommand(
mach_vm_address_t load_command_address,
const std::string& load_command_info) {
LOG(WARNING) << "unexpected load command" << load_command_info;
return false;
}
void MachOImageReader::InitializeSymbolTable() const {
DCHECK(symbol_table_initialized_.is_uninitialized());
symbol_table_initialized_.set_invalid();
if (!symtab_command_) {
// Its technically valid for there to be no LC_SYMTAB, and in that case,
// any symbol lookups should fail. Mark the symbol table as valid, and
// LookUpExternalDefinedSymbol() will understand what it means when this is
// valid but symbol_table_ is not present.
symbol_table_initialized_.set_valid();
return;
}
// Find the __LINKEDIT segment. Technically, the symbol table can be in any
// mapped segment, but by convention, its in the one named __LINKEDIT.
const MachOImageSegmentReader* linkedit_segment =
GetSegmentByName(SEG_LINKEDIT);
if (!linkedit_segment) {
LOG(WARNING) << "no " SEG_LINKEDIT " segment";
return;
}
symbol_table_.reset(new MachOImageSymbolTableReader());
if (!symbol_table_->Initialize(process_reader_,
symtab_command_.get(),
dysymtab_command_.get(),
linkedit_segment,
module_info_)) {
symbol_table_.reset();
return;
}
symbol_table_initialized_.set_valid();
}
} // namespace crashpad