Add MachOImageSymbolTableReader and hook it up to MachOImageReader.

TEST=util_test MachOImageReader.*
R=rsesek@chromium.org

Review URL: https://codereview.chromium.org/539263003
This commit is contained in:
Mark Mentovai 2014-09-05 16:53:18 -04:00
parent 3d4eeae864
commit 2cae118b60
8 changed files with 960 additions and 135 deletions

View File

@ -25,6 +25,7 @@
#include "base/strings/stringprintf.h"
#include "util/mac/checked_mach_address_range.h"
#include "util/mac/mach_o_image_segment_reader.h"
#include "util/mac/mach_o_image_symbol_table_reader.h"
#include "util/mac/process_reader.h"
namespace {
@ -47,10 +48,12 @@ MachOImageReader::MachOImageReader()
source_version_(0),
symtab_command_(),
dysymtab_command_(),
symbol_table_(),
id_dylib_command_(),
process_reader_(NULL),
file_type_(0),
initialized_() {
initialized_(),
symbol_table_initialized_() {
}
MachOImageReader::~MachOImageReader() {
@ -224,20 +227,18 @@ bool MachOImageReader::Initialize(ProcessReader* process_reader,
}
if (load_command.cmdsize < kLoadCommandReaders[reader_index].size) {
LOG(WARNING)
<< base::StringPrintf(
"load command cmdsize 0x%x insufficient for 0x%zx",
load_command.cmdsize,
kLoadCommandReaders[reader_index].size)
<< load_command_info;
LOG(WARNING) << base::StringPrintf(
"load command cmdsize 0x%x insufficient for 0x%zx",
load_command.cmdsize,
kLoadCommandReaders[reader_index].size)
<< load_command_info;
return false;
}
if (kLoadCommandReaders[reader_index].singleton) {
if (singleton_indices[reader_index] != kInvalidSegmentIndex) {
LOG(WARNING) << "duplicate load command at "
<< singleton_indices[reader_index]
<< load_command_info;
<< singleton_indices[reader_index] << load_command_info;
return false;
}
@ -255,20 +256,16 @@ bool MachOImageReader::Initialize(ProcessReader* process_reader,
offset += load_command.cmdsize;
}
// This was already checked for the unslid values while the segments were
// read, but now that the slide is known, check the slid values too. The
// individual sections dont need to be checked because they were verified to
// be contained within their respective segments when the segments were read.
for (const MachOImageSegmentReader* segment : segments_) {
mach_vm_address_t slid_segment_address = segment->vmaddr();
mach_vm_size_t slid_segment_size = segment->vmsize();
if (segment->SegmentSlides()) {
slid_segment_address += slide_;
} else {
// The non-sliding __PAGEZERO segment extends instead of slides. See
// MachOImageSegmentReader::SegmentSlides().
slid_segment_size += slide_;
}
// Now that the slide is known, push it into the segments.
for (MachOImageSegmentReader* segment : segments_) {
segment->SetSlide(slide_);
// This was already checked for the unslid values while the segments were
// read, but now its possible to check the slid values too. The individual
// sections dont need to be checked because they were verified to be
// contained within their respective segments when the segments were read.
mach_vm_address_t slid_segment_address = segment->Address();
mach_vm_size_t slid_segment_size = segment->Size();
CheckedMachAddressRange slid_segment_range(
process_reader_, slid_segment_address, slid_segment_size);
if (!slid_segment_range.IsValid()) {
@ -299,9 +296,7 @@ bool MachOImageReader::Initialize(ProcessReader* process_reader,
}
const MachOImageSegmentReader* MachOImageReader::GetSegmentByName(
const std::string& segment_name,
mach_vm_address_t* address,
mach_vm_size_t* size) const {
const std::string& segment_name) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
const auto& iterator = segment_map_.find(segment_name);
@ -310,15 +305,6 @@ const MachOImageSegmentReader* MachOImageReader::GetSegmentByName(
}
const MachOImageSegmentReader* segment = segments_[iterator->second];
if (address) {
*address = segment->vmaddr() + (segment->SegmentSlides() ? slide_ : 0);
}
if (size) {
// The non-sliding __PAGEZERO segment extends instead of slides. See
// MachOImageSegmentReader::SegmentSlides().
*size = segment->vmsize() + (segment->SegmentSlides() ? 0 : slide_);
}
return segment;
}
@ -328,27 +314,17 @@ const process_types::section* MachOImageReader::GetSectionByName(
mach_vm_address_t* address) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
const MachOImageSegmentReader* segment =
GetSegmentByName(segment_name, NULL, NULL);
const MachOImageSegmentReader* segment = GetSegmentByName(segment_name);
if (!segment) {
return NULL;
}
const process_types::section* section =
segment->GetSectionByName(section_name);
if (!section) {
return NULL;
}
if (address) {
*address = section->addr + (segment->SegmentSlides() ? slide_ : 0);
}
return section;
return segment->GetSectionByName(section_name, address);
}
const process_types::section* MachOImageReader::GetSectionAtIndex(
size_t index,
const MachOImageSegmentReader** containing_segment,
mach_vm_address_t* address) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
@ -365,10 +341,10 @@ const process_types::section* MachOImageReader::GetSectionAtIndex(
size_t nsects = segment->nsects();
if (local_index < nsects) {
const process_types::section* section =
segment->GetSectionAtIndex(local_index);
segment->GetSectionAtIndex(local_index, address);
if (address) {
*address = section->addr + (segment->SegmentSlides() ? slide_ : 0);
if (containing_segment) {
*containing_segment = segment;
}
return section;
@ -381,6 +357,86 @@ const process_types::section* MachOImageReader::GetSectionAtIndex(
return NULL;
}
bool MachOImageReader::LookUpExternalDefinedSymbol(
const std::string& name,
mach_vm_address_t* value) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
if (symbol_table_initialized_.is_uninitialized()) {
InitializeSymbolTable();
}
if (!symbol_table_initialized_.is_valid() || !symbol_table_) {
return false;
}
const MachOImageSymbolTableReader::SymbolInformation* symbol_info =
symbol_table_->LookUpExternalDefinedSymbol(name);
if (!symbol_info) {
return false;
}
if (symbol_info->section == NO_SECT) {
// This is an absolute (N_ABS) symbol, which requires no further validation
// or processing.
*value = symbol_info->value;
return true;
}
// This is a symbol defined in a particular section, so make sure that its
// valid for that section and fix it up for any “slide” as needed.
mach_vm_address_t section_address;
const MachOImageSegmentReader* segment;
const process_types::section* section =
GetSectionAtIndex(symbol_info->section, &segment, &section_address);
if (!section) {
return false;
}
mach_vm_address_t slid_value =
symbol_info->value + (segment->SegmentSlides() ? slide_ : 0);
// The __mh_execute_header (_MH_EXECUTE_SYM) symbol is weird. In
// position-independent executables, it shows up in the symbol table as a
// symbol in section 1, although its not really in that section. It points to
// the mach_header[_64], which is the beginning of the __TEXT segment, and the
// __text section normally begins after the load commands in the __TEXT
// segment. The range check below will fail for this symbol, because its not
// really in the section it claims to be in. See Xcode 5.1
// ld64-236.3/src/ld/OutputFile.cpp ld::tool::OutputFile::buildSymbolTable().
// There, ld takes symbols that refer to anything in the mach_header[_64] and
// marks them as being in section 1. Here, section 1 is treated in this same
// special way as long as its in the __TEXT segment that begins at the start
// of the image, which is normally the case, and as long as the symbols value
// is the base of the image.
//
// This only happens for PIE executables, because __mh_execute_header needs
// to slide. In non-PIE executables, __mh_execute_header is an absolute
// symbol.
CheckedMachAddressRange section_range(
process_reader_, section_address, section->size);
if (!section_range.ContainsValue(slid_value) &&
!(symbol_info->section == 1 && segment->Name() == SEG_TEXT &&
slid_value == Address())) {
std::string section_name_full =
MachOImageSegmentReader::SegmentAndSectionNameString(section->segname,
section->sectname);
LOG(WARNING) << base::StringPrintf(
"symbol %s (0x%llx) outside of section %s (0x%llx + "
"0x%llx)",
name.c_str(),
slid_value,
section_name_full.c_str(),
section_address,
section->size) << module_info_;
return false;
}
*value = slid_value;
return true;
}
uint32_t MachOImageReader::DylibVersion() const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
DCHECK_EQ(FileType(), static_cast<uint32_t>(MH_DYLIB));
@ -577,4 +633,39 @@ bool MachOImageReader::ReadUnexpectedCommand(
return false;
}
void MachOImageReader::InitializeSymbolTable() const {
DCHECK(symbol_table_initialized_.is_uninitialized());
symbol_table_initialized_.set_invalid();
if (!symtab_command_) {
// Its technically valid for there to be no LC_SYMTAB, and in that case,
// any symbol lookups should fail. Mark the symbol table as valid, and
// LookUpExternalDefinedSymbol() will understand what it means when this is
// valid but symbol_table_ is not present.
symbol_table_initialized_.set_valid();
return;
}
// Find the __LINKEDIT segment. Technically, the symbol table can be in any
// mapped segment, but by convention, its in the one named __LINKEDIT.
const MachOImageSegmentReader* linkedit_segment =
GetSegmentByName(SEG_LINKEDIT);
if (!linkedit_segment) {
LOG(WARNING) << "no " SEG_LINKEDIT " segment";
return;
}
symbol_table_.reset(new MachOImageSymbolTableReader());
if (!symbol_table_->Initialize(process_reader_,
symtab_command_.get(),
dysymtab_command_.get(),
linkedit_segment,
module_info_)) {
symbol_table_.reset();
return;
}
symbol_table_initialized_.set_valid();
}
} // namespace crashpad

View File

@ -31,6 +31,7 @@
namespace crashpad {
class MachOImageSegmentReader;
class MachOImageSymbolTableReader;
class ProcessReader;
//! \brief A reader for Mach-O images mapped into another process.
@ -106,30 +107,13 @@ class MachOImageReader {
//!
//! \param[in] segment_name The name of the segment to search for, for
//! example, `"__TEXT"`.
//! \param[out] address The actual address that the segment was loaded at in
//! memory, taking any “slide” into account if the segment did not load at
//! its preferred address as stored in the Mach-O image file. This
//! parameter can be `NULL`.
//! \param[out] size The actual size of the segment as loaded at in memory.
//! This value takes any expansion of the segment into account, which
//! occurs when a nonsliding segment in a sliding image loads at its
//! preferred address but grows by the value of the slide. This parameter
//! can be `NULL`.
//!
//! \return A pointer to the segment information if it was found, or `NULL` if
//! it was not found.
//!
//! \note The \a address parameter takes “slide” into account, and the \a size
//! parameter takes growth into account for non-sliding segments, so that
//! these parameters reflect the actual address and size of the segment as
//! loaded into a process address space. This is distinct from the
//! segments preferred load address and size, which may be obtained by
//! calling MachOImageSegmentReader::vmaddr() and
//! MachOImageSegmentReader::vmsize(), respectively.
//! it was not found. The caller does not take ownership; the lifetime of
//! the returned object is scoped to the lifetime of this MachOImageReader
//! object.
const MachOImageSegmentReader* GetSegmentByName(
const std::string& segment_name,
mach_vm_address_t* address,
mach_vm_size_t* size) const;
const std::string& segment_name) const;
//! \brief Obtain section information by segment and section name.
//!
@ -143,7 +127,9 @@ class MachOImageReader {
//! parameter can be `NULL`.
//!
//! \return A pointer to the section information if it was found, or `NULL` if
//! it was not found.
//! it was not found. The caller does not take ownership; the lifetime of
//! the returned object is scoped to the lifetime of this MachOImageReader
//! object.
//!
//! No parameter is provided for the sections size, because it can be
//! obtained from the returned process_types::section::size field.
@ -162,13 +148,19 @@ class MachOImageReader {
//! \param[in] index The index of the section to return, in the order that it
//! appears in the segment load commands. This is a 1-based index,
//! matching the section number values used for `nlist::n_sect`.
//! \param[out] containing_segment The segment that contains the section.
//! This parameter can be `NULL`. The caller does not take ownership;
//! the lifetime of the returned object is scoped to the lifetime of this
//! MachOImageReader object.
//! \param[out] address The actual address that the section was loaded at in
//! memory, taking any “slide” into account if the section did not load at
//! its preferred address as stored in the Mach-O image file. This
//! parameter can be `NULL`.
//!
//! \return A pointer to the section information. If \a index is out of range,
//! logs a warning and returns `NULL`.
//! logs a warning and returns `NULL`. The caller does not take ownership;
//! the lifetime of the returned object is scoped to the lifetime of this
//! MachOImageReader object.
//!
//! No parameter is provided for the sections size, because it can be
//! obtained from the returned process_types::section::size field.
@ -186,8 +178,52 @@ class MachOImageReader {
//! and handled non-fatally by reporting the error to the caller.
const process_types::section* GetSectionAtIndex(
size_t index,
const MachOImageSegmentReader** containing_segment,
mach_vm_address_t* address) const;
//! \brief Looks up a symbol in the images symbol table.
//!
//! This method is capable of locating external defined symbols. Specifically,
//! this method can look up symbols that have these characteristics:
//! - `N_STAB` (debugging) and `N_PEXT` (private external) must not be set.
//! - `N_EXT` (external) must be set.
//! - The type must be `N_ABS` (absolute) or `N_SECT` (defined in section).
//!
//! `N_INDR` (indirect), `N_UNDF` (undefined), and `N_PBUD` (prebound
//! undefined) symbols cannot be located through this mechanism.
//!
//! \param[in] name The name of the symbol to look up, “mangled” or
//! “decorated” appropriately. For example, use `"_main"` to look up the
//! symbol for the C `main()` function, and use `"__Z4Funcv"` to look up
//! the symbol for the C++ `Func()` function. Contrary to `dlsym()`, the
//! leading underscore must not be stripped when using this interface.
//! \param[out] value If the lookup was successful, this will be set to the
//! value of the symbol, adjusted for any “slide” as needed. The value can
//! be used as an address in the remote process address space where the
//! pointee of the symbol exists in memory.
//!
//! \return `true` if the symbol lookup was successful and the symbol was
//! found. `false` otherwise, including error conditions (for which a
//! warning message will be logged), modules without symbol tables, and
//! symbol names not found in the symbol table.
//!
//! \note Symbol values returned via this interface are adjusted for “slide”
//! as appropriate, in contrast to the underlying implementation,
//! MachOImageSymbolTableReader::LookUpExternalDefinedSymbol().
//!
//! \warning Symbols that are resolved by running symbol resolvers
//! (`.symbol_resolver`) are not properly handled by this interface. The
//! address of the symbol resolver is returned because that's what shows
//! up in the symbol table, rather than the effective address of the
//! resolved symbol as used by dyld after running the resolver. The only
//! way to detect this situation would be to read the `LC_DYLD_INFO` or
//! `LC_DYLD_INFO_ONLY` load command if present and look for the
//! `EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER` flag, but that would only be
//! able to detect symbols with a resolver; it would not be able to
//! resolve them from out-of-process, so it's not currently done.
bool LookUpExternalDefinedSymbol(const std::string& name,
mach_vm_address_t* value) const;
//! \brief Returns a Mach-O dylib images current version.
//!
//! This information comes from the `dylib_current_version` field of a dylibs
@ -255,6 +291,19 @@ class MachOImageReader {
bool ReadUnexpectedCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info);
// Performs deferred initialization of the symbol table. Because a module's
// symbol table is often not needed, this is not handled in Initialize(), but
// is done lazily, on-demand as needed.
//
// symbol_table_initialized_ will be transitioned to the appropriate state. If
// initialization completes successfully, this will be the valid state.
// Otherwise, it will be left in the invalid state and a warning message will
// be logged.
//
// Note that if the object contains no symbol table, symbol_table_initialized_
// will be set to the valid state, but symbol_table_ will be NULL.
void InitializeSymbolTable() const;
PointerVector<MachOImageSegmentReader> segments_;
std::map<std::string, size_t> segment_map_;
std::string module_info_;
@ -266,11 +315,25 @@ class MachOImageReader {
uint64_t source_version_;
scoped_ptr<process_types::symtab_command> symtab_command_;
scoped_ptr<process_types::dysymtab_command> dysymtab_command_;
// symbol_table_ (and symbol_table_initialized_) are mutable in order to
// maintain LookUpExternalDefinedSymbol() as a const interface while allowing
// lazy initialization via InitializeSymbolTable(). This is logical
// const-ness, not physical const-ness.
mutable scoped_ptr<MachOImageSymbolTableReader> symbol_table_;
scoped_ptr<process_types::dylib_command> id_dylib_command_;
ProcessReader* process_reader_; // weak
uint32_t file_type_;
InitializationStateDcheck initialized_;
// symbol_table_initialized_ protects symbol_table_: symbol_table_ can only
// be used when symbol_table_initialized_ is valid, although
// symbol_table_initialized_ being valid doesn't imply that symbol_table_ is
// set. symbol_table_initialized_ will be valid without symbol_table_ being
// set in modules that have no symbol table.
mutable InitializationState symbol_table_initialized_;
DISALLOW_COPY_AND_ASSIGN(MachOImageReader);
};

View File

@ -15,9 +15,12 @@
#include "util/mac/mach_o_image_reader.h"
#include <dlfcn.h>
#include <mach-o/dyld.h>
#include <mach-o/dyld_images.h>
#include <mach-o/getsect.h>
#include <mach-o/ldsyms.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
#include <stdint.h>
#include "base/strings/stringprintf.h"
@ -29,6 +32,9 @@
#include "util/misc/uuid.h"
#include "util/test/mac/dyld.h"
// This file is responsible for testing MachOImageReader,
// MachOImageSegmentReader, and MachOImageSymbolTableReader.
namespace {
using namespace crashpad;
@ -41,12 +47,17 @@ const uint32_t kMachMagic = MH_MAGIC_64;
typedef segment_command_64 SegmentCommand;
const uint32_t kSegmentCommand = LC_SEGMENT_64;
typedef section_64 Section;
typedef nlist_64 Nlist;
#else
typedef mach_header MachHeader;
const uint32_t kMachMagic = MH_MAGIC;
typedef segment_command SegmentCommand;
const uint32_t kSegmentCommand = LC_SEGMENT;
typedef section Section;
// This needs to be called “struct nlist” because “nlist” without the struct
// refers to the nlist() function.
typedef struct nlist Nlist;
#endif
#if defined(ARCH_CPU_X86_64)
@ -96,8 +107,6 @@ void ExpectSection(const Section* expect_section,
void ExpectSegmentCommand(const SegmentCommand* expect_segment,
const MachHeader* expect_image,
const MachOImageSegmentReader* actual_segment,
mach_vm_address_t actual_segment_address,
mach_vm_size_t actual_segment_size,
const MachOImageReader* actual_image,
size_t* section_index) {
ASSERT_TRUE(expect_segment);
@ -113,7 +122,7 @@ void ExpectSegmentCommand(const SegmentCommand* expect_segment,
EXPECT_EQ(expect_segment->fileoff, actual_segment->fileoff());
if (actual_segment->SegmentSlides()) {
EXPECT_EQ(actual_segment_address,
EXPECT_EQ(actual_segment->Address(),
actual_segment->vmaddr() + actual_image->Slide());
unsigned long expect_segment_size;
@ -121,9 +130,9 @@ void ExpectSegmentCommand(const SegmentCommand* expect_segment,
expect_image, segment_name.c_str(), &expect_segment_size);
mach_vm_address_t expect_segment_address =
reinterpret_cast<mach_vm_address_t>(expect_segment_data);
EXPECT_EQ(expect_segment_address, actual_segment_address);
EXPECT_EQ(expect_segment_address, actual_segment->Address());
EXPECT_EQ(expect_segment_size, actual_segment->vmsize());
EXPECT_EQ(actual_segment->vmsize(), actual_segment_size);
EXPECT_EQ(actual_segment->vmsize(), actual_segment->Size());
} else {
// getsegmentdata() doesnt return appropriate data for the __PAGEZERO
// segment because getsegmentdata() always adjusts for slide, but the
@ -131,9 +140,9 @@ void ExpectSegmentCommand(const SegmentCommand* expect_segment,
// check for that segment according to the same rules that the kernel uses
// to identify __PAGEZERO. See 10.9.4 xnu-2422.110.17/bsd/kern/mach_loader.c
// load_segment().
EXPECT_EQ(actual_segment_address, actual_segment->vmaddr());
EXPECT_EQ(actual_segment->Address(), actual_segment->vmaddr());
EXPECT_EQ(actual_segment->vmsize() + actual_image->Slide(),
actual_segment_size);
actual_segment->Size());
}
ASSERT_EQ(expect_segment->nsects, actual_segment->nsects());
@ -149,7 +158,7 @@ void ExpectSegmentCommand(const SegmentCommand* expect_segment,
for (size_t index = 0; index < actual_segment->nsects(); ++index) {
const Section* expect_section = &expect_sections[index];
const process_types::section* actual_section =
actual_segment->GetSectionAtIndex(index);
actual_segment->GetSectionAtIndex(index, NULL);
ExpectSection(&expect_sections[index], actual_section);
if (testing::Test::HasFatalFailure()) {
return;
@ -159,7 +168,7 @@ void ExpectSegmentCommand(const SegmentCommand* expect_segment,
std::string section_name =
MachOImageSegmentReader::SectionNameString(expect_section->sectname);
const process_types::section* actual_section_by_name =
actual_segment->GetSectionByName(section_name);
actual_segment->GetSectionByName(section_name, NULL);
EXPECT_EQ(actual_section, actual_section_by_name);
// Make sure that the section is accessible by the parent MachOImageReaders
@ -188,15 +197,18 @@ void ExpectSegmentCommand(const SegmentCommand* expect_segment,
}
// Test the parent MachOImageReaders GetSectionAtIndex as well.
const MachOImageSegmentReader* containing_segment;
mach_vm_address_t actual_section_address_at_index;
const process_types::section* actual_section_from_image_at_index =
actual_image->GetSectionAtIndex(++(*section_index),
&containing_segment,
&actual_section_address_at_index);
EXPECT_EQ(actual_section, actual_section_from_image_at_index);
EXPECT_EQ(actual_segment, containing_segment);
EXPECT_EQ(actual_section_address, actual_section_address_at_index);
}
EXPECT_EQ(NULL, actual_segment->GetSectionByName("NoSuchSection"));
EXPECT_EQ(NULL, actual_segment->GetSectionByName("NoSuchSection", NULL));
}
// Walks through the load commands of |expect_image|, finding all of the
@ -216,6 +228,8 @@ void ExpectSegmentCommands(const MachHeader* expect_image,
ASSERT_TRUE(expect_image);
ASSERT_TRUE(actual_image);
// &expect_image[1] points right past the end of the mach_header[_64], to the
// start of the load commands.
const char* commands_base = reinterpret_cast<const char*>(&expect_image[1]);
uint32_t position = 0;
size_t section_index = 0;
@ -225,20 +239,16 @@ void ExpectSegmentCommands(const MachHeader* expect_image,
reinterpret_cast<const load_command*>(&commands_base[position]);
ASSERT_LE(position + command->cmdsize, expect_image->sizeofcmds);
if (command->cmd == kSegmentCommand) {
ASSERT_GE(command->cmdsize, sizeof(SegmentCommand));
const SegmentCommand* expect_segment =
reinterpret_cast<const SegmentCommand*>(command);
std::string segment_name =
MachOImageSegmentReader::SegmentNameString(expect_segment->segname);
mach_vm_address_t actual_segment_address;
mach_vm_size_t actual_segment_size;
const MachOImageSegmentReader* actual_segment =
actual_image->GetSegmentByName(
segment_name, &actual_segment_address, &actual_segment_size);
actual_image->GetSegmentByName(segment_name);
ExpectSegmentCommand(expect_segment,
expect_image,
actual_segment,
actual_segment_address,
actual_segment_size,
actual_image,
&section_index);
if (testing::Test::HasFatalFailure()) {
@ -252,19 +262,20 @@ void ExpectSegmentCommands(const MachHeader* expect_image,
if (test_section_index_bounds) {
// GetSectionAtIndex uses a 1-based index. Make sure that the range is
// correct.
EXPECT_EQ(NULL, actual_image->GetSectionAtIndex(0, NULL));
EXPECT_EQ(NULL, actual_image->GetSectionAtIndex(section_index + 1, NULL));
EXPECT_EQ(NULL, actual_image->GetSectionAtIndex(0, NULL, NULL));
EXPECT_EQ(NULL,
actual_image->GetSectionAtIndex(section_index + 1, NULL, NULL));
}
// Make sure that by-name lookups for names that dont exist work properly:
// they should return NULL.
EXPECT_FALSE(actual_image->GetSegmentByName("NoSuchSegment", NULL, NULL));
EXPECT_FALSE(actual_image->GetSegmentByName("NoSuchSegment"));
EXPECT_FALSE(
actual_image->GetSectionByName("NoSuchSegment", "NoSuchSection", NULL));
// Make sure that theres a __TEXT segment so that this can do a valid test of
// a section that doesnt exist within a segment that does.
EXPECT_TRUE(actual_image->GetSegmentByName(SEG_TEXT, NULL, NULL));
EXPECT_TRUE(actual_image->GetSegmentByName(SEG_TEXT));
EXPECT_FALSE(actual_image->GetSectionByName(SEG_TEXT, "NoSuchSection", NULL));
// Similarly, make sure that a section name that exists in one segment isnt
@ -280,16 +291,24 @@ void ExpectSegmentCommands(const MachHeader* expect_image,
EXPECT_FALSE(actual_image->GetSectionByName(SEG_LINKEDIT, SECT_TEXT, NULL));
}
// In some cases, the expected slide value for an image is unknown, because no
// reasonable API to return it is provided. When this happens, use kSlideUnknown
// to avoid checking the actual slide value against anything.
const mach_vm_size_t kSlideUnknown = std::numeric_limits<mach_vm_size_t>::max();
// Verifies that |expect_image| is a valid Mach-O header for the current system
// by checking its |magic| and |cputype| fields. Then, verifies that the
// information in |actual_image| matches that in |expect_image|. The |filetype|
// field is examined, and actual_image->Address() is compared to
// |expect_image_address|. Various other attributes of |actual_image| are
// sanity-checked depending on the Mach-O file type. Finally,
// ExpectSegmentCommands() is called to verify all that all of the segments
// match; |test_section_index_bounds| is used as an argument to that function.
// field is examined, actual_image->Address() is compared to
// |expect_image_address|, and actual_image->Slide() is compared to
// |expect_image_slide|, unless |expect_image_slide| is kSlideUnknown. Various
// other attributes of |actual_image| are sanity-checked depending on the Mach-O
// file type. Finally, ExpectSegmentCommands() is called to verify that all
// of the segments match; |test_section_index_bounds| is used as an argument to
// that function.
void ExpectMachImage(const MachHeader* expect_image,
mach_vm_address_t expect_image_address,
mach_vm_size_t expect_image_slide,
const MachOImageReader* actual_image,
bool test_section_index_bounds) {
ASSERT_TRUE(expect_image);
@ -300,15 +319,15 @@ void ExpectMachImage(const MachHeader* expect_image,
EXPECT_EQ(expect_image->filetype, actual_image->FileType());
EXPECT_EQ(expect_image_address, actual_image->Address());
if (expect_image_slide != kSlideUnknown) {
EXPECT_EQ(expect_image_slide, actual_image->Slide());
}
mach_vm_address_t actual_text_segment_address;
mach_vm_size_t actual_text_segment_size;
const MachOImageSegmentReader* actual_text_segment =
actual_image->GetSegmentByName(
SEG_TEXT, &actual_text_segment_address, &actual_text_segment_size);
actual_image->GetSegmentByName(SEG_TEXT);
ASSERT_TRUE(actual_text_segment);
EXPECT_EQ(expect_image_address, actual_text_segment_address);
EXPECT_EQ(actual_image->Size(), actual_text_segment_size);
EXPECT_EQ(expect_image_address, actual_text_segment->Address());
EXPECT_EQ(actual_image->Size(), actual_text_segment->Size());
EXPECT_EQ(expect_image_address - actual_text_segment->vmaddr(),
actual_image->Slide());
@ -329,6 +348,129 @@ void ExpectMachImage(const MachHeader* expect_image,
actual_image->UUID(&uuid);
ExpectSegmentCommands(expect_image, actual_image, test_section_index_bounds);
if (testing::Test::HasFatalFailure()) {
return;
}
}
// Verifies that the symbol described by the Nlist structure |entry|, whose name
// is |name|, has the same value as the symbol of that name looked up in
// |actual_image| by MachOImageReader::LookUpExternalDefinedSymbol().
//
// Only external defined symbols are checked. Every other type of symbol is
// skipped, because LookUpExternalDefinedSymbol() only deals with external
// defined symbols.
void ExpectSymbol(const Nlist* entry,
                  const char* name,
                  const MachOImageReader* actual_image) {
  SCOPED_TRACE(name);

  const uint32_t entry_type = entry->n_type & N_TYPE;

  // N_EXT is tested as a bit mask (!= 0) rather than by comparing against a
  // literal, so that the test doesn't depend on the flag's numeric value.
  const bool is_external_defined =
      (entry->n_type & N_STAB) == 0 && (entry->n_type & N_PEXT) == 0 &&
      entry_type != N_UNDF && entry_type != N_PBUD &&
      (entry->n_type & N_EXT) != 0;
  if (!is_external_defined) {
    // You'd think that it might be a good idea to verify that a symbol that
    // doesn't meet the conditions above doesn't show up in actual_image's
    // symbol table at all. Unfortunately, it's possible for the same name to
    // show up as both an external defined symbol and as something else, so
    // it's not possible to verify this reliably.
    return;
  }

  // Note that this catches more symbols than MachOImageSymbolTableReader does.
  // This test looks for all external defined symbols, but the implementation
  // excludes indirect (N_INDR) symbols. This is intentional, because indirect
  // symbols are currently not seen in the wild, but if they begin to be used
  // more widely, this test is expected to catch them so that a decision can be
  // made regarding whether support ought to be implemented.
  mach_vm_address_t actual_address;
  ASSERT_TRUE(
      actual_image->LookUpExternalDefinedSymbol(name, &actual_address));

  // Since the nlist interface was used to read the symbol, use it to compute
  // the symbol address too. This isn't perfect, and it should be possible in
  // theory to use dlsym() to get the expected address of a symbol. In
  // practice, dlsym() is difficult to use when only a MachHeader* is available
  // as in this function, as opposed to a void* opaque handle. It is possible
  // to get a void* handle by using dladdr() to find the file name
  // corresponding to the MachHeader*, and using dlopen() again on that name,
  // assuming it hasn't changed on disk since being loaded. However, even with
  // that being done, dlsym() can only deal with symbols whose names begin with
  // an underscore (and requires that the leading underscore be trimmed).
  // dlsym() will also return different addresses for symbols that are resolved
  // via symbol resolver.
  mach_vm_address_t expect_address = entry->n_value;
  if (entry_type == N_SECT) {
    // Section-relative symbols carry a 1-based section number, and their
    // expected address is computed by adding the image's slide.
    EXPECT_GE(entry->n_sect, 1u);
    expect_address += actual_image->Slide();
  } else {
    EXPECT_EQ(NO_SECT, entry->n_sect);
  }

  EXPECT_EQ(expect_address, actual_address);
}
// Locates the symbol table in |expect_image| and verifies that all of the
// external defined symbols found there are also present and have the same
// values in |actual_image|. ExpectSymbol() is used to verify the actual symbol.
void ExpectSymbolTable(const MachHeader* expect_image,
const MachOImageReader* actual_image) {
// This intentionally consults only LC_SYMTAB and not LC_DYSYMTAB so that it
// can look at the larger set of all symbols. The actual implementation being
// tested is free to consult LC_DYSYMTAB, but thats considered an
// optimization. Its not necessary for the test, and its better for the test
// to expose bugs in that optimization rather than duplicate them.
const char* commands_base = reinterpret_cast<const char*>(&expect_image[1]);
uint32_t position = 0;
const symtab_command* symtab = NULL;
const SegmentCommand* linkedit = NULL;
for (uint32_t index = 0; index < expect_image->ncmds; ++index) {
ASSERT_LT(position, expect_image->sizeofcmds);
const load_command* command =
reinterpret_cast<const load_command*>(&commands_base[position]);
ASSERT_LE(position + command->cmdsize, expect_image->sizeofcmds);
if (command->cmd == LC_SYMTAB) {
ASSERT_FALSE(symtab);
ASSERT_EQ(sizeof(symtab_command), command->cmdsize);
symtab = reinterpret_cast<const symtab_command*>(command);
} else if (command->cmd == kSegmentCommand) {
ASSERT_GE(command->cmdsize, sizeof(SegmentCommand));
const SegmentCommand* segment =
reinterpret_cast<const SegmentCommand*>(command);
std::string segment_name =
MachOImageSegmentReader::SegmentNameString(segment->segname);
if (segment_name == SEG_LINKEDIT) {
ASSERT_FALSE(linkedit);
linkedit = segment;
}
}
position += command->cmdsize;
}
if (symtab) {
ASSERT_TRUE(linkedit);
const char* linkedit_base =
reinterpret_cast<const char*>(linkedit->vmaddr + actual_image->Slide());
const Nlist* nlist = reinterpret_cast<const Nlist*>(
linkedit_base + symtab->symoff - linkedit->fileoff);
const char* strtab = linkedit_base + symtab->stroff - linkedit->fileoff;
for (uint32_t index = 0; index < symtab->nsyms; ++index) {
const Nlist* entry = nlist + index;
const char* name = strtab + entry->n_un.n_strx;
ExpectSymbol(entry, name, actual_image);
if (testing::Test::HasFatalFailure()) {
return;
}
}
}
mach_vm_address_t ignore;
EXPECT_FALSE(actual_image->LookUpExternalDefinedSymbol("", &ignore));
EXPECT_FALSE(
actual_image->LookUpExternalDefinedSymbol("NoSuchSymbolName", &ignore));
EXPECT_FALSE(
actual_image->LookUpExternalDefinedSymbol("_NoSuchSymbolName", &ignore));
}
TEST(MachOImageReader, Self_MainExecutable) {
@ -336,46 +478,64 @@ TEST(MachOImageReader, Self_MainExecutable) {
ASSERT_TRUE(process_reader.Initialize(mach_task_self()));
const MachHeader* mh_execute_header = reinterpret_cast<MachHeader*>(
dlsym(RTLD_MAIN_ONLY, "_mh_execute_header"));
dlsym(RTLD_MAIN_ONLY, MH_EXECUTE_SYM));
ASSERT_NE(static_cast<void*>(NULL), mh_execute_header);
mach_vm_address_t mh_execute_header_address =
reinterpret_cast<mach_vm_address_t>(mh_execute_header);
MachOImageReader image_reader;
ASSERT_TRUE(image_reader.Initialize(
&process_reader, mh_execute_header_address, "mh_execute_header"));
&process_reader, mh_execute_header_address, "executable"));
EXPECT_EQ(static_cast<uint32_t>(MH_EXECUTE), image_reader.FileType());
ExpectMachImage(
mh_execute_header, mh_execute_header_address, &image_reader, true);
// The main executable has image index 0.
intptr_t image_slide = _dyld_get_image_vmaddr_slide(0);
ExpectMachImage(mh_execute_header,
mh_execute_header_address,
image_slide,
&image_reader,
true);
if (Test::HasFatalFailure()) {
return;
}
// This symbol, __mh_execute_header, is known to exist in all MH_EXECUTE
// Mach-O files.
mach_vm_address_t symbol_address;
ASSERT_TRUE(image_reader.LookUpExternalDefinedSymbol(_MH_EXECUTE_SYM,
&symbol_address));
EXPECT_EQ(mh_execute_header_address, symbol_address);
ExpectSymbolTable(mh_execute_header, &image_reader);
if (Test::HasFatalFailure()) {
return;
}
}
TEST(MachOImageReader, Self_DyldImages) {
ProcessReader process_reader;
ASSERT_TRUE(process_reader.Initialize(mach_task_self()));
const struct dyld_all_image_infos* dyld_image_infos =
_dyld_get_all_image_infos();
ASSERT_GE(dyld_image_infos->version, 1u);
ASSERT_TRUE(dyld_image_infos->infoArray);
uint32_t count = _dyld_image_count();
ASSERT_GE(count, 1u);
for (uint32_t index = 0; index < dyld_image_infos->infoArrayCount; ++index) {
const dyld_image_info* dyld_image = &dyld_image_infos->infoArray[index];
SCOPED_TRACE(base::StringPrintf(
"index %u, image %s", index, dyld_image->imageFilePath));
for (uint32_t index = 0; index < count; ++index) {
const char* image_name = _dyld_get_image_name(index);
SCOPED_TRACE(base::StringPrintf("index %u, image %s", index, image_name));
// dyld_image_info::imageLoadAddress is poorly-declared: its declared as
// _dyld_get_image_header() is poorly-declared: its declared as returning
// const mach_header* in both 32-bit and 64-bit environments, but in the
// 64-bit environment, it should be const mach_header_64*.
const MachHeader* mach_header =
reinterpret_cast<const MachHeader*>(dyld_image->imageLoadAddress);
reinterpret_cast<const MachHeader*>(_dyld_get_image_header(index));
mach_vm_address_t image_address =
reinterpret_cast<mach_vm_address_t>(mach_header);
MachOImageReader image_reader;
ASSERT_TRUE(image_reader.Initialize(
&process_reader, image_address, dyld_image->imageFilePath));
&process_reader, image_address, image_name));
uint32_t file_type = image_reader.FileType();
if (index == 0) {
@ -384,7 +544,14 @@ TEST(MachOImageReader, Self_DyldImages) {
EXPECT_TRUE(file_type == MH_DYLIB || file_type == MH_BUNDLE);
}
ExpectMachImage(mach_header, image_address, &image_reader, false);
intptr_t image_slide = _dyld_get_image_vmaddr_slide(index);
ExpectMachImage(
mach_header, image_address, image_slide, &image_reader, false);
if (Test::HasFatalFailure()) {
return;
}
ExpectSymbolTable(mach_header, &image_reader);
if (Test::HasFatalFailure()) {
return;
}
@ -392,6 +559,11 @@ TEST(MachOImageReader, Self_DyldImages) {
// Now that all of the modules have been verified, make sure that dyld itself
// can be read properly too.
const struct dyld_all_image_infos* dyld_image_infos =
_dyld_get_all_image_infos();
ASSERT_GE(dyld_image_infos->version, 1u);
EXPECT_EQ(count, dyld_image_infos->infoArrayCount);
if (dyld_image_infos->version >= 2) {
SCOPED_TRACE("dyld");
@ -407,7 +579,14 @@ TEST(MachOImageReader, Self_DyldImages) {
EXPECT_EQ(static_cast<uint32_t>(MH_DYLINKER), image_reader.FileType());
ExpectMachImage(mach_header, image_address, &image_reader, false);
// Theres no good API to get dylds slide, so dont bother checking it.
ExpectMachImage(
mach_header, image_address, kSlideUnknown, &image_reader, false);
if (Test::HasFatalFailure()) {
return;
}
ExpectSymbolTable(mach_header, &image_reader);
if (Test::HasFatalFailure()) {
return;
}
@ -434,7 +613,11 @@ TEST(MachOImageReader, Self_DyldImages) {
ASSERT_TRUE(
image_reader.Initialize(&process_reader, image_address, "uuid"));
ExpectMachImage(mach_header, image_address, &image_reader, false);
// Theres no good way to get the images slide here, although the image
// should have already been checked along with its slide above, in the
// loop through all images.
ExpectMachImage(
mach_header, image_address, kSlideUnknown, &image_reader, false);
UUID expected_uuid;
expected_uuid.InitializeFromBytes(dyld_image->imageUUID);

View File

@ -33,7 +33,12 @@ std::string SizeLimitedCString(const char* c_string, size_t max_length) {
} // namespace
MachOImageSegmentReader::MachOImageSegmentReader()
: segment_command_(), sections_(), section_map_(), initialized_() {
: segment_command_(),
sections_(),
section_map_(),
slide_(0),
initialized_(),
initialized_slide_() {
}
MachOImageSegmentReader::~MachOImageSegmentReader() {
@ -175,8 +180,21 @@ std::string MachOImageSegmentReader::Name() const {
return NameInternal();
}
// Returns the segment's load address: vmaddr() plus the image's slide when the
// segment slides, or vmaddr() unchanged when it does not. SetSlide() must have
// been called first.
mach_vm_address_t MachOImageSegmentReader::Address() const {
  INITIALIZATION_STATE_DCHECK_VALID(initialized_);
  INITIALIZATION_STATE_DCHECK_VALID(initialized_slide_);
  if (!SegmentSlides()) {
    return vmaddr();
  }
  return vmaddr() + slide_;
}
// Returns the segment's size as loaded: vmsize() unchanged when the segment
// slides, or vmsize() grown by the image's slide when it does not. SetSlide()
// must have been called first.
mach_vm_size_t MachOImageSegmentReader::Size() const {
  INITIALIZATION_STATE_DCHECK_VALID(initialized_);
  INITIALIZATION_STATE_DCHECK_VALID(initialized_slide_);
  if (SegmentSlides()) {
    return vmsize();
  }
  return vmsize() + slide_;
}
const process_types::section* MachOImageSegmentReader::GetSectionByName(
const std::string& section_name) const {
const std::string& section_name,
mach_vm_address_t* address) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
const auto& iterator = section_map_.find(section_name);
@ -184,14 +202,23 @@ const process_types::section* MachOImageSegmentReader::GetSectionByName(
return NULL;
}
return &sections_[iterator->second];
return GetSectionAtIndex(iterator->second, address);
}
const process_types::section* MachOImageSegmentReader::GetSectionAtIndex(
size_t index) const {
size_t index,
mach_vm_address_t* address) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
CHECK_LT(index, sections_.size());
return &sections_[index];
const process_types::section* section = &sections_[index];
if (address) {
INITIALIZATION_STATE_DCHECK_VALID(initialized_slide_);
*address = section->addr + (SegmentSlides() ? slide_ : 0);
}
return section;
}
bool MachOImageSegmentReader::SegmentSlides() const {
@ -238,4 +265,11 @@ std::string MachOImageSegmentReader::NameInternal() const {
return SegmentNameString(segment_command_.segname);
}
// Records the image's slide value in slide_. The segment itself must already
// be initialized; the slide has its own initialization state
// (initialized_slide_), which this method moves from initializing to valid.
void MachOImageSegmentReader::SetSlide(mach_vm_size_t slide) {
  INITIALIZATION_STATE_DCHECK_VALID(initialized_);
  INITIALIZATION_STATE_SET_INITIALIZING(initialized_slide_);
  slide_ = slide;
  INITIALIZATION_STATE_SET_VALID(initialized_slide_);
}
} // namespace crashpad

View File

@ -29,8 +29,6 @@
namespace crashpad {
class ProcessReader;
//! \brief A reader for LC_SEGMENT or LC_SEGMENT_64 load commands in Mach-O
//! images mapped into another process.
//!
@ -63,6 +61,17 @@ class MachOImageSegmentReader {
mach_vm_address_t load_command_address,
const std::string& load_command_info);
//! \brief Sets the images slide value.
//!
//! This method must only be called once on an object, after Initialize() is
//! called successfully. It must be called before Address(), Size(),
//! GetSectionByName(), or GetSectionAtIndex() can be called.
//!
//! This method is provided because slide is a property of the image that
//! cannot be determined until at least some segments have been read. As such,
//! it is not necessarily known at the time that Initialize() is called.
void SetSlide(mach_vm_size_t slide);
//! \brief Returns the segments name.
//!
//! The segments name is taken from the load commands `segname` field.
@ -71,17 +80,36 @@ class MachOImageSegmentReader {
//! `<mach-o/loader.h>`.
std::string Name() const;
//! \return The segments actual load address in memory, adjusted for any
//! “slide”.
//!
//! \note For the segments preferred load address, not adjusted for slide,
//! use vmaddr().
mach_vm_address_t Address() const;
//! \return The segments actual size address in memory, adjusted for any
//! growth in the case of a nonsliding segment.
//!
//! \note For the segments preferred size, not adjusted for growth, use
//! vmsize().
mach_vm_address_t Size() const;
//! \brief The segments preferred load address.
//!
//! \return The segments preferred load address as stored in the Mach-O file.
//!
//! \note This value is not adjusted for any “slide” that may have occurred
//! when the image was loaded.
//! when the image was loaded. Use Address() for a value adjusted for
//! slide.
//!
//! \sa MachOImageReader::GetSegmentByName()
mach_vm_address_t vmaddr() const { return segment_command_.vmaddr; }
//! \brief Returns the segments size as mapped into memory.
//!
//! \note For non-sliding segments, this value is not adjusted for any growth
//! that may have occurred when the image was loaded. Use Size() for a
//! value adjusted for growth.
mach_vm_size_t vmsize() const { return segment_command_.vmsize; }
//! \brief Returns the file offset of the mapped segment in the file from
@ -110,9 +138,15 @@ class MachOImageSegmentReader {
//! \param[in] section_name The name of the section to search for, without the
//! leading segment name. For example, use `"__text"`, not
//! `"__TEXT,__text"` or `"__TEXT.__text"`.
//! \param[out] address The actual address that the section was loaded at in
//! memory, taking any “slide” into account if the section did not load at
//! its preferred address as stored in the Mach-O image file. This
//! parameter can be `NULL`.
//!
//! \return A pointer to the section information if it was found, or `NULL` if
//! it was not found.
//! it was not found. The caller does not take ownership; the lifetime of
//! the returned object is scoped to the lifetime of this
//! MachOImageSegmentReader object.
//!
//! \note The process_types::section::addr field gives the sections preferred
//! load address as stored in the Mach-O image file, and is not adjusted
@ -120,7 +154,8 @@ class MachOImageSegmentReader {
//!
//! \sa MachOImageReader::GetSectionByName()
const process_types::section* GetSectionByName(
const std::string& section_name) const;
const std::string& section_name,
mach_vm_address_t* address) const;
//! \brief Obtain section information by section index.
//!
@ -129,9 +164,15 @@ class MachOImageSegmentReader {
//! MachOImageReader::GetSectionAtIndex(), this is a 0-based index. This
//! parameter must be in the range of valid indices as reported by
//! nsects().
//! \param[out] address The actual address that the section was loaded at in
//! memory, taking any “slide” into account if the section did not load at
//! its preferred address as stored in the Mach-O image file. This
//! parameter can be `NULL`.
//!
//! \return A pointer to the section information. If \a index is out of range,
//! execution is aborted.
//! execution is aborted. The caller does not take ownership; the
//! lifetime of the returned object is scoped to the lifetime of this
//! MachOImageSegmentReader object.
//!
//! \note The process_types::section::addr field gives the sections preferred
//! load address as stored in the Mach-O image file, and is not adjusted
@ -144,7 +185,9 @@ class MachOImageSegmentReader {
//! treated more harshly as a logic error, as opposed to a data error.
//!
//! \sa MachOImageReader::GetSectionAtIndex()
const process_types::section* GetSectionAtIndex(size_t index) const;
const process_types::section* GetSectionAtIndex(
size_t index,
mach_vm_address_t* address) const;
//! Returns whether the segment slides.
//!
@ -195,7 +238,14 @@ class MachOImageSegmentReader {
// Maps section names to indices into the sections_ vector.
std::map<std::string, size_t> section_map_;
// The images slide. Note that the segments slide may be 0 and not the value
// of the images slide if SegmentSlides() is false. In that case, the
// segment is extended instead of slid, so its size as loaded will be
// increased by this value.
mach_vm_size_t slide_;
InitializationStateDcheck initialized_;
InitializationStateDcheck initialized_slide_;
DISALLOW_COPY_AND_ASSIGN(MachOImageSegmentReader);
};

View File

@ -0,0 +1,267 @@
// Copyright 2014 The Crashpad Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "util/mac/mach_o_image_symbol_table_reader.h"
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
#include "base/memory/scoped_ptr.h"
#include "base/strings/stringprintf.h"
#include "util/mac/checked_mach_address_range.h"
#include "util/mach/task_memory.h"
namespace crashpad {
namespace internal {
//! \brief The internal implementation for MachOImageSymbolTableReader.
//!
//! Initialization is broken into more than one function that needs to share
//! data, so member variables are used. However, much of this data is irrelevant
//! after initialization is completed, so rather than doing it in
//! MachOImageSymbolTableReader, it's handled by this class, which is a “friend”
//! of MachOImageSymbolTableReader.
class MachOImageSymbolTableReaderInitializer {
 public:
  //! \brief Prepares to read a symbol table located in \a linkedit_segment.
  //!
  //! \param[in] process_reader The reader for the remote process.
  //! \param[in] linkedit_segment The segment expected to contain the symbol
  //!     and string tables. Its Address() and Size() are read immediately, so
  //!     its slide must already have been set.
  //! \param[in] module_info A string appended to logged messages.
  MachOImageSymbolTableReaderInitializer(
      ProcessReader* process_reader,
      const MachOImageSegmentReader* linkedit_segment,
      const std::string& module_info)
      : module_info_(module_info),
        linkedit_range_(),
        process_reader_(process_reader),
        linkedit_segment_(linkedit_segment) {
    linkedit_range_.SetRange(
        process_reader_, linkedit_segment->Address(), linkedit_segment->Size());
    DCHECK(linkedit_range_.IsValid());
  }

  ~MachOImageSymbolTableReaderInitializer() {}

  //! \brief Reads the symbol table from another process.
  //!
  //! \param[in] symtab_command The `LC_SYMTAB` load command.
  //! \param[in] dysymtab_command The `LC_DYSYMTAB` load command, or `NULL` if
  //!     there is none. When present, only the extdefsym portion of the symbol
  //!     table that it identifies is read.
  //! \param[out] external_defined_symbols The map that receives an entry for
  //!     each external defined symbol found. On failure, it may have been
  //!     partially populated.
  //!
  //! \return `true` on success. `false` otherwise, with a message logged.
  //!
  //! \sa MachOImageSymbolTableReader::Initialize()
  bool Initialize(const process_types::symtab_command* symtab_command,
                  const process_types::dysymtab_command* dysymtab_command,
                  MachOImageSymbolTableReader::SymbolInformationMap*
                      external_defined_symbols) {
    mach_vm_address_t symtab_address =
        AddressForLinkEditComponent(symtab_command->symoff);
    uint32_t symbol_count = symtab_command->nsyms;
    size_t nlist_size = process_types::nlist::ExpectedSize(process_reader_);

    // Widen before multiplying so that the product cannot be truncated where
    // size_t is only 32 bits wide.
    const mach_vm_size_t symtab_size =
        static_cast<mach_vm_size_t>(symbol_count) * nlist_size;
    if (!IsInLinkEditSegment(symtab_address, symtab_size, "symtab")) {
      return false;
    }

    // If a dysymtab is present, use it to filter the symtab for just the
    // portion used for extdefsym. If no dysymtab is present, the entire symtab
    // will need to be consulted.
    uint32_t skip_count = 0;
    if (dysymtab_command) {
      // The second comparison is written as a subtraction so that it cannot
      // wrap around in 32 bits: the first comparison has already established
      // that iextdefsym < nsyms. Adding iextdefsym and nextdefsym directly
      // could overflow and let an out-of-range nextdefsym through.
      if (dysymtab_command->iextdefsym >= symtab_command->nsyms ||
          dysymtab_command->nextdefsym >
              symtab_command->nsyms - dysymtab_command->iextdefsym) {
        LOG(WARNING) << base::StringPrintf(
                            "dysymtab extdefsym %u + %u > symtab nsyms %u",
                            dysymtab_command->iextdefsym,
                            dysymtab_command->nextdefsym,
                            symtab_command->nsyms) << module_info_;
        return false;
      }

      // The narrowed range is a subrange of the full symbol table, which was
      // already verified to lie within the __LINKEDIT segment above.
      skip_count = dysymtab_command->iextdefsym;
      symtab_address += static_cast<mach_vm_size_t>(skip_count) * nlist_size;
      symbol_count = dysymtab_command->nextdefsym;
    }

    mach_vm_address_t strtab_address =
        AddressForLinkEditComponent(symtab_command->stroff);
    mach_vm_size_t strtab_size = symtab_command->strsize;
    if (!IsInLinkEditSegment(strtab_address, strtab_size, "strtab")) {
      return false;
    }

    // Only symbol_count entries are read, so only that many are allocated.
    scoped_ptr<process_types::nlist[]> symbols(
        new process_types::nlist[symbol_count]);
    if (!process_types::nlist::ReadArrayInto(
            process_reader_, symtab_address, symbol_count, symbols.get())) {
      LOG(WARNING) << "could not read symbol table" << module_info_;
      return false;
    }

    for (size_t symbol_index = 0; symbol_index < symbol_count; ++symbol_index) {
      const process_types::nlist& symbol = symbols[symbol_index];
      std::string symbol_info = base::StringPrintf(", symbol index %zu%s",
                                                   skip_count + symbol_index,
                                                   module_info_.c_str());
      uint8_t symbol_type = symbol.n_type & N_TYPE;

      // An external defined symbol has none of the N_STAB bits set, is not
      // N_PEXT, is either N_ABS or N_SECT, and has N_EXT set.
      if ((symbol.n_type & N_STAB) == 0 && (symbol.n_type & N_PEXT) == 0 &&
          (symbol_type == N_ABS || symbol_type == N_SECT) &&
          (symbol.n_type & N_EXT)) {
        if (symbol.n_strx >= strtab_size) {
          LOG(WARNING) << base::StringPrintf(
                              "string at 0x%x out of bounds (0x%llx)",
                              symbol.n_strx,
                              strtab_size) << symbol_info;
          return false;
        }

        // Limit the read to what remains of the string table so that an
        // unterminated name can't run past its end.
        std::string name;
        if (!process_reader_->Memory()->ReadCStringSizeLimited(
                strtab_address + symbol.n_strx,
                strtab_size - symbol.n_strx,
                &name)) {
          LOG(WARNING) << "could not read string" << symbol_info;
          return false;
        }

        if (symbol_type == N_ABS && symbol.n_sect != NO_SECT) {
          LOG(WARNING) << base::StringPrintf("N_ABS symbol %s in section %u",
                                             name.c_str(),
                                             symbol.n_sect) << symbol_info;
          return false;
        }

        if (symbol_type == N_SECT && symbol.n_sect == NO_SECT) {
          LOG(WARNING) << base::StringPrintf(
                              "N_SECT symbol %s in section NO_SECT",
                              name.c_str()) << symbol_info;
          return false;
        }

        if (external_defined_symbols->count(name)) {
          LOG(WARNING) << "duplicate symbol " << name << symbol_info;
          return false;
        }

        // Named distinctly from the symbol_info string above, which is still
        // in scope here.
        MachOImageSymbolTableReader::SymbolInformation this_symbol_info;
        this_symbol_info.value = symbol.n_value;
        this_symbol_info.section = symbol.n_sect;
        (*external_defined_symbols)[name] = this_symbol_info;
      } else if (dysymtab_command) {
        // When a dysymtab narrows the range being read, every entry in that
        // range is expected to be an external defined symbol.
        LOG(WARNING) << "non-external symbol in extdefsym" << symbol_info;
        return false;
      }
    }

    return true;
  }

 private:
  //! \brief Computes the address for data in the `__LINKEDIT` segment
  //!     identified by its file offset in a Mach-O image.
  //!
  //! \param[in] fileoff The file offset relative to the beginning of an image's
  //!     `mach_header` or `mach_header_64` of the data in the `__LINKEDIT`
  //!     segment.
  //!
  //! \return The address, in the remote process' address space, of the
  //!     requested data.
  mach_vm_address_t AddressForLinkEditComponent(uint32_t fileoff) const {
    return linkedit_range_.Base() + fileoff - linkedit_segment_->fileoff();
  }

  //! \brief Determines whether an address range is located within the
  //!     `__LINKEDIT` segment.
  //!
  //! \param[in] address The base address of the range to check.
  //! \param[in] size The size of the range to check.
  //! \param[in] tag A string that identifies the range being checked. This is
  //!     used only for logging.
  //!
  //! \return `true` if the range identified by \a address + \a size lies
  //!     entirely within the `__LINKEDIT` segment. `false` if that range is
  //!     invalid, or if that range is not contained by the `__LINKEDIT`
  //!     segment, with an appropriate message logged.
  bool IsInLinkEditSegment(mach_vm_address_t address,
                           mach_vm_size_t size,
                           const char* tag) const {
    CheckedMachAddressRange subrange(process_reader_, address, size);
    if (!subrange.IsValid()) {
      LOG(WARNING) << base::StringPrintf("invalid %s range (0x%llx + 0x%llx)",
                                         tag,
                                         address,
                                         size) << module_info_;
      return false;
    }

    if (!linkedit_range_.ContainsRange(subrange)) {
      LOG(WARNING) << base::StringPrintf(
                          "%s at 0x%llx + 0x%llx outside of " SEG_LINKEDIT
                          " segment at 0x%llx + 0x%llx",
                          tag,
                          address,
                          size,
                          linkedit_range_.Base(),
                          linkedit_range_.Size()) << module_info_;
      return false;
    }

    return true;
  }

  // Appended to every logged message to identify the module.
  std::string module_info_;

  // The address range of the segment given to the constructor, as loaded.
  CheckedMachAddressRange linkedit_range_;

  ProcessReader* process_reader_;  // weak
  const MachOImageSegmentReader* linkedit_segment_;  // weak

  DISALLOW_COPY_AND_ASSIGN(MachOImageSymbolTableReaderInitializer);
};
} // namespace internal
// Constructs a reader with an empty symbol map. Initialize() must be called
// before the reader can be used.
MachOImageSymbolTableReader::MachOImageSymbolTableReader()
    : external_defined_symbols_(),
      initialized_() {
}
// Nothing to release explicitly; the members clean up after themselves.
MachOImageSymbolTableReader::~MachOImageSymbolTableReader() {
}
// Reads the external defined symbols into external_defined_symbols_ by way of
// a temporary MachOImageSymbolTableReaderInitializer. The object is only
// marked valid if that succeeds.
bool MachOImageSymbolTableReader::Initialize(
    ProcessReader* process_reader,
    const process_types::symtab_command* symtab_command,
    const process_types::dysymtab_command* dysymtab_command,
    const MachOImageSegmentReader* linkedit_segment,
    const std::string& module_info) {
  INITIALIZATION_STATE_SET_INITIALIZING(initialized_);

  internal::MachOImageSymbolTableReaderInitializer initializer(
      process_reader, linkedit_segment, module_info);
  const bool success = initializer.Initialize(
      symtab_command, dysymtab_command, &external_defined_symbols_);
  if (success) {
    INITIALIZATION_STATE_SET_VALID(initialized_);
  }
  return success;
}
// Looks |name| up in the map built by Initialize(). Returns a pointer to the
// stored entry, which remains owned by this object, or NULL if there is no
// entry for |name|.
const MachOImageSymbolTableReader::SymbolInformation*
MachOImageSymbolTableReader::LookUpExternalDefinedSymbol(
    const std::string& name) const {
  INITIALIZATION_STATE_DCHECK_VALID(initialized_);

  const auto found = external_defined_symbols_.find(name);
  if (found != external_defined_symbols_.end()) {
    return &found->second;
  }
  return NULL;
}
} // namespace crashpad

View File

@ -0,0 +1,135 @@
// Copyright 2014 The Crashpad Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CRASHPAD_UTIL_MAC_MACH_O_IMAGE_SYMBOL_TABLE_READER_H_
#define CRASHPAD_UTIL_MAC_MACH_O_IMAGE_SYMBOL_TABLE_READER_H_
#include "base/basictypes.h"
#include <map>
#include <string>
#include <mach/mach.h>
#include <stdint.h>
#include "util/mac/mach_o_image_segment_reader.h"
#include "util/mac/process_reader.h"
#include "util/mac/process_types.h"
#include "util/misc/initialization_state_dcheck.h"
namespace crashpad {
//! \brief A reader for symbol tables in Mach-O images mapped into another
//!     process.
class MachOImageSymbolTableReader {
 public:
  //! \brief Information about a symbol in a module's symbol table.
  //!
  //! This is a more minimal form of the `nlist` (or `nlist_64`) structure,
  //! only containing the equivalent of the `n_value` and `n_sect` fields.
  struct SymbolInformation {
    //! \brief The address of the symbol as it exists in the symbol table, not
    //!     adjusted for any “slide.”
    mach_vm_address_t value;
    //! \brief The 1-based section index in the module in which the symbol is
    //!     found.
    //!
    //! For symbols defined in a section (`N_SECT`), this is the section index
    //! that can be passed to MachOImageReader::GetSectionAtIndex(), and \a
    //! value will need to be adjusted for segment slide if the containing
    //! segment slid when loaded. For absolute symbols (`N_ABS`), this will be
    //! `NO_SECT` (`0`), and \a value must not be adjusted for segment slide.
    uint8_t section;
  };
  // TODO(mark): Use unordered_map or a similar hash-based map? For now,
  // std::map is fine because this map only stores external defined symbols,
  // and there aren't expected to be so many of those that performance would
  // become a problem. std::map is also guaranteed to be part of the standard
  // library, which isn't the case for std::unordered_map, which requires the
  // C++11 library. In reality, std::unordered_map does not appear to provide
  // a performance advantage. It appears that the memory copies currently done
  // by TaskMemory::Read() have substantially more impact on symbol table
  // operations.
  //
  // This is public so that the type is available to
  // MachOImageSymbolTableReaderInitializer.
  typedef std::map<std::string, SymbolInformation> SymbolInformationMap;
  MachOImageSymbolTableReader();
  ~MachOImageSymbolTableReader();
  //! \brief Reads the symbol table from another process.
  //!
  //! This method must only be called once on an object. This method must be
  //! called successfully before any other method in this class may be called.
  //!
  //! \param[in] process_reader The reader for the remote process.
  //! \param[in] symtab_command The `LC_SYMTAB` load command that identifies
  //!     the symbol table.
  //! \param[in] dysymtab_command The `LC_DYSYMTAB` load command that identifies
  //!     dynamic symbol information within the symbol table. This load command
  //!     is not present in all modules, and this parameter may be `NULL` for
  //!     modules that do not have this information. When present, \a
  //!     dysymtab_command is an optimization that allows the symbol table
  //!     reader to only examine symbol table entries known to be relevant for
  //!     its purposes.
  //! \param[in] linkedit_segment The `__LINKEDIT` segment. This segment should
  //!     contain the data referenced by \a symtab_command and \a
  //!     dysymtab_command. This may be any segment in the module, but by
  //!     convention, the name `__LINKEDIT` is used for this purpose.
  //! \param[in] module_info A string to be used in logged messages. This string
  //!     is for diagnostic purposes only, and may be empty.
  //!
  //! \return `true` if the symbol table was read successfully. `false`
  //!     otherwise, with an appropriate message logged.
  bool Initialize(ProcessReader* process_reader,
                  const process_types::symtab_command* symtab_command,
                  const process_types::dysymtab_command* dysymtab_command,
                  const MachOImageSegmentReader* linkedit_segment,
                  const std::string& module_info);
  //! \brief Looks up a symbol in the image's symbol table.
  //!
  //! The returned information captures the symbol as it exists in the image's
  //! symbol table, not adjusted for any “slide.”
  //!
  //! \param[in] name The name of the symbol to look up, “mangled” or
  //!     “decorated” appropriately. For example, use `"_main"` to look up the
  //!     symbol for the C `main()` function, and use `"__Z4Funcv"` to look up
  //!     the symbol for the C++ `Func()` function.
  //!
  //! \return A SymbolInformation* object with information about the symbol if
  //!     it was found, or `NULL` if the symbol was not found or if an error
  //!     occurred. On error, a warning message will also be logged. The caller
  //!     does not take ownership; the lifetime of the returned object is scoped
  //!     to the lifetime of this MachOImageSymbolTableReader object.
  //!
  //! \note Symbol values returned via this interface are not adjusted for
  //!     “slide.” For slide-adjusted values, use the higher-level
  //!     MachOImageReader::LookUpExternalDefinedSymbol() interface.
  const SymbolInformation* LookUpExternalDefinedSymbol(
      const std::string& name) const;
 private:
  // Maps each external defined symbol's name to its value and section index.
  // Populated by Initialize().
  SymbolInformationMap external_defined_symbols_;
  // Tracks whether Initialize() has been called and has succeeded.
  InitializationStateDcheck initialized_;
  DISALLOW_COPY_AND_ASSIGN(MachOImageSymbolTableReader);
};
} // namespace crashpad
#endif // CRASHPAD_UTIL_MAC_MACH_O_IMAGE_SYMBOL_TABLE_READER_H_

View File

@ -42,6 +42,8 @@
'mac/mach_o_image_reader.h',
'mac/mach_o_image_segment_reader.cc',
'mac/mach_o_image_segment_reader.h',
'mac/mach_o_image_symbol_table_reader.cc',
'mac/mach_o_image_symbol_table_reader.h',
'mac/service_management.cc',
'mac/service_management.h',
'mac/process_reader.cc',