crashpad/snapshot/mac/mach_o_image_reader.h
Mark Mentovai 5d0a133ecd Tolerate weird cl_kernels modules.
cl_kernels modules (OpenCL kernels) are not structured as correct Mach-O
images on Mac OS X 10.10, but they’re present frequently enough that
it’s worth detecting and tolerating their quirks.

As discussed in
https://groups.google.com/a/chromium.org/d/msg/crashpad-dev/NaB7PrfW04g/FanqNJkVBfUJ

Apple bug: https://openradar.appspot.com/20239912

TEST=crashpad_snapshot_test ProcessReader.*
R=rsesek@chromium.org

Review URL: https://codereview.chromium.org/1019243006
2015-03-23 16:27:42 -04:00

355 lines
17 KiB
C++
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2014 The Crashpad Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_READER_H_
#define CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_READER_H_
#include <mach/mach.h>
#include <stdint.h>
#include <map>
#include <string>
#include "base/basictypes.h"
#include "base/memory/scoped_ptr.h"
#include "snapshot/mac/process_types.h"
#include "util/misc/initialization_state_dcheck.h"
#include "util/misc/uuid.h"
#include "util/stdlib/pointer_container.h"
namespace crashpad {
class MachOImageSegmentReader;
class MachOImageSymbolTableReader;
class ProcessReader;
//! \brief A reader for Mach-O images mapped into another process.
//!
//! This class is capable of reading both 32-bit (`mach_header`/`MH_MAGIC`) and
//! 64-bit (`mach_header_64`/`MH_MAGIC_64`) images based on the bitness of the
//! remote process.
//!
//! \sa MachOImageAnnotationsReader
class MachOImageReader {
public:
MachOImageReader();
~MachOImageReader();
//! \brief Reads the Mach-O image files load commands from another process.
//!
//! This method must only be called once on an object. This method must be
//! called successfully before any other method in this class may be called.
//!
//! \param[in] process_reader The reader for the remote process.
//! \param[in] address The address, in the remote process address space,
//! where the `mach_header` or `mach_header_64` at the beginning of the
//! image to be read is located. This address can be determined by reading
//! the remote process dyld information (see
//! snapshot/mac/process_types/dyld_images.proctype).
//! \param[in] name The modules name, a string to be used in logged messages.
//! This string is for diagnostic purposes and to relax otherwise strict
//! parsing rules for common modules with known defects.
//!
//! \return `true` if the image was read successfully, including all load
//! commands. `false` otherwise, with an appropriate message logged.
bool Initialize(ProcessReader* process_reader,
mach_vm_address_t address,
const std::string& name);
//! \brief Returns the Mach-O file type.
//!
//! This value comes from the `filetype` field of the `mach_header` or
//! `mach_header_64`. Common values include `MH_EXECUTE`, `MH_DYLIB`,
//! `MH_DYLINKER`, and `MH_BUNDLE`.
uint32_t FileType() const { return file_type_; }
//! \brief Returns the Mach-O images load address.
//!
//! This is the value passed as \a address to Initialize().
mach_vm_address_t Address() const { return address_; }
//! \brief Returns the mapped size of the Mach-O images `__TEXT` segment.
//!
//! Note that this is returns only the size of the `__TEXT` segment, not of
//! any other segment. This is because the interface only allows one load
//! address and size to be reported, but Mach-O image files may consist of
//! multiple discontiguous segments. By convention, the `__TEXT` segment is
//! always mapped at the beginning of a Mach-O image file, and it is the most
//! useful for the expected intended purpose of collecting data to obtain
//! stack backtraces. The implementation insists during initialization that
//! the `__TEXT` segment be mapped at the beginning of the file.
//!
//! In practice, discontiguous segments are only found for images that have
//! loaded out of the dyld shared cache, but the `__TEXT` segments size is
//! returned for modules that loaded with contiguous segments as well for
//! consistency.
mach_vm_size_t Size() const { return size_; }
//! \brief Returns the Mach-O images “slide,” the difference between its
//! actual load address and its preferred load address.
//!
//! “Slide” is computed by subtracting the `__TEXT` segments preferred load
//! address from its actual load address. It will be reported as a positive
//! offset when the actual load address is greater than the preferred load
//! address. The preferred load address is taken to be the segments reported
//! `vmaddr` value.
mach_vm_size_t Slide() const { return slide_; }
//! \brief Obtain segment information by segment name.
//!
//! \param[in] segment_name The name of the segment to search for, for
//! example, `"__TEXT"`.
//!
//! \return A pointer to the segment information if it was found, or `nullptr`
//! if it was not found. The caller does not take ownership; the lifetime
//! of the returned object is scoped to the lifetime of this
//! MachOImageReader object.
const MachOImageSegmentReader* GetSegmentByName(
const std::string& segment_name) const;
//! \brief Obtain section information by segment and section name.
//!
//! \param[in] segment_name The name of the segment to search for, for
//! example, `"__TEXT"`.
//! \param[in] section_name The name of the section within the segment to
//! search for, for example, `"__text"`.
//! \param[out] address The actual address that the section was loaded at in
//! memory, taking any “slide” into account if the section did not load at
//! its preferred address as stored in the Mach-O image file. This
//! parameter can be `nullptr`.
//!
//! \return A pointer to the section information if it was found, or `nullptr`
//! if it was not found. The caller does not take ownership; the lifetime
//! of the returned object is scoped to the lifetime of this
//! MachOImageReader object.
//!
//! No parameter is provided for the sections size, because it can be
//! obtained from the returned process_types::section::size field.
//!
//! \note The process_types::section::addr field gives the sections preferred
//! load address as stored in the Mach-O image file, and is not adjusted
//! for any “slide” that may have occurred when the image was loaded. Use
//! \a address to obtain the sections actual load address.
const process_types::section* GetSectionByName(
const std::string& segment_name,
const std::string& section_name,
mach_vm_address_t* address) const;
//! \brief Obtain section information by section index.
//!
//! \param[in] index The index of the section to return, in the order that it
//! appears in the segment load commands. This is a 1-based index,
//! matching the section number values used for `nlist::n_sect`.
//! \param[out] containing_segment The segment that contains the section.
//! This parameter can be `nullptr`. The caller does not take ownership;
//! the lifetime of the returned object is scoped to the lifetime of this
//! MachOImageReader object.
//! \param[out] address The actual address that the section was loaded at in
//! memory, taking any “slide” into account if the section did not load at
//! its preferred address as stored in the Mach-O image file. This
//! parameter can be `nullptr`.
//!
//! \return A pointer to the section information. If \a index is out of range,
//! logs a warning and returns `nullptr`. The caller does not take
//! ownership; the lifetime of the returned object is scoped to the
//! lifetime of this MachOImageReader object.
//!
//! No parameter is provided for the sections size, because it can be
//! obtained from the returned process_types::section::size field.
//!
//! \note The process_types::section::addr field gives the sections preferred
//! load address as stored in the Mach-O image file, and is not adjusted
//! for any “slide” that may have occurred when the image was loaded. Use
//! \a address to obtain the sections actual load address.
//! \note Unlike MachOImageSegmentReader::GetSectionAtIndex(), this method
//! accepts out-of-range values for \a index, and returns `nullptr`
//! instead of aborting execution upon encountering an out-of-range value.
//! This is because a Mach-O image files symbol table refers to this
//! per-module section index, and an out-of-range index in that case
//! should be treated as a data error (where the data is beyond this
//! codes control) and handled non-fatally by reporting the error to the
//! caller.
const process_types::section* GetSectionAtIndex(
size_t index,
const MachOImageSegmentReader** containing_segment,
mach_vm_address_t* address) const;
//! \brief Looks up a symbol in the images symbol table.
//!
//! This method is capable of locating external defined symbols. Specifically,
//! this method can look up symbols that have these charcteristics:
//! - `N_STAB` (debugging) and `N_PEXT` (private external) must not be set.
//! - `N_EXT` (external) must be set.
//! - The type must be `N_ABS` (absolute) or `N_SECT` (defined in section).
//!
//! `N_INDR` (indirect), `N_UNDF` (undefined), and `N_PBUD` (prebound
//! undefined) symbols cannot be located through this mechanism.
//!
//! \param[in] name The name of the symbol to look up, “mangled” or
//! “decorated” appropriately. For example, use `"_main"` to look up the
//! symbol for the C `main()` function, and use `"__Z4Funcv"` to look up
//! the symbol for the C++ `Func()` function. Contrary to `dlsym()`, the
//! leading underscore must not be stripped when using this interface.
//! \param[out] value If the lookup was successful, this will be set to the
//! value of the symbol, adjusted for any “slide” as needed. The value can
//! be used as an address in the remote process address space where the
//! pointee of the symbol exists in memory.
//!
//! \return `true` if the symbol lookup was successful and the symbol was
//! found. `false` otherwise, including error conditions (for which a
//! warning message will be logged), modules without symbol tables, and
//! symbol names not found in the symbol table.
//!
//! \note Symbol values returned via this interface are adjusted for “slide”
//! as appropriate, in contrast to the underlying implementation,
//! MachOImageSymbolTableReader::LookUpExternalDefinedSymbol().
//!
//! \warning Symbols that are resolved by running symbol resolvers
//! (`.symbol_resolver`) are not properly handled by this interface. The
//! address of the symbol resolver is returned because thats what shows
//! up in the symbol table, rather than the effective address of the
//! resolved symbol as used by dyld after running the resolver. The only
//! way to detect this situation would be to read the `LC_DYLD_INFO` or
//! `LC_DYLD_INFO_ONLY` load command if present and looking for the
//! `EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER` flag, but that would just be
//! able to detect symbols with a resolver, it would not be able to
//! resolve them from out-of-process, so its not currently done.
bool LookUpExternalDefinedSymbol(const std::string& name,
mach_vm_address_t* value) const;
//! \brief Returns a Mach-O dylib images current version.
//!
//! This information comes from the `dylib_current_version` field of a dylibs
//! `LC_ID_DYLIB` load command. For dylibs without this load command, `0` will
//! be returned.
//!
//! This method may only be called on Mach-O images for which FileType()
//! returns `MH_DYLIB`.
uint32_t DylibVersion() const;
//! \brief Returns a Mach-O images source version.
//!
//! This information comes from a Mach-O images `LC_SOURCE_VERSION` load
//! command. For Mach-O images without this load command, `0` will be
//! returned.
uint64_t SourceVersion() const { return source_version_; }
//! \brief Returns a Mach-O images UUID.
//!
//! This information comes from a Mach-O images `LC_UUID` load command. For
//! Mach-O images without this load command, a zeroed-out UUID value will be
//! returned.
//
// UUID is a name in this scope (referring to this method), so the parameters
// type needs to be qualified with |crashpad::|.
void UUID(crashpad::UUID* uuid) const;
//! \brief Returns the dynamic linkers pathname.
//!
//! The dynamic linker is normally /usr/lib/dyld.
//!
//! For executable images (those with file type `MH_EXECUTE`), this is the
//! name provided in the `LC_LOAD_DYLINKER` load command, if any. For dynamic
//! linker images (those with file type `MH_DYLINKER`), this is the name
//! provided in the `LC_ID_DYLINKER` load command. In other cases, this will
//! be empty.
std::string DylinkerName() const { return dylinker_name_; }
//! \brief Obtains the modules CrashpadInfo structure.
//!
//! \return `true` on success, `false` on failure. If the module does not have
//! a `__crashpad_info` section, this will return `false` without logging
//! any messages. Other failures will result in messages being logged.
bool GetCrashpadInfo(process_types::CrashpadInfo* crashpad_info) const;
private:
// A generic helper routine for the other Read*Command() methods.
template <typename T>
bool ReadLoadCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info,
uint32_t expected_load_command_id,
T* load_command);
// The Read*Command() methods are subroutines called by Initialize(). They are
// responsible for reading a single load command. They may update the member
// fields of their MachOImageReader object. If they cant make sense of a load
// command, they return false.
bool ReadSegmentCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info);
bool ReadSymTabCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info);
bool ReadDySymTabCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info);
bool ReadIdDylibCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info);
bool ReadDylinkerCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info);
bool ReadUUIDCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info);
bool ReadSourceVersionCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info);
bool ReadUnexpectedCommand(mach_vm_address_t load_command_address,
const std::string& load_command_info);
// Performs deferred initialization of the symbol table. Because a modules
// symbol table is often not needed, this is not handled in Initialize(), but
// is done lazily, on-demand as needed.
//
// symbol_table_initialized_ will be transitioned to the appropriate state. If
// initialization completes successfully, this will be the valid state.
// Otherwise, it will be left in the invalid state and a warning message will
// be logged.
//
// Note that if the object contains no symbol table, symbol_table_initialized_
// will be set to the valid state, but symbol_table_ will be nullptr.
void InitializeSymbolTable() const;
PointerVector<MachOImageSegmentReader> segments_;
std::map<std::string, size_t> segment_map_;
std::string module_name_;
std::string module_info_;
std::string dylinker_name_;
crashpad::UUID uuid_;
mach_vm_address_t address_;
mach_vm_size_t size_;
mach_vm_size_t slide_;
uint64_t source_version_;
scoped_ptr<process_types::symtab_command> symtab_command_;
scoped_ptr<process_types::dysymtab_command> dysymtab_command_;
// symbol_table_ (and symbol_table_initialized_) are mutable in order to
// maintain LookUpExternalDefinedSymbol() as a const interface while allowing
// lazy initialization via InitializeSymbolTable(). This is logical
// const-ness, not physical const-ness.
mutable scoped_ptr<MachOImageSymbolTableReader> symbol_table_;
scoped_ptr<process_types::dylib_command> id_dylib_command_;
ProcessReader* process_reader_; // weak
uint32_t file_type_;
InitializationStateDcheck initialized_;
// symbol_table_initialized_ protects symbol_table_: symbol_table_ can only
// be used when symbol_table_initialized_ is valid, although
// symbol_table_initialized_ being valid doesnt imply that symbol_table_ is
// set. symbol_table_initialized_ will be valid without symbol_table_ being
// set in modules that have no symbol table.
mutable InitializationState symbol_table_initialized_;
DISALLOW_COPY_AND_ASSIGN(MachOImageReader);
};
} // namespace crashpad
#endif // CRASHPAD_SNAPSHOT_MAC_MACH_O_IMAGE_READER_H_