crashpad/snapshot/elf/elf_image_reader.cc

859 lines
26 KiB
C++
Raw Normal View History

// Copyright 2017 The Crashpad Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "snapshot/elf/elf_image_reader.h"
#include <stddef.h>
#include <algorithm>
#include <limits>
#include <utility>
#include <vector>
#include "base/logging.h"
#include "base/numerics/safe_math.h"
#include "build/build_config.h"
#include "util/numeric/checked_vm_address_range.h"
namespace crashpad {
class ElfImageReader::ProgramHeaderTable {
public:
virtual ~ProgramHeaderTable() {}
2018-07-26 08:27:31 -07:00
virtual bool VerifyLoadSegments(bool verbose) const = 0;
virtual size_t Size() const = 0;
virtual bool GetDynamicSegment(VMAddress* address, VMSize* size) const = 0;
2018-07-26 08:27:31 -07:00
virtual bool GetPreferredElfHeaderAddress(VMAddress* address,
bool verbose) const = 0;
virtual bool GetPreferredLoadedMemoryRange(VMAddress* address,
2018-07-26 08:27:31 -07:00
VMSize* size,
bool verbose) const = 0;
// Locate the next PT_NOTE segment starting at segment index start_index. If a
// PT_NOTE segment is found, start_index is set to the next index after the
// found segment.
virtual bool GetNoteSegment(size_t* start_index,
VMAddress* address,
VMSize* size) const = 0;
protected:
ProgramHeaderTable() {}
};
template <typename PhdrType>
class ElfImageReader::ProgramHeaderTableSpecific
: public ElfImageReader::ProgramHeaderTable {
public:
ProgramHeaderTableSpecific<PhdrType>() {}
ProgramHeaderTableSpecific<PhdrType>(
const ProgramHeaderTableSpecific<PhdrType>&) = delete;
ProgramHeaderTableSpecific<PhdrType>& operator=(
const ProgramHeaderTableSpecific<PhdrType>&) = delete;
~ProgramHeaderTableSpecific<PhdrType>() {}
bool Initialize(const ProcessMemoryRange& memory,
VMAddress address,
2018-07-26 08:27:31 -07:00
VMSize num_segments,
bool verbose) {
INITIALIZATION_STATE_SET_INITIALIZING(initialized_);
table_.resize(num_segments);
if (!memory.Read(address, sizeof(PhdrType) * num_segments, table_.data())) {
return false;
}
2018-07-26 08:27:31 -07:00
if (!VerifyLoadSegments(verbose)) {
return false;
}
INITIALIZATION_STATE_SET_VALID(initialized_);
return true;
}
2018-07-26 08:27:31 -07:00
bool VerifyLoadSegments(bool verbose) const override {
constexpr bool is_64_bit = std::is_same<PhdrType, Elf64_Phdr>::value;
VMAddress last_vaddr;
bool load_found = false;
for (const auto& header : table_) {
if (header.p_type == PT_LOAD) {
CheckedVMAddressRange load_range(
is_64_bit, header.p_vaddr, header.p_memsz);
if (!load_range.IsValid()) {
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "bad load range";
return false;
}
if (load_found && header.p_vaddr <= last_vaddr) {
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "out of order load segments";
return false;
}
load_found = true;
last_vaddr = header.p_vaddr;
}
}
return true;
}
size_t Size() const override { return sizeof(PhdrType) * table_.size(); }
2018-07-26 08:27:31 -07:00
bool GetPreferredElfHeaderAddress(VMAddress* address,
bool verbose) const override {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
for (const auto& header : table_) {
if (header.p_type == PT_LOAD && header.p_offset == 0) {
*address = header.p_vaddr;
return true;
}
}
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "no preferred header address";
return false;
}
bool GetPreferredLoadedMemoryRange(VMAddress* base,
2018-07-26 08:27:31 -07:00
VMSize* size,
bool verbose) const override {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
VMAddress preferred_base = 0;
VMAddress preferred_end = 0;
bool load_found = false;
for (const auto& header : table_) {
if (header.p_type == PT_LOAD) {
if (!load_found) {
preferred_base = header.p_vaddr;
load_found = true;
}
preferred_end = header.p_vaddr + header.p_memsz;
}
}
if (load_found) {
*base = preferred_base;
*size = preferred_end - preferred_base;
return true;
}
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "no load segments";
return false;
}
bool GetDynamicSegment(VMAddress* address, VMSize* size) const override {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
const PhdrType* phdr;
if (!GetProgramHeader(PT_DYNAMIC, &phdr)) {
return false;
}
*address = phdr->p_vaddr;
*size = phdr->p_memsz;
return true;
}
bool GetProgramHeader(uint32_t type, const PhdrType** header_out) const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
for (const auto& header : table_) {
if (header.p_type == type) {
*header_out = &header;
return true;
}
}
return false;
}
bool GetNoteSegment(size_t* start_index,
VMAddress* address,
VMSize* size) const override {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
for (size_t index = *start_index; index < table_.size(); ++index) {
if (table_[index].p_type == PT_NOTE && table_[index].p_vaddr != 0) {
*start_index = index + 1;
*address = table_[index].p_vaddr;
*size = table_[index].p_memsz;
return true;
}
}
return false;
}
private:
std::vector<PhdrType> table_;
InitializationStateDcheck initialized_;
};
ElfImageReader::NoteReader::~NoteReader() = default;
ElfImageReader::NoteReader::Result ElfImageReader::NoteReader::NextNote(
std::string* name,
NoteType* type,
Reland "Use a relative address in .note.crashpad.info" This is a reland of 95e97a32eba4d505ab9591e683d2147c441eea48 Original change's description: > Use a relative address in .note.crashpad.info > > The desc value in the note is now the offset of CRASHPAD_INFO_SYMBOL > from desc. > > Making this note writable can trigger a linker error resulting in > the binary embedding .note.crashpad.info to be rejected by the > kernel during program loading. > > The error was observed with: > GNU ld (GNU Binutils for Debian) 2.30 > clang version 4.0.1-10 (tags/RELEASE_401/final) > Debian 4.17.17-1rodete2 > > When the note is made writable, crashpad_snapshot_test contains two > PT_LOAD segments which map to the same page. > > LOAD 0x0000000000000000 0x0000000000000000 0x0000000000000000 > 0x0000000000000258 0x0000000000000258 R 0x200000 > LOAD 0x0000000000000258 0x0000000000000258 0x0000000000000258 > 0x00000000002b84d8 0x00000000002b8950 RWE 0x200000 > > Executing this binary with the execv system call triggers a segfault > during program loading (an error can't be returned because the original > process vm has already been discarded). > > I suspect (I haven't set up a debuggable kernel) the failure occurs > while attempting to map the second load segment because its virtual > address, 0x258, is in the same page as the first load segment. > https://elixir.bootlin.com/linux/v4.17.17/source/fs/binfmt_elf.c#L380 > > The linker normally produces consecutive load segments where the second > segment is loaded 0x200000 bytes after the first, which I think is the > maximum expected page size. Modifying the test executable to load the > second segment at 0x1258 (4096 byte page size) allows program loading > to succeed (but of course crashes after control is given to it). > > Bug: crashpad:260 > Change-Id: I2b9f1e66e98919138baef3da991a9710bd970dc4 > Reviewed-on: https://chromium-review.googlesource.com/c/1292232 > Reviewed-by: Scott Graham <scottmg@chromium.org> > Reviewed-by: Mark Mentovai <mark@chromium.org> > Commit-Queue: Joshua Peraza <jperaza@chromium.org> Bug: crashpad:260 Change-Id: I66713de84cc26c9119e0454d19c9c189263fe054 Reviewed-on: https://chromium-review.googlesource.com/c/1318066 Commit-Queue: Joshua Peraza <jperaza@chromium.org> Reviewed-by: Mark Mentovai <mark@chromium.org> Reviewed-by: Scott Graham <scottmg@chromium.org>
2018-11-06 13:19:06 -08:00
std::string* desc,
VMAddress* desc_address) {
if (!is_valid_) {
LOG(ERROR) << "invalid note reader";
return Result::kError;
}
Result result = Result::kError;
do {
while (current_address_ == segment_end_address_) {
VMSize segment_size;
if (!phdr_table_->GetNoteSegment(
&phdr_index_, &current_address_, &segment_size)) {
return Result::kNoMoreNotes;
}
current_address_ += elf_reader_->GetLoadBias();
segment_end_address_ = current_address_ + segment_size;
segment_range_ = std::make_unique<ProcessMemoryRange>();
if (!segment_range_->Initialize(*range_) ||
!segment_range_->RestrictRange(current_address_, segment_size)) {
return Result::kError;
}
}
retry_ = false;
Reland "Use a relative address in .note.crashpad.info" This is a reland of 95e97a32eba4d505ab9591e683d2147c441eea48 Original change's description: > Use a relative address in .note.crashpad.info > > The desc value in the note is now the offset of CRASHPAD_INFO_SYMBOL > from desc. > > Making this note writable can trigger a linker error resulting in > the binary embedding .note.crashpad.info to be rejected by the > kernel during program loading. > > The error was observed with: > GNU ld (GNU Binutils for Debian) 2.30 > clang version 4.0.1-10 (tags/RELEASE_401/final) > Debian 4.17.17-1rodete2 > > When the note is made writable, crashpad_snapshot_test contains two > PT_LOAD segments which map to the same page. > > LOAD 0x0000000000000000 0x0000000000000000 0x0000000000000000 > 0x0000000000000258 0x0000000000000258 R 0x200000 > LOAD 0x0000000000000258 0x0000000000000258 0x0000000000000258 > 0x00000000002b84d8 0x00000000002b8950 RWE 0x200000 > > Executing this binary with the execv system call triggers a segfault > during program loading (an error can't be returned because the original > process vm has already been discarded). > > I suspect (I haven't set up a debuggable kernel) the failure occurs > while attempting to map the second load segment because its virtual > address, 0x258, is in the same page as the first load segment. > https://elixir.bootlin.com/linux/v4.17.17/source/fs/binfmt_elf.c#L380 > > The linker normally produces consecutive load segments where the second > segment is loaded 0x200000 bytes after the first, which I think is the > maximum expected page size. Modifying the test executable to load the > second segment at 0x1258 (4096 byte page size) allows program loading > to succeed (but of course crashes after control is given to it). > > Bug: crashpad:260 > Change-Id: I2b9f1e66e98919138baef3da991a9710bd970dc4 > Reviewed-on: https://chromium-review.googlesource.com/c/1292232 > Reviewed-by: Scott Graham <scottmg@chromium.org> > Reviewed-by: Mark Mentovai <mark@chromium.org> > Commit-Queue: Joshua Peraza <jperaza@chromium.org> Bug: crashpad:260 Change-Id: I66713de84cc26c9119e0454d19c9c189263fe054 Reviewed-on: https://chromium-review.googlesource.com/c/1318066 Commit-Queue: Joshua Peraza <jperaza@chromium.org> Reviewed-by: Mark Mentovai <mark@chromium.org> Reviewed-by: Scott Graham <scottmg@chromium.org>
2018-11-06 13:19:06 -08:00
result = range_->Is64Bit()
? ReadNote<Elf64_Nhdr>(name, type, desc, desc_address)
: ReadNote<Elf32_Nhdr>(name, type, desc, desc_address);
} while (retry_);
if (result == Result::kSuccess) {
return Result::kSuccess;
}
is_valid_ = false;
return Result::kError;
}
namespace {
// The maximum size the user can specify for maximum note size. Clamping this
// ensures that buffer allocations cannot be wildly large. It is not expected
// that a note would be larger than ~1k in normal usage.
constexpr size_t kMaxMaxNoteSize = 16384;
} // namespace
ElfImageReader::NoteReader::NoteReader(const ElfImageReader* elf_reader,
const ProcessMemoryRange* range,
const ProgramHeaderTable* phdr_table,
size_t max_note_size,
const std::string& name_filter,
NoteType type_filter,
bool use_filter)
: current_address_(0),
segment_end_address_(0),
elf_reader_(elf_reader),
range_(range),
phdr_table_(phdr_table),
segment_range_(),
phdr_index_(0),
max_note_size_(std::min(kMaxMaxNoteSize, max_note_size)),
name_filter_(name_filter),
type_filter_(type_filter),
use_filter_(use_filter),
is_valid_(true),
retry_(false) {
DCHECK_LT(max_note_size, kMaxMaxNoteSize);
}
template <typename NhdrType>
ElfImageReader::NoteReader::Result ElfImageReader::NoteReader::ReadNote(
std::string* name,
NoteType* type,
Reland "Use a relative address in .note.crashpad.info" This is a reland of 95e97a32eba4d505ab9591e683d2147c441eea48 Original change's description: > Use a relative address in .note.crashpad.info > > The desc value in the note is now the offset of CRASHPAD_INFO_SYMBOL > from desc. > > Making this note writable can trigger a linker error resulting in > the binary embedding .note.crashpad.info to be rejected by the > kernel during program loading. > > The error was observed with: > GNU ld (GNU Binutils for Debian) 2.30 > clang version 4.0.1-10 (tags/RELEASE_401/final) > Debian 4.17.17-1rodete2 > > When the note is made writable, crashpad_snapshot_test contains two > PT_LOAD segments which map to the same page. > > LOAD 0x0000000000000000 0x0000000000000000 0x0000000000000000 > 0x0000000000000258 0x0000000000000258 R 0x200000 > LOAD 0x0000000000000258 0x0000000000000258 0x0000000000000258 > 0x00000000002b84d8 0x00000000002b8950 RWE 0x200000 > > Executing this binary with the execv system call triggers a segfault > during program loading (an error can't be returned because the original > process vm has already been discarded). > > I suspect (I haven't set up a debuggable kernel) the failure occurs > while attempting to map the second load segment because its virtual > address, 0x258, is in the same page as the first load segment. > https://elixir.bootlin.com/linux/v4.17.17/source/fs/binfmt_elf.c#L380 > > The linker normally produces consecutive load segments where the second > segment is loaded 0x200000 bytes after the first, which I think is the > maximum expected page size. Modifying the test executable to load the > second segment at 0x1258 (4096 byte page size) allows program loading > to succeed (but of course crashes after control is given to it). > > Bug: crashpad:260 > Change-Id: I2b9f1e66e98919138baef3da991a9710bd970dc4 > Reviewed-on: https://chromium-review.googlesource.com/c/1292232 > Reviewed-by: Scott Graham <scottmg@chromium.org> > Reviewed-by: Mark Mentovai <mark@chromium.org> > Commit-Queue: Joshua Peraza <jperaza@chromium.org> Bug: crashpad:260 Change-Id: I66713de84cc26c9119e0454d19c9c189263fe054 Reviewed-on: https://chromium-review.googlesource.com/c/1318066 Commit-Queue: Joshua Peraza <jperaza@chromium.org> Reviewed-by: Mark Mentovai <mark@chromium.org> Reviewed-by: Scott Graham <scottmg@chromium.org>
2018-11-06 13:19:06 -08:00
std::string* desc,
VMAddress* desc_address) {
static_assert(sizeof(*type) >= sizeof(NhdrType::n_namesz),
"Note field size mismatch");
DCHECK_LT(current_address_, segment_end_address_);
NhdrType note_info;
if (!segment_range_->Read(current_address_, sizeof(note_info), &note_info)) {
return Result::kError;
}
current_address_ += sizeof(note_info);
constexpr size_t align = sizeof(note_info.n_namesz);
#define CHECKED_PAD(x, into) \
base::CheckAnd(base::CheckAdd(x, align - 1), ~(align - 1)) \
.AssignIfValid(&into)
size_t padded_namesz;
if (!CHECKED_PAD(note_info.n_namesz, padded_namesz)) {
return Result::kError;
}
size_t padded_descsz;
if (!CHECKED_PAD(note_info.n_descsz, padded_descsz)) {
return Result::kError;
}
size_t note_size;
if (!base::CheckAdd(padded_namesz, padded_descsz).AssignIfValid(&note_size)) {
return Result::kError;
}
// Notes typically have 4-byte alignment. However, .note.android.ident may
// inadvertently use 2-byte alignment.
// https://android-review.googlesource.com/c/platform/bionic/+/554986/
// We can still find .note.android.ident if it appears first in a note segment
// but there may be 4-byte aligned notes following it. If this note was
// aligned at less than 4-bytes, expect that the next note will be aligned at
// 4-bytes and add extra padding, if necessary.
VMAddress end_of_note_candidate;
if (!base::CheckAdd(current_address_, note_size)
.AssignIfValid(&end_of_note_candidate)) {
return Result::kError;
}
VMAddress end_of_note;
if (!CHECKED_PAD(end_of_note_candidate, end_of_note)) {
return Result::kError;
}
end_of_note = std::min(end_of_note, segment_end_address_);
#undef CHECKED_PAD
if (note_size > max_note_size_) {
current_address_ = end_of_note;
retry_ = true;
return Result::kError;
}
if (use_filter_ && note_info.n_type != type_filter_) {
current_address_ = end_of_note;
retry_ = true;
return Result::kError;
}
std::string local_name(note_info.n_namesz, '\0');
if (!segment_range_->Read(
current_address_, note_info.n_namesz, &local_name[0])) {
return Result::kError;
}
if (!local_name.empty()) {
if (local_name.back() != '\0') {
LOG(ERROR) << "unterminated note name";
return Result::kError;
}
local_name.pop_back();
}
if (use_filter_ && local_name != name_filter_) {
current_address_ = end_of_note;
retry_ = true;
return Result::kError;
}
current_address_ += padded_namesz;
std::string local_desc(note_info.n_descsz, '\0');
if (!segment_range_->Read(
current_address_, note_info.n_descsz, &local_desc[0])) {
return Result::kError;
}
Reland "Use a relative address in .note.crashpad.info" This is a reland of 95e97a32eba4d505ab9591e683d2147c441eea48 Original change's description: > Use a relative address in .note.crashpad.info > > The desc value in the note is now the offset of CRASHPAD_INFO_SYMBOL > from desc. > > Making this note writable can trigger a linker error resulting in > the binary embedding .note.crashpad.info to be rejected by the > kernel during program loading. > > The error was observed with: > GNU ld (GNU Binutils for Debian) 2.30 > clang version 4.0.1-10 (tags/RELEASE_401/final) > Debian 4.17.17-1rodete2 > > When the note is made writable, crashpad_snapshot_test contains two > PT_LOAD segments which map to the same page. > > LOAD 0x0000000000000000 0x0000000000000000 0x0000000000000000 > 0x0000000000000258 0x0000000000000258 R 0x200000 > LOAD 0x0000000000000258 0x0000000000000258 0x0000000000000258 > 0x00000000002b84d8 0x00000000002b8950 RWE 0x200000 > > Executing this binary with the execv system call triggers a segfault > during program loading (an error can't be returned because the original > process vm has already been discarded). > > I suspect (I haven't set up a debuggable kernel) the failure occurs > while attempting to map the second load segment because its virtual > address, 0x258, is in the same page as the first load segment. > https://elixir.bootlin.com/linux/v4.17.17/source/fs/binfmt_elf.c#L380 > > The linker normally produces consecutive load segments where the second > segment is loaded 0x200000 bytes after the first, which I think is the > maximum expected page size. Modifying the test executable to load the > second segment at 0x1258 (4096 byte page size) allows program loading > to succeed (but of course crashes after control is given to it). > > Bug: crashpad:260 > Change-Id: I2b9f1e66e98919138baef3da991a9710bd970dc4 > Reviewed-on: https://chromium-review.googlesource.com/c/1292232 > Reviewed-by: Scott Graham <scottmg@chromium.org> > Reviewed-by: Mark Mentovai <mark@chromium.org> > Commit-Queue: Joshua Peraza <jperaza@chromium.org> Bug: crashpad:260 Change-Id: I66713de84cc26c9119e0454d19c9c189263fe054 Reviewed-on: https://chromium-review.googlesource.com/c/1318066 Commit-Queue: Joshua Peraza <jperaza@chromium.org> Reviewed-by: Mark Mentovai <mark@chromium.org> Reviewed-by: Scott Graham <scottmg@chromium.org>
2018-11-06 13:19:06 -08:00
*desc_address = current_address_;
current_address_ = end_of_note;
if (name) {
name->swap(local_name);
}
if (type) {
*type = note_info.n_type;
}
desc->swap(local_desc);
return Result::kSuccess;
}
ElfImageReader::ElfImageReader()
: header_64_(),
ehdr_address_(0),
load_bias_(0),
memory_(),
program_headers_(),
dynamic_array_(),
symbol_table_(),
initialized_(),
dynamic_array_initialized_(),
symbol_table_initialized_() {}
ElfImageReader::~ElfImageReader() {}
bool ElfImageReader::Initialize(const ProcessMemoryRange& memory,
2018-07-26 08:27:31 -07:00
VMAddress address,
bool verbose) {
INITIALIZATION_STATE_SET_INITIALIZING(initialized_);
ehdr_address_ = address;
if (!memory_.Initialize(memory)) {
return false;
}
uint8_t e_ident[EI_NIDENT];
if (!memory_.Read(ehdr_address_, EI_NIDENT, e_ident)) {
return false;
}
if (e_ident[EI_MAG0] != ELFMAG0 || e_ident[EI_MAG1] != ELFMAG1 ||
e_ident[EI_MAG2] != ELFMAG2 || e_ident[EI_MAG3] != ELFMAG3) {
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "Incorrect ELF magic number";
return false;
}
if (!(memory_.Is64Bit() && e_ident[EI_CLASS] == ELFCLASS64) &&
!(!memory_.Is64Bit() && e_ident[EI_CLASS] == ELFCLASS32)) {
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "unexpected bitness";
return false;
}
#if defined(ARCH_CPU_LITTLE_ENDIAN)
constexpr uint8_t expected_encoding = ELFDATA2LSB;
#elif defined(ARCH_CPU_BIG_ENDIAN)
constexpr uint8_t expected_encoding = ELFDATA2MSB;
#endif
if (e_ident[EI_DATA] != expected_encoding) {
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "unexpected encoding";
return false;
}
if (e_ident[EI_VERSION] != EV_CURRENT) {
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "unexpected version";
return false;
}
if (!(memory_.Is64Bit()
? memory_.Read(ehdr_address_, sizeof(header_64_), &header_64_)
: memory_.Read(ehdr_address_, sizeof(header_32_), &header_32_))) {
return false;
}
#define VERIFY_HEADER(header) \
do { \
if (header.e_type != ET_EXEC && header.e_type != ET_DYN) { \
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "unexpected image type"; \
return false; \
} \
if (header.e_version != EV_CURRENT) { \
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "unexpected version"; \
return false; \
} \
if (header.e_ehsize != sizeof(header)) { \
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "unexpected header size"; \
return false; \
} \
} while (false);
if (memory_.Is64Bit()) {
VERIFY_HEADER(header_64_);
} else {
VERIFY_HEADER(header_32_);
}
2018-07-26 08:27:31 -07:00
if (!InitializeProgramHeaders(verbose)) {
return false;
}
VMAddress preferred_ehdr_address;
if (!program_headers_.get()->GetPreferredElfHeaderAddress(
2018-07-26 08:27:31 -07:00
&preferred_ehdr_address, verbose)) {
return false;
}
load_bias_ = ehdr_address_ - preferred_ehdr_address;
VMAddress base_address;
VMSize loaded_size;
2018-07-26 08:27:31 -07:00
if (!program_headers_.get()->GetPreferredLoadedMemoryRange(
&base_address, &loaded_size, verbose)) {
return false;
}
base_address += load_bias_;
if (!memory_.RestrictRange(base_address, loaded_size)) {
return false;
}
VMSize ehdr_size;
VMAddress phdr_address;
if (memory_.Is64Bit()) {
ehdr_size = sizeof(header_64_);
phdr_address = ehdr_address_ + header_64_.e_phoff;
} else {
ehdr_size = sizeof(header_32_);
phdr_address = ehdr_address_ + header_32_.e_phoff;
}
CheckedVMAddressRange range(memory_.Is64Bit(), base_address, loaded_size);
if (!range.ContainsRange(
CheckedVMAddressRange(memory_.Is64Bit(), ehdr_address_, ehdr_size))) {
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "ehdr out of range";
return false;
}
if (!range.ContainsRange(CheckedVMAddressRange(
memory.Is64Bit(), phdr_address, program_headers_->Size()))) {
2018-07-26 08:27:31 -07:00
LOG_IF(ERROR, verbose) << "phdrs out of range";
return false;
}
INITIALIZATION_STATE_SET_VALID(initialized_);
return true;
}
uint16_t ElfImageReader::FileType() const {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
return memory_.Is64Bit() ? header_64_.e_type : header_32_.e_type;
}
bool ElfImageReader::SoName(std::string* name) {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
if (!InitializeDynamicArray()) {
return false;
}
VMSize offset;
if (!dynamic_array_->GetValue(DT_SONAME, true, &offset)) {
return false;
}
return ReadDynamicStringTableAtOffset(offset, name);
}
bool ElfImageReader::GetDynamicSymbol(const std::string& name,
VMAddress* address,
VMSize* size) {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
if (!InitializeDynamicSymbolTable()) {
return false;
}
ElfSymbolTableReader::SymbolInformation info;
if (!symbol_table_->GetSymbol(name, &info)) {
return false;
}
if (info.shndx == SHN_UNDEF || info.shndx == SHN_COMMON) {
return false;
}
switch (info.binding) {
case STB_GLOBAL:
case STB_WEAK:
break;
case STB_LOCAL:
default:
return false;
}
switch (info.type) {
case STT_OBJECT:
case STT_FUNC:
break;
case STT_COMMON:
case STT_NOTYPE:
case STT_SECTION:
case STT_FILE:
case STT_TLS:
default:
return false;
}
if (info.shndx != SHN_ABS) {
info.address += GetLoadBias();
}
*address = info.address;
*size = info.size;
return true;
}
bool ElfImageReader::ReadDynamicStringTableAtOffset(VMSize offset,
std::string* string) {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
if (!InitializeDynamicArray()) {
return false;
}
VMAddress string_table_address;
VMSize string_table_size;
Read either DT_HASH or DT_GNU_HASH to determine the size of DT_SYMTAB Without the section headers for the symbol table, there's no direct way to calculate the number of entries in the table. DT_HASH and DT_GNU_HASH are auxiliary tables that are designed to make symbol lookup faster. DT_HASH is the original and is theoretically mandatory. DT_GNU_HASH is the new-and-improved, but is more complex. In practice, however, an Android build (at least vs. API 16) has only DT_HASH, and not DT_GNU_HASH, and a Fuchsia build has only DT_GNU_HASH but not DT_HASH. So, both are tried. This change does not actually use the data in these tables to improve the speed of symbol lookup, but instead only uses them to correctly terminate the linear search. DT_HASH contains the total number of symbols in the symbol table fairly directly because there is an entry for each symbol table entry in the hash table, so the number is the same. DT_GNU_HASH regrettably does not. Instead, it's necessary to walk the buckets and chain structure to find the largest entry. DT_GNU_HASH doesn't appear in any "real" documentation that I'm aware of, other than the binutils code (at least as far as I know). Some more-and-less-useful references: - https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/ - https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ - http://deroko.phearless.org/dt_gnu_hash.txt - https://sourceware.org/ml/binutils/2006-10/msg00377.html Change-Id: I7cfc4372f29efc37446f0931d22a1f790e44076f Bug: crashpad:213, crashpad:196 Reviewed-on: https://chromium-review.googlesource.com/876879 Commit-Queue: Scott Graham <scottmg@chromium.org> Reviewed-by: Mark Mentovai <mark@chromium.org> Reviewed-by: Joshua Peraza <jperaza@chromium.org>
2018-01-30 14:30:50 -08:00
if (!GetAddressFromDynamicArray(DT_STRTAB, true, &string_table_address) ||
!dynamic_array_->GetValue(DT_STRSZ, true, &string_table_size)) {
LOG(ERROR) << "missing string table info";
return false;
}
if (offset >= string_table_size) {
LOG(ERROR) << "bad offset";
return false;
}
// GNU ld.so doesn't adjust the vdso's dynamic array entries by the load bias.
// If the address is too small to point into the loaded module range and is
// small enough to be an offset from the base of the module, adjust it now.
if (string_table_address < memory_.Base() &&
string_table_address < memory_.Size()) {
string_table_address += GetLoadBias();
}
if (!memory_.ReadCStringSizeLimited(
string_table_address + offset, string_table_size - offset, string)) {
LOG(ERROR) << "missing nul-terminator";
return false;
}
return true;
}
bool ElfImageReader::GetDebugAddress(VMAddress* debug) {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
if (!InitializeDynamicArray()) {
return false;
}
Read either DT_HASH or DT_GNU_HASH to determine the size of DT_SYMTAB Without the section headers for the symbol table, there's no direct way to calculate the number of entries in the table. DT_HASH and DT_GNU_HASH are auxiliary tables that are designed to make symbol lookup faster. DT_HASH is the original and is theoretically mandatory. DT_GNU_HASH is the new-and-improved, but is more complex. In practice, however, an Android build (at least vs. API 16) has only DT_HASH, and not DT_GNU_HASH, and a Fuchsia build has only DT_GNU_HASH but not DT_HASH. So, both are tried. This change does not actually use the data in these tables to improve the speed of symbol lookup, but instead only uses them to correctly terminate the linear search. DT_HASH contains the total number of symbols in the symbol table fairly directly because there is an entry for each symbol table entry in the hash table, so the number is the same. DT_GNU_HASH regrettably does not. Instead, it's necessary to walk the buckets and chain structure to find the largest entry. DT_GNU_HASH doesn't appear in any "real" documentation that I'm aware of, other than the binutils code (at least as far as I know). Some more-and-less-useful references: - https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/ - https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ - http://deroko.phearless.org/dt_gnu_hash.txt - https://sourceware.org/ml/binutils/2006-10/msg00377.html Change-Id: I7cfc4372f29efc37446f0931d22a1f790e44076f Bug: crashpad:213, crashpad:196 Reviewed-on: https://chromium-review.googlesource.com/876879 Commit-Queue: Scott Graham <scottmg@chromium.org> Reviewed-by: Mark Mentovai <mark@chromium.org> Reviewed-by: Joshua Peraza <jperaza@chromium.org>
2018-01-30 14:30:50 -08:00
return GetAddressFromDynamicArray(DT_DEBUG, true, debug);
}
2018-07-26 08:27:31 -07:00
bool ElfImageReader::GetDynamicArrayAddress(VMAddress* address) {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
VMAddress dyn_segment_address;
VMSize dyn_segment_size;
if (!program_headers_.get()->GetDynamicSegment(&dyn_segment_address,
&dyn_segment_size)) {
LOG(ERROR) << "no dynamic segment";
return false;
}
*address = dyn_segment_address + GetLoadBias();
return true;
}
VMAddress ElfImageReader::GetProgramHeaderTableAddress() {
INITIALIZATION_STATE_DCHECK_VALID(initialized_);
return ehdr_address_ +
(memory_.Is64Bit() ? header_64_.e_phoff : header_32_.e_phoff);
}
bool ElfImageReader::InitializeProgramHeaders(bool verbose) {
#define INITIALIZE_PROGRAM_HEADERS(PhdrType, header) \
do { \
if (header.e_phentsize != sizeof(PhdrType)) { \
LOG_IF(ERROR, verbose) << "unexpected phdr size"; \
return false; \
} \
auto phdrs = new ProgramHeaderTableSpecific<PhdrType>(); \
program_headers_.reset(phdrs); \
if (!phdrs->Initialize(memory_, \
ehdr_address_ + header.e_phoff, \
header.e_phnum, \
verbose)) { \
return false; \
} \
} while (false);
if (memory_.Is64Bit()) {
INITIALIZE_PROGRAM_HEADERS(Elf64_Phdr, header_64_);
} else {
INITIALIZE_PROGRAM_HEADERS(Elf32_Phdr, header_32_);
}
return true;
}
bool ElfImageReader::InitializeDynamicArray() {
if (dynamic_array_initialized_.is_valid()) {
return true;
}
if (!dynamic_array_initialized_.is_uninitialized()) {
return false;
}
dynamic_array_initialized_.set_invalid();
VMAddress dyn_segment_address;
VMSize dyn_segment_size;
if (!program_headers_.get()->GetDynamicSegment(&dyn_segment_address,
&dyn_segment_size)) {
LOG(ERROR) << "no dynamic segment";
return false;
}
dyn_segment_address += GetLoadBias();
dynamic_array_.reset(new ElfDynamicArrayReader());
if (!dynamic_array_->Initialize(
memory_, dyn_segment_address, dyn_segment_size)) {
return false;
}
dynamic_array_initialized_.set_valid();
return true;
}
bool ElfImageReader::InitializeDynamicSymbolTable() {
if (symbol_table_initialized_.is_valid()) {
return true;
}
if (!symbol_table_initialized_.is_uninitialized()) {
return false;
}
symbol_table_initialized_.set_invalid();
if (!InitializeDynamicArray()) {
return false;
}
VMAddress symbol_table_address;
Read either DT_HASH or DT_GNU_HASH to determine the size of DT_SYMTAB Without the section headers for the symbol table, there's no direct way to calculate the number of entries in the table. DT_HASH and DT_GNU_HASH are auxiliary tables that are designed to make symbol lookup faster. DT_HASH is the original and is theoretically mandatory. DT_GNU_HASH is the new-and-improved, but is more complex. In practice, however, an Android build (at least vs. API 16) has only DT_HASH, and not DT_GNU_HASH, and a Fuchsia build has only DT_GNU_HASH but not DT_HASH. So, both are tried. This change does not actually use the data in these tables to improve the speed of symbol lookup, but instead only uses them to correctly terminate the linear search. DT_HASH contains the total number of symbols in the symbol table fairly directly because there is an entry for each symbol table entry in the hash table, so the number is the same. DT_GNU_HASH regrettably does not. Instead, it's necessary to walk the buckets and chain structure to find the largest entry. DT_GNU_HASH doesn't appear in any "real" documentation that I'm aware of, other than the binutils code (at least as far as I know). Some more-and-less-useful references: - https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/ - https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ - http://deroko.phearless.org/dt_gnu_hash.txt - https://sourceware.org/ml/binutils/2006-10/msg00377.html Change-Id: I7cfc4372f29efc37446f0931d22a1f790e44076f Bug: crashpad:213, crashpad:196 Reviewed-on: https://chromium-review.googlesource.com/876879 Commit-Queue: Scott Graham <scottmg@chromium.org> Reviewed-by: Mark Mentovai <mark@chromium.org> Reviewed-by: Joshua Peraza <jperaza@chromium.org>
2018-01-30 14:30:50 -08:00
if (!GetAddressFromDynamicArray(DT_SYMTAB, true, &symbol_table_address)) {
LOG(ERROR) << "no symbol table";
return false;
}
Read either DT_HASH or DT_GNU_HASH to determine the size of DT_SYMTAB Without the section headers for the symbol table, there's no direct way to calculate the number of entries in the table. DT_HASH and DT_GNU_HASH are auxiliary tables that are designed to make symbol lookup faster. DT_HASH is the original and is theoretically mandatory. DT_GNU_HASH is the new-and-improved, but is more complex. In practice, however, an Android build (at least vs. API 16) has only DT_HASH, and not DT_GNU_HASH, and a Fuchsia build has only DT_GNU_HASH but not DT_HASH. So, both are tried. This change does not actually use the data in these tables to improve the speed of symbol lookup, but instead only uses them to correctly terminate the linear search. DT_HASH contains the total number of symbols in the symbol table fairly directly because there is an entry for each symbol table entry in the hash table, so the number is the same. DT_GNU_HASH regrettably does not. Instead, it's necessary to walk the buckets and chain structure to find the largest entry. DT_GNU_HASH doesn't appear in any "real" documentation that I'm aware of, other than the binutils code (at least as far as I know). Some more-and-less-useful references: - https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/ - https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ - http://deroko.phearless.org/dt_gnu_hash.txt - https://sourceware.org/ml/binutils/2006-10/msg00377.html Change-Id: I7cfc4372f29efc37446f0931d22a1f790e44076f Bug: crashpad:213, crashpad:196 Reviewed-on: https://chromium-review.googlesource.com/876879 Commit-Queue: Scott Graham <scottmg@chromium.org> Reviewed-by: Mark Mentovai <mark@chromium.org> Reviewed-by: Joshua Peraza <jperaza@chromium.org>
2018-01-30 14:30:50 -08:00
// Try both DT_HASH and DT_GNU_HASH. They're completely different, but both
// circuitously offer a way to find the number of entries in the symbol table.
// DT_HASH is specifically checked first, because depending on the linker, the
// count maybe be incorrect for zero-export cases. In practice, it is believed
// that the zero-export case is probably not particularly useful, so this
// incorrect count will only occur in constructed test cases (see
// ElfImageReader.DtHashAndDtGnuHashMatch).
VMSize number_of_symbol_table_entries;
if (!GetNumberOfSymbolEntriesFromDtHash(&number_of_symbol_table_entries) &&
!GetNumberOfSymbolEntriesFromDtGnuHash(&number_of_symbol_table_entries)) {
LOG(ERROR) << "could not retrieve number of symbol table entries";
return false;
}
symbol_table_.reset(new ElfSymbolTableReader(
&memory_, this, symbol_table_address, number_of_symbol_table_entries));
symbol_table_initialized_.set_valid();
return true;
}
bool ElfImageReader::GetAddressFromDynamicArray(uint64_t tag,
Read either DT_HASH or DT_GNU_HASH to determine the size of DT_SYMTAB Without the section headers for the symbol table, there's no direct way to calculate the number of entries in the table. DT_HASH and DT_GNU_HASH are auxiliary tables that are designed to make symbol lookup faster. DT_HASH is the original and is theoretically mandatory. DT_GNU_HASH is the new-and-improved, but is more complex. In practice, however, an Android build (at least vs. API 16) has only DT_HASH, and not DT_GNU_HASH, and a Fuchsia build has only DT_GNU_HASH but not DT_HASH. So, both are tried. This change does not actually use the data in these tables to improve the speed of symbol lookup, but instead only uses them to correctly terminate the linear search. DT_HASH contains the total number of symbols in the symbol table fairly directly because there is an entry for each symbol table entry in the hash table, so the number is the same. DT_GNU_HASH regrettably does not. Instead, it's necessary to walk the buckets and chain structure to find the largest entry. DT_GNU_HASH doesn't appear in any "real" documentation that I'm aware of, other than the binutils code (at least as far as I know). Some more-and-less-useful references: - https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/ - https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ - http://deroko.phearless.org/dt_gnu_hash.txt - https://sourceware.org/ml/binutils/2006-10/msg00377.html Change-Id: I7cfc4372f29efc37446f0931d22a1f790e44076f Bug: crashpad:213, crashpad:196 Reviewed-on: https://chromium-review.googlesource.com/876879 Commit-Queue: Scott Graham <scottmg@chromium.org> Reviewed-by: Mark Mentovai <mark@chromium.org> Reviewed-by: Joshua Peraza <jperaza@chromium.org>
2018-01-30 14:30:50 -08:00
bool log,
VMAddress* address) {
Read either DT_HASH or DT_GNU_HASH to determine the size of DT_SYMTAB Without the section headers for the symbol table, there's no direct way to calculate the number of entries in the table. DT_HASH and DT_GNU_HASH are auxiliary tables that are designed to make symbol lookup faster. DT_HASH is the original and is theoretically mandatory. DT_GNU_HASH is the new-and-improved, but is more complex. In practice, however, an Android build (at least vs. API 16) has only DT_HASH, and not DT_GNU_HASH, and a Fuchsia build has only DT_GNU_HASH but not DT_HASH. So, both are tried. This change does not actually use the data in these tables to improve the speed of symbol lookup, but instead only uses them to correctly terminate the linear search. DT_HASH contains the total number of symbols in the symbol table fairly directly because there is an entry for each symbol table entry in the hash table, so the number is the same. DT_GNU_HASH regrettably does not. Instead, it's necessary to walk the buckets and chain structure to find the largest entry. DT_GNU_HASH doesn't appear in any "real" documentation that I'm aware of, other than the binutils code (at least as far as I know). Some more-and-less-useful references: - https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/ - https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ - http://deroko.phearless.org/dt_gnu_hash.txt - https://sourceware.org/ml/binutils/2006-10/msg00377.html Change-Id: I7cfc4372f29efc37446f0931d22a1f790e44076f Bug: crashpad:213, crashpad:196 Reviewed-on: https://chromium-review.googlesource.com/876879 Commit-Queue: Scott Graham <scottmg@chromium.org> Reviewed-by: Mark Mentovai <mark@chromium.org> Reviewed-by: Joshua Peraza <jperaza@chromium.org>
2018-01-30 14:30:50 -08:00
if (!dynamic_array_->GetValue(tag, log, address)) {
return false;
}
Use BUILDFLAG for OS checking Use BUILDFLAG(IS_*) instead of defined(OS_*). This was generated mostly mechnically by performing the following steps: - sed -i '' -E -e 's/defined\(OS_/BUILDFLAG(IS_/g' \ -e 's%([ !])OS_([A-Z]+)%\1BUILDFLAG(IS_\2)%g' \ $(git grep -l 'OS_' '**/*.c' '**/*.cc' '**/*.h' '**/*.m' '**/*.mm') - sed -i '' -e 's/#ifdef BUILDFLAG(/#if BUILDFLAG(/' \ $(git grep -l '#ifdef BUILDFLAG(' '**/*.c' '**/*.cc' '**/*.h' '**/*.m' '**/*.mm') - gsed -i -z -E -e \ 's%(.*)#include "%\1#include "build/buildflag.h"\n#include "%' \ $(git grep -l 'BUILDFLAG(IS_' '**/*.c' '**/*.cc' '**/*.h' '**/*.m' '**/*.mm') - Spot checks to move #include "build/buildflag.h" to the correct parts of files. - sed -i '' -E -e \ 's%^(#include "build/buildflag.h")$%#include "build/build_config.h"\n\1%' \ $(grep -L '^#include "build/build_config.h"$' $(git grep -l 'BUILDFLAG(IS_' '**/*.c' '**/*.cc' '**/*.h' '**/*.m' '**/*.mm')) - Add “clang-format off” around tool usage messages. - git cl format - Update mini_chromium to 85ba51f98278 (intermediate step). TESTING ONLY). - for f in $(git grep -l '^#include "build/buildflag.h"$' '**/*.c' '**/*.cc' '**/*.h' '**/*.m' '**/*.mm'); do \ grep -v '^#include "build/buildflag.h"$' "${f}" > /tmp/z; \ cp /tmp/z "${f}"; done - git cl format - Update mini_chromium to 735143774c5f (intermediate step). - Update mini_chromium to f41420eb45fa (as checked in). - Update mini_chromium to 6e2f204b4ae1 (as checked in). For ease of review and inspection, each of these steps is uploaded as a new patch set in a review series. This includes an update of mini_chromium to 6e2f204b4ae1: f41420eb45fa Use BUILDFLAG for OS checking 6e2f204b4ae1 Include what you use: string_util.h uses build_config.h Bug: chromium:1234043 Change-Id: Ieef86186f094c64e59b853729737e36982f8cf69 Reviewed-on: https://chromium-review.googlesource.com/c/crashpad/crashpad/+/3400258 Reviewed-by: Joshua Peraza <jperaza@chromium.org> Commit-Queue: Mark Mentovai <mark@chromium.org>
2022-01-19 15:00:24 -05:00
#if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_FUCHSIA)
// The GNU loader updates the dynamic array according to the load bias.
// The Android and Fuchsia loaders only update the debug address.
if (tag != DT_DEBUG) {
*address += GetLoadBias();
}
Use BUILDFLAG for OS checking Use BUILDFLAG(IS_*) instead of defined(OS_*). This was generated mostly mechnically by performing the following steps: - sed -i '' -E -e 's/defined\(OS_/BUILDFLAG(IS_/g' \ -e 's%([ !])OS_([A-Z]+)%\1BUILDFLAG(IS_\2)%g' \ $(git grep -l 'OS_' '**/*.c' '**/*.cc' '**/*.h' '**/*.m' '**/*.mm') - sed -i '' -e 's/#ifdef BUILDFLAG(/#if BUILDFLAG(/' \ $(git grep -l '#ifdef BUILDFLAG(' '**/*.c' '**/*.cc' '**/*.h' '**/*.m' '**/*.mm') - gsed -i -z -E -e \ 's%(.*)#include "%\1#include "build/buildflag.h"\n#include "%' \ $(git grep -l 'BUILDFLAG(IS_' '**/*.c' '**/*.cc' '**/*.h' '**/*.m' '**/*.mm') - Spot checks to move #include "build/buildflag.h" to the correct parts of files. - sed -i '' -E -e \ 's%^(#include "build/buildflag.h")$%#include "build/build_config.h"\n\1%' \ $(grep -L '^#include "build/build_config.h"$' $(git grep -l 'BUILDFLAG(IS_' '**/*.c' '**/*.cc' '**/*.h' '**/*.m' '**/*.mm')) - Add “clang-format off” around tool usage messages. - git cl format - Update mini_chromium to 85ba51f98278 (intermediate step). TESTING ONLY). - for f in $(git grep -l '^#include "build/buildflag.h"$' '**/*.c' '**/*.cc' '**/*.h' '**/*.m' '**/*.mm'); do \ grep -v '^#include "build/buildflag.h"$' "${f}" > /tmp/z; \ cp /tmp/z "${f}"; done - git cl format - Update mini_chromium to 735143774c5f (intermediate step). - Update mini_chromium to f41420eb45fa (as checked in). - Update mini_chromium to 6e2f204b4ae1 (as checked in). For ease of review and inspection, each of these steps is uploaded as a new patch set in a review series. This includes an update of mini_chromium to 6e2f204b4ae1: f41420eb45fa Use BUILDFLAG for OS checking 6e2f204b4ae1 Include what you use: string_util.h uses build_config.h Bug: chromium:1234043 Change-Id: Ieef86186f094c64e59b853729737e36982f8cf69 Reviewed-on: https://chromium-review.googlesource.com/c/crashpad/crashpad/+/3400258 Reviewed-by: Joshua Peraza <jperaza@chromium.org> Commit-Queue: Mark Mentovai <mark@chromium.org>
2022-01-19 15:00:24 -05:00
#endif // BUILDFLAG(IS_ANDROID)
return true;
}
Read either DT_HASH or DT_GNU_HASH to determine the size of DT_SYMTAB Without the section headers for the symbol table, there's no direct way to calculate the number of entries in the table. DT_HASH and DT_GNU_HASH are auxiliary tables that are designed to make symbol lookup faster. DT_HASH is the original and is theoretically mandatory. DT_GNU_HASH is the new-and-improved, but is more complex. In practice, however, an Android build (at least vs. API 16) has only DT_HASH, and not DT_GNU_HASH, and a Fuchsia build has only DT_GNU_HASH but not DT_HASH. So, both are tried. This change does not actually use the data in these tables to improve the speed of symbol lookup, but instead only uses them to correctly terminate the linear search. DT_HASH contains the total number of symbols in the symbol table fairly directly because there is an entry for each symbol table entry in the hash table, so the number is the same. DT_GNU_HASH regrettably does not. Instead, it's necessary to walk the buckets and chain structure to find the largest entry. DT_GNU_HASH doesn't appear in any "real" documentation that I'm aware of, other than the binutils code (at least as far as I know). Some more-and-less-useful references: - https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/ - https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ - http://deroko.phearless.org/dt_gnu_hash.txt - https://sourceware.org/ml/binutils/2006-10/msg00377.html Change-Id: I7cfc4372f29efc37446f0931d22a1f790e44076f Bug: crashpad:213, crashpad:196 Reviewed-on: https://chromium-review.googlesource.com/876879 Commit-Queue: Scott Graham <scottmg@chromium.org> Reviewed-by: Mark Mentovai <mark@chromium.org> Reviewed-by: Joshua Peraza <jperaza@chromium.org>
2018-01-30 14:30:50 -08:00
bool ElfImageReader::GetNumberOfSymbolEntriesFromDtHash(
VMSize* number_of_symbol_table_entries) {
if (!InitializeDynamicArray()) {
return false;
}
VMAddress dt_hash_address;
if (!GetAddressFromDynamicArray(DT_HASH, false, &dt_hash_address)) {
return false;
}
struct {
uint32_t nbucket;
uint32_t nchain;
} header;
if (!memory_.Read(dt_hash_address, sizeof(header), &header)) {
LOG(ERROR) << "failed to read DT_HASH header";
return false;
}
*number_of_symbol_table_entries = header.nchain;
return true;
}
bool ElfImageReader::GetNumberOfSymbolEntriesFromDtGnuHash(
VMSize* number_of_symbol_table_entries) {
if (!InitializeDynamicArray()) {
return false;
}
VMAddress dt_gnu_hash_address;
if (!GetAddressFromDynamicArray(DT_GNU_HASH, false, &dt_gnu_hash_address)) {
return false;
}
// See https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ and
// https://sourceware.org/ml/binutils/2006-10/msg00377.html.
struct {
uint32_t nbuckets;
uint32_t symoffset;
uint32_t bloom_size;
uint32_t bloom_shift;
} header;
if (!memory_.Read(dt_gnu_hash_address, sizeof(header), &header)) {
LOG(ERROR) << "failed to read DT_GNU_HASH header";
return false;
}
std::vector<uint32_t> buckets(header.nbuckets);
const size_t kNumBytesForBuckets = sizeof(buckets[0]) * buckets.size();
const size_t kWordSize =
memory_.Is64Bit() ? sizeof(uint64_t) : sizeof(uint32_t);
const VMAddress buckets_address =
dt_gnu_hash_address + sizeof(header) + (kWordSize * header.bloom_size);
if (!memory_.Read(buckets_address, kNumBytesForBuckets, buckets.data())) {
LOG(ERROR) << "read buckets";
return false;
}
// Locate the chain that handles the largest index bucket.
uint32_t last_symbol = 0;
for (uint32_t i = 0; i < header.nbuckets; ++i) {
last_symbol = std::max(buckets[i], last_symbol);
}
if (last_symbol < header.symoffset) {
*number_of_symbol_table_entries = header.symoffset;
return true;
}
// Walk the bucket's chain to add the chain length to the total.
const VMAddress chains_base_address = buckets_address + kNumBytesForBuckets;
for (;;) {
uint32_t chain_entry;
if (!memory_.Read(chains_base_address + (last_symbol - header.symoffset) *
sizeof(chain_entry),
sizeof(chain_entry),
&chain_entry)) {
LOG(ERROR) << "read chain entry";
return false;
}
++last_symbol;
// If the low bit is set, this entry is the end of the chain.
if (chain_entry & 1)
break;
}
*number_of_symbol_table_entries = last_symbol;
return true;
}
std::unique_ptr<ElfImageReader::NoteReader> ElfImageReader::Notes(
size_t max_note_size) {
return std::make_unique<NoteReader>(
this, &memory_, program_headers_.get(), max_note_size);
}
std::unique_ptr<ElfImageReader::NoteReader>
ElfImageReader::NotesWithNameAndType(const std::string& name,
NoteReader::NoteType type,
size_t max_note_size) {
return std::make_unique<NoteReader>(
this, &memory_, program_headers_.get(), max_note_size, name, type, true);
}
const ProcessMemoryRange* ElfImageReader::Memory() const {
return &memory_;
}
} // namespace crashpad