From cc7d131d956143ac917635dfffa615bfa1d9b760 Mon Sep 17 00:00:00 2001 From: tqcq <99722391+tqcq@users.noreply.github.com> Date: Sun, 7 Apr 2024 02:42:12 +0000 Subject: [PATCH] feat add dispatcher --- 3party/libelfin/CMakeLists.txt | 24 + 3party/libelfin/Makefile | 14 + 3party/libelfin/dwarf/.gitignore | 7 + 3party/libelfin/dwarf/abbrev.cc | 176 ++ 3party/libelfin/dwarf/attrs.cc | 267 +++ 3party/libelfin/dwarf/cursor.cc | 207 +++ 3party/libelfin/dwarf/data.hh | 539 ++++++ 3party/libelfin/dwarf/die.cc | 202 +++ 3party/libelfin/dwarf/die_str_map.cc | 118 ++ 3party/libelfin/dwarf/dwarf++.hh | 1558 +++++++++++++++++ 3party/libelfin/dwarf/dwarf.cc | 366 ++++ 3party/libelfin/dwarf/elf.cc | 54 + 3party/libelfin/dwarf/expr.cc | 423 +++++ 3party/libelfin/dwarf/internal.hh | 297 ++++ 3party/libelfin/dwarf/line.cc | 438 +++++ 3party/libelfin/dwarf/rangelist.cc | 103 ++ 3party/libelfin/dwarf/small_vector.hh | 197 +++ 3party/libelfin/dwarf/value.cc | 336 ++++ 3party/libelfin/elf/.gitignore | 6 + 3party/libelfin/elf/common.hh | 109 ++ 3party/libelfin/elf/data.hh | 574 ++++++ 3party/libelfin/elf/elf++.hh | 454 +++++ 3party/libelfin/elf/elf.cc | 403 +++++ 3party/libelfin/elf/enum-print.py | 163 ++ 3party/libelfin/elf/mmap_loader.cc | 60 + 3party/libelfin/elf/to_hex.hh | 34 + CMakeLists.txt | 18 +- src/sled/debugging/symbolize.cc | 2 + src/sled/debugging/symbolize_elf.inc | 159 +- src/sled/debugging/symbolize_test.cc | 5 +- .../experimental/design_patterns/dispatcher.h | 59 + src/sled/uri_fuzz.cc | 1 + 32 files changed, 7240 insertions(+), 133 deletions(-) create mode 100644 3party/libelfin/CMakeLists.txt create mode 100644 3party/libelfin/Makefile create mode 100644 3party/libelfin/dwarf/.gitignore create mode 100644 3party/libelfin/dwarf/abbrev.cc create mode 100644 3party/libelfin/dwarf/attrs.cc create mode 100644 3party/libelfin/dwarf/cursor.cc create mode 100644 3party/libelfin/dwarf/data.hh create mode 100644 3party/libelfin/dwarf/die.cc create mode 100644 
3party/libelfin/dwarf/die_str_map.cc create mode 100644 3party/libelfin/dwarf/dwarf++.hh create mode 100644 3party/libelfin/dwarf/dwarf.cc create mode 100644 3party/libelfin/dwarf/elf.cc create mode 100644 3party/libelfin/dwarf/expr.cc create mode 100644 3party/libelfin/dwarf/internal.hh create mode 100644 3party/libelfin/dwarf/line.cc create mode 100644 3party/libelfin/dwarf/rangelist.cc create mode 100644 3party/libelfin/dwarf/small_vector.hh create mode 100644 3party/libelfin/dwarf/value.cc create mode 100644 3party/libelfin/elf/.gitignore create mode 100644 3party/libelfin/elf/common.hh create mode 100644 3party/libelfin/elf/data.hh create mode 100644 3party/libelfin/elf/elf++.hh create mode 100644 3party/libelfin/elf/elf.cc create mode 100644 3party/libelfin/elf/enum-print.py create mode 100644 3party/libelfin/elf/mmap_loader.cc create mode 100644 3party/libelfin/elf/to_hex.hh create mode 100644 src/sled/experimental/design_patterns/dispatcher.h diff --git a/3party/libelfin/CMakeLists.txt b/3party/libelfin/CMakeLists.txt new file mode 100644 index 0000000..92f3e9b --- /dev/null +++ b/3party/libelfin/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.10) +project( + elf + VERSION 0.3.0 + LANGUAGES C CXX) + +add_library( + dwarf STATIC + dwarf/abbrev.cc + dwarf/attrs.cc + dwarf/cursor.cc + dwarf/die.cc + dwarf/dwarf.cc + dwarf/elf.cc + dwarf/expr.cc + dwarf/line.cc + dwarf/rangelist.cc + dwarf/value.cc + dwarf/to_string.cc) +target_include_directories(dwarf PUBLIC dwarf/) + +add_library(elf STATIC elf/elf.cc elf/to_string.cc elf/mmap_loader.cc) +target_include_directories(elf PUBLIC elf/) +target_link_libraries(elf PUBLIC dwarf) diff --git a/3party/libelfin/Makefile b/3party/libelfin/Makefile new file mode 100644 index 0000000..30c8a14 --- /dev/null +++ b/3party/libelfin/Makefile @@ -0,0 +1,14 @@ +all: + $(MAKE) -C elf + $(MAKE) -C dwarf + +install: + $(MAKE) -C elf install + $(MAKE) -C dwarf install + +clean: + $(MAKE) -C elf clean + $(MAKE) -C 
dwarf clean + +check: + cd test && ./test.sh diff --git a/3party/libelfin/dwarf/.gitignore b/3party/libelfin/dwarf/.gitignore new file mode 100644 index 0000000..1049e91 --- /dev/null +++ b/3party/libelfin/dwarf/.gitignore @@ -0,0 +1,7 @@ +*.o +to_string.cc +libdwarf++.a +libdwarf++.so +libdwarf++.so.* +libdwarf++.pc +/doc/ diff --git a/3party/libelfin/dwarf/abbrev.cc b/3party/libelfin/dwarf/abbrev.cc new file mode 100644 index 0000000..f77dc0c --- /dev/null +++ b/3party/libelfin/dwarf/abbrev.cc @@ -0,0 +1,176 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#include "internal.hh" + +using namespace std; + +DWARFPP_BEGIN_NAMESPACE + +static value::type +resolve_type(DW_AT name, DW_FORM form) +{ + switch (form) { + case DW_FORM::addr: + return value::type::address; + + case DW_FORM::block: + case DW_FORM::block1: + case DW_FORM::block2: + case DW_FORM::block4: + // Prior to DWARF 4, exprlocs didn't have their own + // form and were represented as blocks. + // XXX Should this be predicated on version? + switch (name) { + case DW_AT::location: + case DW_AT::byte_size: + case DW_AT::bit_offset: + case DW_AT::bit_size: + case DW_AT::string_length: + case DW_AT::lower_bound: + case DW_AT::return_addr: + case DW_AT::bit_stride: + case DW_AT::upper_bound: + case DW_AT::count: + case DW_AT::data_member_location: + case DW_AT::frame_base: + case DW_AT::segment: + case DW_AT::static_link: + case DW_AT::use_location: + case DW_AT::vtable_elem_location: + case DW_AT::allocated: + case DW_AT::associated: + case DW_AT::data_location: + case DW_AT::byte_stride: + return value::type::exprloc; + default: + return value::type::block; + } + + case DW_FORM::data4: + case DW_FORM::data8: + // Prior to DWARF 4, section offsets didn't have their + // own form and were represented as data4 or data8. 
+ // DWARF 3 clarified that types that accepted both + // constants and section offsets were to treat data4 + // and data8 as section offsets and other constant + // forms as constants. + // XXX Should this be predicated on version? + switch (name) { + case DW_AT::location: + case DW_AT::stmt_list: + case DW_AT::string_length: + case DW_AT::return_addr: + case DW_AT::start_scope: + case DW_AT::data_member_location: + case DW_AT::frame_base: + case DW_AT::macro_info: + case DW_AT::segment: + case DW_AT::static_link: + case DW_AT::use_location: + case DW_AT::vtable_elem_location: + case DW_AT::ranges: + goto sec_offset; + default: + // Fall through + break; + } + case DW_FORM::data1: + case DW_FORM::data2: + return value::type::constant; + case DW_FORM::udata: + return value::type::uconstant; + case DW_FORM::sdata: + return value::type::sconstant; + + case DW_FORM::exprloc: + return value::type::exprloc; + + case DW_FORM::flag: + case DW_FORM::flag_present: + return value::type::flag; + + case DW_FORM::ref1: + case DW_FORM::ref2: + case DW_FORM::ref4: + case DW_FORM::ref8: + case DW_FORM::ref_addr: + case DW_FORM::ref_sig8: + case DW_FORM::ref_udata: + return value::type::reference; + + case DW_FORM::string: + case DW_FORM::strp: + return value::type::string; + + case DW_FORM::indirect: + // There's nothing meaningful we can do + return value::type::invalid; + + case DW_FORM::sec_offset: + sec_offset: + // The type of this form depends on the attribute + switch (name) { + case DW_AT::stmt_list: + return value::type::line; + + case DW_AT::location: + case DW_AT::string_length: + case DW_AT::return_addr: + case DW_AT::data_member_location: + case DW_AT::frame_base: + case DW_AT::segment: + case DW_AT::static_link: + case DW_AT::use_location: + case DW_AT::vtable_elem_location: + return value::type::loclist; + + case DW_AT::macro_info: + return value::type::mac; + + case DW_AT::start_scope: + case DW_AT::ranges: + return value::type::rangelist; + + case 
DW_AT::lo_user...DW_AT::hi_user: + //HACK: ignore vendor extensions + return value::type::invalid; + + default: + throw format_error("DW_FORM_sec_offset not expected for attribute " + + to_string(name)); + } + } + throw format_error("unknown attribute form " + to_string(form)); +} + +attribute_spec::attribute_spec(DW_AT name, DW_FORM form) + : name(name), form(form), type(resolve_type(name, form)) +{ +} + +bool +abbrev_entry::read(cursor *cur) +{ + attributes.clear(); + + // Section 7.5.3 + code = cur->uleb128(); + if (!code) + return false; + + tag = (DW_TAG)cur->uleb128(); + children = cur->fixed() == DW_CHILDREN::yes; + while (1) { + DW_AT name = (DW_AT)cur->uleb128(); + DW_FORM form = (DW_FORM)cur->uleb128(); + if (name == (DW_AT)0 && form == (DW_FORM)0) + break; + attributes.push_back(attribute_spec(name, form)); + } + attributes.shrink_to_fit(); + return true; +} + +DWARFPP_END_NAMESPACE diff --git a/3party/libelfin/dwarf/attrs.cc b/3party/libelfin/dwarf/attrs.cc new file mode 100644 index 0000000..dc32f1f --- /dev/null +++ b/3party/libelfin/dwarf/attrs.cc @@ -0,0 +1,267 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. 
+ +#include "dwarf++.hh" + +using namespace std; + +DWARFPP_BEGIN_NAMESPACE + +#define AT_ANY(name) \ + value at_##name(const die &d) \ + { \ + return d[DW_AT::name]; \ + } \ + static_assert(true, "") + +#define AT_ADDRESS(name) \ + taddr at_##name(const die &d) \ + { \ + return d[DW_AT::name].as_address(); \ + } \ + static_assert(true, "") + +#define AT_ENUM(name, type) \ + type at_##name(const die &d) \ + { \ + return (type)d[DW_AT::name].as_uconstant(); \ + } \ + static_assert(true, "") + +#define AT_FLAG(name) \ + bool at_##name(const die &d) \ + { \ + return d[DW_AT::name].as_flag(); \ + } \ + static_assert(true, "") + +#define AT_FLAG_(name) \ + bool at_##name(const die &d) \ + { \ + return d[DW_AT::name##_].as_flag(); \ + } \ + static_assert(true, "") + +#define AT_REFERENCE(name) \ + die at_##name(const die &d) \ + { \ + return d[DW_AT::name].as_reference(); \ + } \ + static_assert(true, "") + +#define AT_STRING(name) \ + string at_##name(const die &d) \ + { \ + return d[DW_AT::name].as_string(); \ + } \ + static_assert(true, "") + +#define AT_UDYNAMIC(name) \ + uint64_t at_##name(const die &d, expr_context *ctx) \ + { \ + return _at_udynamic(DW_AT::name, d, ctx); \ + } \ + static_assert(true, "") + +static uint64_t _at_udynamic(DW_AT attr, const die &d, expr_context *ctx, int depth = 0) +{ + // DWARF4 section 2.19 + if (depth > 16) + throw format_error("reference depth exceeded for " + to_string(attr)); + + value v(d[attr]); + switch (v.get_type()) { + case value::type::constant: + case value::type::uconstant: + return v.as_uconstant(); + case value::type::reference: + return _at_udynamic(attr, v.as_reference(), ctx, depth + 1); + case value::type::exprloc: + return v.as_exprloc().evaluate(ctx).value; + default: + throw format_error(to_string(attr) + " has unexpected type " + + to_string(v.get_type())); + } +} + +////////////////////////////////////////////////////////////////// +// 0x0X +// + +AT_REFERENCE(sibling); +// XXX location +AT_STRING(name); 
+AT_ENUM(ordering, DW_ORD); +AT_UDYNAMIC(byte_size); +AT_UDYNAMIC(bit_offset); +AT_UDYNAMIC(bit_size); + +////////////////////////////////////////////////////////////////// +// 0x1X +// + +// XXX stmt_list +AT_ADDRESS(low_pc); +taddr +at_high_pc(const die &d) +{ + value v(d[DW_AT::high_pc]); + switch (v.get_type()) { + case value::type::address: + return v.as_address(); + case value::type::constant: + case value::type::uconstant: + return at_low_pc(d) + v.as_uconstant(); + default: + throw format_error(to_string(DW_AT::high_pc) + " has unexpected type " + + to_string(v.get_type())); + } +} +AT_ENUM(language, DW_LANG); +AT_REFERENCE(discr); +AT_ANY(discr_value); // XXX Signed or unsigned +AT_ENUM(visibility, DW_VIS); +AT_REFERENCE(import); +// XXX string_length +AT_REFERENCE(common_reference); +AT_STRING(comp_dir); +AT_ANY(const_value); +AT_REFERENCE(containing_type); +// XXX default_value + +////////////////////////////////////////////////////////////////// +// 0x2X +// + +DW_INL at_inline(const die &d) +{ + // XXX Missing attribute is equivalent to DW_INL_not_inlined + // (DWARF4 section 3.3.8) + return (DW_INL)d[DW_AT::inline_].as_uconstant(); +} +AT_FLAG(is_optional); +AT_UDYNAMIC(lower_bound); // XXX Language-based default? 
+AT_STRING(producer); +AT_FLAG(prototyped); +// XXX return_addr +// XXX start_scope +AT_UDYNAMIC(bit_stride); +AT_UDYNAMIC(upper_bound); + +////////////////////////////////////////////////////////////////// +// 0x3X +// + +AT_REFERENCE(abstract_origin); +AT_ENUM(accessibility, DW_ACCESS); +// XXX const address_class +AT_FLAG(artificial); +// XXX base_types +AT_ENUM(calling_convention, DW_CC); +AT_UDYNAMIC(count); +expr_result +at_data_member_location(const die &d, expr_context *ctx, taddr base, taddr pc) +{ + value v(d[DW_AT::data_member_location]); + switch (v.get_type()) { + case value::type::constant: + case value::type::uconstant: + return {expr_result::type::address, base + v.as_uconstant()}; + case value::type::exprloc: + return v.as_exprloc().evaluate(ctx, base); + case value::type::loclist: + // XXX + throw std::runtime_error("not implemented"); + default: + throw format_error("DW_AT_data_member_location has unexpected type " + + to_string(v.get_type())); + } +} +// XXX decl_column decl_file decl_line +AT_FLAG(declaration); +// XXX discr_list +AT_ENUM(encoding, DW_ATE); +AT_FLAG(external); + +////////////////////////////////////////////////////////////////// +// 0x4X +// + +// XXX frame_base +die at_friend(const die &d) +{ + return d[DW_AT::friend_].as_reference(); +} +AT_ENUM(identifier_case, DW_ID); +// XXX macro_info +AT_REFERENCE(namelist_item); +AT_REFERENCE(priority); // XXX Computed might be useful +// XXX segment +AT_REFERENCE(specification); +// XXX static_link +AT_REFERENCE(type); +// XXX use_location +AT_FLAG(variable_parameter); +// XXX 7.11 The value DW_VIRTUALITY_none is equivalent to the absence +// of the DW_AT_virtuality attribute. 
+AT_ENUM(virtuality, DW_VIRTUALITY); +// XXX vtable_elem_location +AT_UDYNAMIC(allocated); +AT_UDYNAMIC(associated); + +////////////////////////////////////////////////////////////////// +// 0x5X +// + +// XXX data_location +AT_UDYNAMIC(byte_stride); +AT_ADDRESS(entry_pc); +AT_FLAG(use_UTF8); +AT_REFERENCE(extension); +rangelist +at_ranges(const die &d) +{ + return d[DW_AT::ranges].as_rangelist(); +} +// XXX trampoline +// XXX const call_column, call_file, call_line +AT_STRING(description); +// XXX const binary_scale +// XXX const decimal_scale +AT_REFERENCE(small); +// XXX const decimal_sign +// XXX const digit_count + +////////////////////////////////////////////////////////////////// +// 0x6X +// + +AT_STRING(picture_string); +AT_FLAG_(mutable); +AT_FLAG(threads_scaled); +AT_FLAG_(explicit); +AT_REFERENCE(object_pointer); +AT_ENUM(endianity, DW_END); +AT_FLAG(elemental); +AT_FLAG(pure); +AT_FLAG(recursive); +AT_REFERENCE(signature); // XXX Computed might be useful +AT_FLAG(main_subprogram); +// XXX const data_bit_offset +AT_FLAG(const_expr); +AT_FLAG(enum_class); +AT_STRING(linkage_name); + +rangelist +die_pc_range(const die &d) +{ + // DWARF4 section 2.17 + if (d.has(DW_AT::ranges)) + return at_ranges(d); + taddr low = at_low_pc(d); + taddr high = d.has(DW_AT::high_pc) ? at_high_pc(d) : (low + 1); + return rangelist({{low, high}}); +} + +DWARFPP_END_NAMESPACE diff --git a/3party/libelfin/dwarf/cursor.cc b/3party/libelfin/dwarf/cursor.cc new file mode 100644 index 0000000..22b28b1 --- /dev/null +++ b/3party/libelfin/dwarf/cursor.cc @@ -0,0 +1,207 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. 
+ +#include "internal.hh" + +#include +#include + +using namespace std; + +DWARFPP_BEGIN_NAMESPACE + +int64_t +cursor::sleb128() +{ + // Appendix C + uint64_t result = 0; + unsigned shift = 0; + while (pos < sec->end) { + uint8_t byte = *(uint8_t*)(pos++); + result |= (uint64_t)(byte & 0x7f) << shift; + shift += 7; + if ((byte & 0x80) == 0) { + if (shift < sizeof(result)*8 && (byte & 0x40)) + result |= -((uint64_t)1 << shift); + return result; + } + } + underflow(); + return 0; +} + +shared_ptr
+cursor::subsection() +{ + // Section 7.4 + const char *begin = pos; + section_length length = fixed(); + format fmt; + if (length < 0xfffffff0) { + fmt = format::dwarf32; + length += sizeof(uword); + } else if (length == 0xffffffff) { + length = fixed(); + fmt = format::dwarf64; + length += sizeof(uword) + sizeof(uint64_t); + } else { + throw format_error("initial length has reserved value"); + } + pos = begin + length; + return make_shared
(sec->type, begin, length, sec->ord, fmt); +} + +void +cursor::skip_initial_length() +{ + switch (sec->fmt) { + case format::dwarf32: + pos += sizeof(uword); + break; + case format::dwarf64: + pos += sizeof(uword) + sizeof(uint64_t); + break; + default: + throw logic_error("cannot skip initial length with unknown format"); + } +} + +void +cursor::skip_unit_type() +{ + pos += sizeof(sbyte); +} + +section_offset +cursor::offset() +{ + switch (sec->fmt) { + case format::dwarf32: + return fixed(); + case format::dwarf64: + return fixed(); + default: + throw logic_error("cannot read offset with unknown format"); + } +} + +void +cursor::string(std::string &out) +{ + size_t size; + const char *p = this->cstr(&size); + out.resize(size); + memmove(&out.front(), p, size); +} + +const char * +cursor::cstr(size_t *size_out) +{ + // Scan string size + const char *p = pos; + while (pos < sec->end && *pos) + pos++; + if (pos == sec->end) + throw format_error("unterminated string"); + if (size_out) + *size_out = pos - p; + pos++; + return p; +} + +void +cursor::skip_form(DW_FORM form) +{ + section_offset tmp; + + // Section 7.5.4 + switch (form) { + case DW_FORM::addr: + pos += sec->addr_size; + break; + case DW_FORM::sec_offset: + case DW_FORM::ref_addr: + case DW_FORM::strp: + switch (sec->fmt) { + case format::dwarf32: + pos += 4; + break; + case format::dwarf64: + pos += 8; + break; + case format::unknown: + throw logic_error("cannot read form with unknown format"); + } + break; + + // size+data forms + case DW_FORM::block1: + tmp = fixed(); + pos += tmp; + break; + case DW_FORM::block2: + tmp = fixed(); + pos += tmp; + break; + case DW_FORM::block4: + tmp = fixed(); + pos += tmp; + break; + case DW_FORM::block: + case DW_FORM::exprloc: + tmp = uleb128(); + pos += tmp; + break; + + // fixed-length forms + case DW_FORM::flag_present: + break; + case DW_FORM::flag: + case DW_FORM::data1: + case DW_FORM::ref1: + pos += 1; + break; + case DW_FORM::data2: + case DW_FORM::ref2: + 
pos += 2; + break; + case DW_FORM::data4: + case DW_FORM::ref4: + pos += 4; + break; + case DW_FORM::data8: + case DW_FORM::ref_sig8: + pos += 8; + break; + + // variable-length forms + case DW_FORM::sdata: + case DW_FORM::udata: + case DW_FORM::ref_udata: + while (pos < sec->end && (*(uint8_t*)pos & 0x80)) + pos++; + pos++; + break; + case DW_FORM::string: + while (pos < sec->end && *pos) + pos++; + pos++; + break; + + case DW_FORM::indirect: + skip_form((DW_FORM)uleb128()); + break; + + default: + throw format_error("unknown form " + to_string(form)); + } +} + +void +cursor::underflow() +{ + throw underflow_error("cannot read past end of DWARF section"); +} + +DWARFPP_END_NAMESPACE diff --git a/3party/libelfin/dwarf/data.hh b/3party/libelfin/dwarf/data.hh new file mode 100644 index 0000000..a9cd883 --- /dev/null +++ b/3party/libelfin/dwarf/data.hh @@ -0,0 +1,539 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#ifndef _DWARFPP_DW_HH_ +#define _DWARFPP_DW_HH_ + +#include +#include + +DWARFPP_BEGIN_NAMESPACE + +// Integer representations (Section 7.26) +typedef std::int8_t sbyte; +typedef std::uint8_t ubyte; +typedef std::uint16_t uhalf; +typedef std::uint32_t uword; + +// Section offsets and lengths +typedef std::uint64_t section_offset; +typedef std::uint64_t section_length; + +// A target machine address. Targets may use smaller addresses; this +// represents the largest supported address type. +typedef std::uint64_t taddr; + +// DIE tags (Section 7, figure 18). typedef, friend, and namespace +// have a trailing underscore because they are reserved words. 
+enum class DW_TAG { + array_type = 0x01, + class_type = 0x02, + entry_point = 0x03, + enumeration_type = 0x04, + formal_parameter = 0x05, + imported_declaration = 0x08, + label = 0x0a, + lexical_block = 0x0b, + member = 0x0d, + pointer_type = 0x0f, + reference_type = 0x10, + compile_unit = 0x11, + string_type = 0x12, + structure_type = 0x13, + subroutine_type = 0x15, + typedef_ = 0x16, + + union_type = 0x17, + unspecified_parameters = 0x18, + variant = 0x19, + common_block = 0x1a, + common_inclusion = 0x1b, + inheritance = 0x1c, + inlined_subroutine = 0x1d, + module = 0x1e, + ptr_to_member_type = 0x1f, + set_type = 0x20, + subrange_type = 0x21, + with_stmt = 0x22, + access_declaration = 0x23, + base_type = 0x24, + catch_block = 0x25, + const_type = 0x26, + constant = 0x27, + enumerator = 0x28, + file_type = 0x29, + friend_ = 0x2a, + + namelist = 0x2b, + namelist_item = 0x2c, + packed_type = 0x2d, + subprogram = 0x2e, + template_type_parameter = 0x2f, + template_value_parameter = 0x30, + thrown_type = 0x31, + try_block = 0x32, + variant_part = 0x33, + variable = 0x34, + volatile_type = 0x35, + dwarf_procedure = 0x36, + restrict_type = 0x37, + interface_type = 0x38, + namespace_ = 0x39, + imported_module = 0x3a, + unspecified_type = 0x3b, + partial_unit = 0x3c, + imported_unit = 0x3d, + condition = 0x3f, + + shared_type = 0x40, + type_unit = 0x41, + rvalue_reference_type = 0x42, + template_alias = 0x43, + lo_user = 0x4080, + hi_user = 0xffff, +}; + +std::string to_string(DW_TAG v); + +// Child determination (Section 7, figure 19). +enum class DW_CHILDREN : ubyte { + no = 0x00, + yes = 0x01, +}; + +std::string to_string(DW_CHILDREN v); + +// Attribute names (Section 7, figure 20). inline, friend, mutable, +// and explicit have a trailing underscore because they are reserved +// words. 
+enum class DW_AT { + sibling = 0x01,// reference + location = 0x02,// exprloc, loclistptr + name = 0x03,// string + ordering = 0x09,// constant + byte_size = 0x0b,// constant, exprloc, reference + bit_offset = 0x0c,// constant, exprloc, reference + bit_size = 0x0d,// constant, exprloc, reference + stmt_list = 0x10,// lineptr + low_pc = 0x11,// address + high_pc = 0x12,// address, constant + language = 0x13,// constant + discr = 0x15,// reference + discr_value = 0x16,// constant + visibility = 0x17,// constant + import = 0x18,// reference + string_length = 0x19,// exprloc, loclistptr + common_reference = 0x1a,// reference + comp_dir = 0x1b,// string + const_value = 0x1c,// block, constant, string + + containing_type = 0x1d,// reference + default_value = 0x1e,// reference + inline_ = 0x20,// constant + is_optional = 0x21,// flag + lower_bound = 0x22,// constant, exprloc, reference + producer = 0x25,// string + prototyped = 0x27,// flag + return_addr = 0x2a,// exprloc, loclistptr + start_scope = 0x2c,// constant, rangelistptr + bit_stride = 0x2e,// constant, exprloc, reference + upper_bound = 0x2f,// constant, exprloc, reference + abstract_origin = 0x31,// reference + accessibility = 0x32,// constant + address_class = 0x33,// constant + artificial = 0x34,// flag + base_types = 0x35,// reference + calling_convention = 0x36,// constant + count = 0x37,// constant, exprloc, reference + data_member_location = 0x38,// constant, exprloc, loclistptr + decl_column = 0x39,// constant + + decl_file = 0x3a,// constant + decl_line = 0x3b,// constant + declaration = 0x3c,// flag + discr_list = 0x3d,// block + encoding = 0x3e,// constant + external = 0x3f,// flag + frame_base = 0x40,// exprloc, loclistptr + friend_ = 0x41,// reference + identifier_case = 0x42,// constant + macro_info = 0x43,// macptr + namelist_item = 0x44,// reference + priority = 0x45,// reference + segment = 0x46,// exprloc, loclistptr + specification = 0x47,// reference + static_link = 0x48,// exprloc, 
loclistptr + type = 0x49,// reference + use_location = 0x4a,// exprloc, loclistptr + variable_parameter = 0x4b,// flag + virtuality = 0x4c,// constant + vtable_elem_location = 0x4d,// exprloc, loclistptr + + // DWARF 3 + allocated = 0x4e,// constant, exprloc, reference + associated = 0x4f,// constant, exprloc, reference + data_location = 0x50,// exprloc + byte_stride = 0x51,// constant, exprloc, reference + entry_pc = 0x52,// address + use_UTF8 = 0x53,// flag + extension = 0x54,// reference + ranges = 0x55,// rangelistptr + trampoline = 0x56,// address, flag, reference, string + call_column = 0x57,// constant + call_file = 0x58,// constant + call_line = 0x59,// constant + description = 0x5a,// string + binary_scale = 0x5b,// constant + decimal_scale = 0x5c,// constant + small = 0x5d,// reference + decimal_sign = 0x5e,// constant + digit_count = 0x5f,// constant + picture_string = 0x60,// string + mutable_ = 0x61,// flag + + threads_scaled = 0x62,// flag + explicit_ = 0x63,// flag + object_pointer = 0x64,// reference + endianity = 0x65,// constant + elemental = 0x66,// flag + pure = 0x67,// flag + recursive = 0x68,// flag + + // DWARF 4 + signature = 0x69,// reference + main_subprogram = 0x6a,// flag + data_bit_offset = 0x6b,// constant + const_expr = 0x6c,// flag + enum_class = 0x6d,// flag + linkage_name = 0x6e,// string + + lo_user = 0x2000, + hi_user = 0x3fff, +}; + +std::string to_string(DW_AT v); + +// Attribute form encodings (Section 7, figure 21) +enum class DW_FORM { + addr = 0x01,// address + block2 = 0x03,// block + block4 = 0x04,// block + data2 = 0x05,// constant + data4 = 0x06,// constant + data8 = 0x07,// constant + string = 0x08,// string + block = 0x09,// block + block1 = 0x0a,// block + data1 = 0x0b,// constant + flag = 0x0c,// flag + sdata = 0x0d,// constant + strp = 0x0e,// string + udata = 0x0f,// constant + ref_addr = 0x10,// reference + ref1 = 0x11,// reference + ref2 = 0x12,// reference + ref4 = 0x13,// reference + ref8 = 0x14,// reference + 
+ ref_udata = 0x15,// reference + indirect = 0x16,// (Section 7.5.3) + + // DWARF 4 + sec_offset = 0x17,// lineptr, loclistptr, macptr, rangelistptr + exprloc = 0x18,// exprloc + flag_present = 0x19,// flag + ref_sig8 = 0x20,// reference + implicit_const = 0x21, + loclistx = 0x22, + rnglistx = 0x23, + ref_sup8 = 0x24, + strx1 = 0x25, + strx2 = 0x26, + strx3 = 0x27, + strx4 = 0x28, + addrx1 = 0x29, + addrx2 = 0x2a, + addrx4 = 0x2c, + addrx3 = 0x2b, +}; + +std::string to_string(DW_FORM v); + +// DWARF operation encodings (Section 7.7.1 and figure 24) +enum class DW_OP : ubyte { + addr = 0x03,// [constant address (size target specific)] + deref = 0x06, + + const1u = 0x08,// [1-byte constant] + const1s = 0x09,// [1-byte constant] + const2u = 0x0a,// [2-byte constant] + const2s = 0x0b,// [2-byte constant] + const4u = 0x0c,// [4-byte constant] + const4s = 0x0d,// [4-byte constant] + const8u = 0x0e,// [8-byte constant] + const8s = 0x0f,// [8-byte constant] + constu = 0x10,// [ULEB128 constant] + consts = 0x11,// [SLEB128 constant] + dup = 0x12, + drop = 0x13, + over = 0x14, + pick = 0x15,// [1-byte stack index] + swap = 0x16, + rot = 0x17, + xderef = 0x18, + abs = 0x19, + and_ = 0x1a, + div = 0x1b, + + minus = 0x1c, + mod = 0x1d, + mul = 0x1e, + neg = 0x1f, + not_ = 0x20, + or_ = 0x21, + plus = 0x22, + plus_uconst = 0x23,// [ULEB128 addend] + shl = 0x24, + shr = 0x25, + shra = 0x26, + xor_ = 0x27, + skip = 0x2f,// [signed 2-byte constant] + bra = 0x28,// [signed 2-byte constant] + eq = 0x29, + ge = 0x2a, + gt = 0x2b, + le = 0x2c, + lt = 0x2d, + ne = 0x2e, + + // Literals 0..31 = (lit0 + literal) + lit0 = 0x30, + lit31 = 0x4f, + + // Registers 0..31 = (reg0 + regnum) + reg0 = 0x50, + reg31 = 0x6f, + + // Base register 0..31 = (breg0 + regnum) + breg0 = 0x70,// [SLEB128 offset] + breg31 = 0x8f,// [SLEB128 offset] + + regx = 0x90,// [ULEB128 register] + fbreg = 0x91,// [SLEB128 offset] + bregx = 0x92,// [ULEB128 register, SLEB128 offset] + piece = 0x93,// [ULEB128 size of 
piece addressed] + deref_size = 0x94,// [1-byte size of data retrieved] + xderef_size = 0x95,// [1-byte size of data retrieved] + nop = 0x96, + + // DWARF 3 + push_object_address = 0x97, + call2 = 0x98,// [2-byte offset of DIE] + call4 = 0x99,// [4-byte offset of DIE] + call_ref = 0x9a,// [4- or 8-byte offset of DIE] + form_tls_address = 0x9b, + call_frame_cfa = 0x9c, + bit_piece = 0x9d,// [ULEB128 size, ULEB128 offset] + + // DWARF 4 + implicit_value = 0x9e,// [ULEB128 size, block of that size] + stack_value = 0x9f, + + lo_user = 0xe0, + hi_user = 0xff, +}; + +std::string to_string(DW_OP v); + +// DW_AT::encoding constants (DWARF4 section 7.8 figure 25) +enum class DW_ATE { + address = 0x01, + boolean = 0x02, + complex_float = 0x03, + float_ = 0x04, + signed_ = 0x05, + signed_char = 0x06, + unsigned_ = 0x07, + unsigned_char = 0x08, + imaginary_float = 0x09, + packed_decimal = 0x0a, + numeric_string = 0x0b, + edited = 0x0c, + signed_fixed = 0x0d, + unsigned_fixed = 0x0e, + decimal_float = 0x0f, + + // DWARF 4 + UTF = 0x10, + + lo_user = 0x80, + hi_user = 0xff, +}; + +std::string to_string(DW_ATE v); + +// DW_AT::decimal_sign constants (DWARF4 section 7.8 figure 26) +enum class DW_DS { + unsigned_ = 0x01, + leading_overpunch = 0x02, + trailing_overpunch = 0x03, + leading_separate = 0x04, + trailing_separate = 0x05, +}; + +std::string to_string(DW_DS v); + +// DW_AT::endianity constants (DWARF4 section 7.8 figure 27) +enum class DW_END { + default_ = 0x00, + big = 0x01, + little = 0x02, + lo_user = 0x40, + hi_user = 0xff, +}; + +std::string to_string(DW_END v); + +// DW_AT::accessibility constants (DWARF4 section 7.9 figure 28) +enum class DW_ACCESS { + public_ = 0x01, + protected_ = 0x02, + private_ = 0x03, +}; + +std::string to_string(DW_ACCESS v); + +// DW_AT::visibility constants (DWARF4 section 7.10 figure 29) +enum class DW_VIS { + local = 0x01, + exported = 0x02, + qualified = 0x03, +}; + +std::string to_string(DW_VIS v); + +// DW_AT::virtuality constants 
(DWARF4 section 7.11 figure 30) +enum class DW_VIRTUALITY { + none = 0x00, + virtual_ = 0x01, + pure_virtual = 0x02, +}; + +std::string to_string(DW_VIRTUALITY v); + +// DW_AT::language constants (DWARF4 section 7.12 figure 31) +enum class DW_LANG { + C89 = 0x0001,// Lower bound 0 + C = 0x0002,// Lower bound 0 + Ada83 = 0x0003,// Lower bound 1 + C_plus_plus = 0x0004,// Lower bound 0 + Cobol74 = 0x0005,// Lower bound 1 + Cobol85 = 0x0006,// Lower bound 1 + Fortran77 = 0x0007,// Lower bound 1 + Fortran90 = 0x0008,// Lower bound 1 + Pascal83 = 0x0009,// Lower bound 1 + Modula2 = 0x000a,// Lower bound 1 + Java = 0x000b,// Lower bound 0 + C99 = 0x000c,// Lower bound 0 + Ada95 = 0x000d,// Lower bound 1 + Fortran95 = 0x000e,// Lower bound 1 + PLI = 0x000f,// Lower bound 1 + + ObjC = 0x0010,// Lower bound 0 + ObjC_plus_plus = 0x0011,// Lower bound 0 + UPC = 0x0012,// Lower bound 0 + D = 0x0013,// Lower bound 0 + Python = 0x0014,// Lower bound 0 + lo_user = 0x8000, + hi_user = 0xffff, +}; + +std::string to_string(DW_LANG v); + +// DW_AT::identifier_case constants (DWARF4 section 7.14 figure 32) +enum class DW_ID { + case_sensitive = 0x00, + up_case = 0x01, + down_case = 0x02, + case_insensitive = 0x03, +}; + +std::string to_string(DW_ID v); + +// DW_AT::calling_convention constants (DWARF4 section 7.15 figure 33) +enum class DW_CC { + normal = 0x01, + program = 0x02, + nocall = 0x03, + lo_user = 0x40, + hi_user = 0xff, +}; + +std::string to_string(DW_CC v); + +// DW_AT::inline constants (DWARF4 section 7.16 figure 34) +enum class DW_INL { + not_inlined = 0x00, + inlined = 0x01, + declared_not_inlined = 0x02, + declared_inlined = 0x03, +}; + +std::string to_string(DW_INL v); + +// DW_AT::ordering constants (DWARF4 section 7.17 figure 35) +enum class DW_ORD { + row_major = 0x00, + col_major = 0x01, +}; + +std::string to_string(DW_ORD v); + +// DW_AT::discr_list constants (DWARF4 section 7.18 figure 36) +enum class DW_DSC { + label = 0x00, + range = 0x01, +}; + +std::string 
to_string(DW_DSC v); + +// Line number standard opcodes (DWARF4 section 7.21 figure 37) +enum class DW_LNS { + copy = 0x01, + advance_pc = 0x02, + advance_line = 0x03, + set_file = 0x04, + set_column = 0x05, + negate_stmt = 0x06, + set_basic_block = 0x07, + const_add_pc = 0x08, + fixed_advance_pc = 0x09, + + // DWARF 3 + set_prologue_end = 0x0a, + set_epilogue_begin = 0x0b, + set_isa = 0x0c, +}; + +std::string to_string(DW_LNS v); + +// Line number extended opcodes (DWARF4 section 7.21 figure 38) +enum class DW_LNE { + end_sequence = 0x01, + set_address = 0x02, + define_file = 0x03, + + // DWARF 4 + set_discriminator = 0x04, + + // DWARF 3 + lo_user = 0x80, + hi_user = 0xff, +}; + +std::string to_string(DW_LNE v); + +DWARFPP_END_NAMESPACE + +#endif diff --git a/3party/libelfin/dwarf/die.cc b/3party/libelfin/dwarf/die.cc new file mode 100644 index 0000000..a87c018 --- /dev/null +++ b/3party/libelfin/dwarf/die.cc @@ -0,0 +1,202 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#include "internal.hh" + +using namespace std; + +DWARFPP_BEGIN_NAMESPACE + +die::die(const unit *cu) + : cu(cu), abbrev(nullptr) +{ +} + +const unit & +die::get_unit() const +{ + return *cu; +} + +section_offset +die::get_section_offset() const +{ + return cu->get_section_offset() + offset; +} + +void +die::read(section_offset off) +{ + cursor cur(cu->data(), off); + + offset = off; + + abbrev_code acode = cur.uleb128(); + if (acode == 0) { + abbrev = nullptr; + next = cur.get_section_offset(); + return; + } + abbrev = &cu->get_abbrev(acode); + + tag = abbrev->tag; + + // XXX We can pre-compute almost all of this work in the + // abbrev_entry. 
+ attrs.clear(); + attrs.reserve(abbrev->attributes.size()); + for (auto &attr : abbrev->attributes) { + attrs.push_back(cur.get_section_offset()); + cur.skip_form(attr.form); + } + next = cur.get_section_offset(); +} + +bool +die::has(DW_AT attr) const +{ + if (!abbrev) + return false; + // XXX Totally lame + for (auto &a : abbrev->attributes) + if (a.name == attr) + return true; + return false; +} + +value +die::operator[](DW_AT attr) const +{ + // XXX We can pre-compute almost all of this work in the + // abbrev_entry. + if (abbrev) { + int i = 0; + for (auto &a : abbrev->attributes) { + if (a.name == attr) + return value(cu, a.name, a.form, a.type, attrs[i]); + i++; + } + } + throw out_of_range("DIE does not have attribute " + to_string(attr)); +} + +value +die::resolve(DW_AT attr) const +{ + // DWARF4 section 2.13, DWARF4 section 3.3.8 + + // DWARF4 is unclear about what to do when there's both a + // DW_AT::specification and a DW_AT::abstract_origin. + // Conceptually, though, a concrete inlined instance cannot + // itself complete an external function that wasn't first + // completed by its abstract instance, so we first try to + // resolve abstract_origin, then we resolve specification. 
+ + // XXX This traverses the abbrevs at least twice and + // potentially several more times + + if (has(attr)) + return (*this)[attr]; + + if (has(DW_AT::abstract_origin)) { + die ao = (*this)[DW_AT::abstract_origin].as_reference(); + if (ao.has(attr)) + return ao[attr]; + if (ao.has(DW_AT::specification)) { + die s = ao[DW_AT::specification].as_reference(); + if (s.has(attr)) + return s[attr]; + } + } else if (has(DW_AT::specification)) { + die s = (*this)[DW_AT::specification].as_reference(); + if (s.has(attr)) + return s[attr]; + } + + return value(); +} + +die::iterator +die::begin() const +{ + if (!abbrev || !abbrev->children) + return end(); + return iterator(cu, next); +} + +die::iterator::iterator(const unit *cu, section_offset off) + : d(cu) +{ + d.read(off); +} + +die::iterator & +die::iterator::operator++() +{ + if (!d.abbrev) + return *this; + + if (!d.abbrev->children) { + // The DIE has no children, so its successor follows + // immediately + d.read(d.next); + } else if (d.has(DW_AT::sibling)) { + // They made it easy on us. Follow the sibling + // pointer. XXX Probably worth optimizing + d = d[DW_AT::sibling].as_reference(); + } else { + // It's a hard-knock life. We have to iterate through + // the children to find the next DIE. + // XXX Particularly unfortunate if the user is doing a + // DFS, since this will result in N^2 behavior. Maybe + // a small cache of terminator locations in the CU? + iterator sub(d.cu, d.next); + while (sub->abbrev) + ++sub; + d.read(sub->next); + } + + return *this; +} + +const vector > +die::attributes() const +{ + vector > res; + + if (!abbrev) + return res; + + // XXX Quite slow, especially when using this to traverse an + // entire DIE tree since each DIE will produce a new vector + // (whereas other vectors get reused). Might be worth a + // custom iterator. 
+ int i = 0; + for (auto &a : abbrev->attributes) { + res.push_back(make_pair(a.name, value(cu, a.name, a.form, a.type, attrs[i]))); + i++; + } + return res; +} + +bool +die::operator==(const die &o) const +{ + return cu == o.cu && offset == o.offset; +} + +bool +die::operator!=(const die &o) const +{ + return !(*this == o); +} + +DWARFPP_END_NAMESPACE + +size_t +std::hash::operator()(const dwarf::die &a) const +{ + return hash()(a.cu) ^ + hash()(a.get_unit_offset()); +} diff --git a/3party/libelfin/dwarf/die_str_map.cc b/3party/libelfin/dwarf/die_str_map.cc new file mode 100644 index 0000000..b035a2f --- /dev/null +++ b/3party/libelfin/dwarf/die_str_map.cc @@ -0,0 +1,118 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#include "internal.hh" + +#include +#include + +using namespace std; + +// XXX Make this more readily available? +namespace std { + template<> + struct hash + { + typedef size_t result_type; + typedef dwarf::DW_TAG argument_type; + result_type operator()(argument_type a) const + { + return (result_type)a; + } + }; +} + +DWARFPP_BEGIN_NAMESPACE + +struct string_hash +{ + typedef size_t result_type; + typedef const char *argument_type; + result_type operator()(const char *s) const + { + result_type h = 0; + for (; *s; ++s) + h += 33 * h + *s; + return h; + } +}; + +struct string_eq +{ + typedef bool result_type; + typedef const char *first_argument_type; + typedef const char *second_argument_type; + bool operator()(const char *x, const char *y) const + { + return strcmp(x, y) == 0; + } +}; + +struct die_str_map::impl +{ + impl(const die &parent, DW_AT attr, + const initializer_list &accept) + : attr(attr), accept(accept.begin(), accept.end()), + pos(parent.begin()), end(parent.end()) { } + + unordered_map str_map; + DW_AT attr; + unordered_set accept; + die::iterator pos, end; + die invalid; +}; + +die_str_map::die_str_map(const 
die &parent, DW_AT attr, + const initializer_list &accept) + : m(make_shared(parent, attr, accept)) +{ +} + +die_str_map +die_str_map::from_type_names(const die &parent) +{ + return die_str_map + (parent, DW_AT::name, + // All DWARF type tags (this is everything that ends + // with _type except thrown_type). + {DW_TAG::array_type, DW_TAG::class_type, + DW_TAG::enumeration_type, DW_TAG::pointer_type, + DW_TAG::reference_type, DW_TAG::string_type, + DW_TAG::structure_type, DW_TAG::subroutine_type, + DW_TAG::union_type, DW_TAG::ptr_to_member_type, + DW_TAG::set_type, DW_TAG::subrange_type, + DW_TAG::base_type, DW_TAG::const_type, + DW_TAG::file_type, DW_TAG::packed_type, + DW_TAG::volatile_type, DW_TAG::restrict_type, + DW_TAG::interface_type, DW_TAG::unspecified_type, + DW_TAG::shared_type, DW_TAG::rvalue_reference_type}); +} + +const die & +die_str_map::operator[](const char *val) const +{ + // Do we have this value? + auto it = m->str_map.find(val); + if (it != m->str_map.end()) + return it->second; + // Read more children, caching each accepted name, until we find the value or the end + while (m->pos != m->end) { + const die d = *m->pos; // copy, not a reference: ++pos rewrites the iterator's DIE in place + ++m->pos; + + if (!m->accept.count(d.tag) || !d.has(m->attr)) + continue; + value dval(d[m->attr]); + if (dval.get_type() != value::type::string) + continue; + const char *dstr = dval.as_cstr(); + m->str_map[dstr] = d; + if (strcmp(val, dstr) == 0) + return m->str_map[dstr]; + } + // Not found + return m->invalid; +} + +DWARFPP_END_NAMESPACE diff --git a/3party/libelfin/dwarf/dwarf++.hh b/3party/libelfin/dwarf/dwarf++.hh new file mode 100644 index 0000000..a53f87e --- /dev/null +++ b/3party/libelfin/dwarf/dwarf++.hh @@ -0,0 +1,1558 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file.
+ +#ifndef _DWARFPP_HH_ +#define _DWARFPP_HH_ + +#ifndef DWARFPP_BEGIN_NAMESPACE +#define DWARFPP_BEGIN_NAMESPACE namespace dwarf { +#define DWARFPP_END_NAMESPACE } +#endif + +#include "data.hh" +#include "small_vector.hh" + +#include +#include +#include +#include +#include +#include + +DWARFPP_BEGIN_NAMESPACE + +// Forward declarations +class dwarf; +class loader; +class compilation_unit; +class type_unit; +class die; +class value; +class expr; +class expr_context; +class expr_result; +class rangelist; +class line_table; + +// Internal type forward-declarations +struct section; +struct abbrev_entry; +struct cursor; + +// XXX Audit for binary-compatibility + +// XXX Might be able to reduce private coupling by making class +// section public (and clean it up and maybe rename it slice) and +// provide methods to get the backing data of things. +// +// XXX Make slice generic, without formatting information? Still want +// lightweight cursors, so maybe the cursor methods that need the +// format should take a const reference to a format stored in the +// compilation unit? + +// XXX operator==/!= and hash functions + +// XXX Indicate DWARF4 in all spec references + +// XXX Big missing support: .debug_aranges, .debug_frame, loclists, +// macros + +////////////////////////////////////////////////////////////////// +// DWARF files +// + +/** + * An exception indicating malformed DWARF data. + */ +class format_error : public std::runtime_error +{ +public: + explicit format_error(const std::string &what_arg) + : std::runtime_error(what_arg) { } + explicit format_error(const char *what_arg) + : std::runtime_error(what_arg) { } +}; + +/** + * DWARF section types. These correspond to the names of ELF + * sections, though DWARF can be embedded in other formats. + */ +enum class section_type +{ + abbrev, + aranges, + frame, + info, + line, + loc, + macinfo, + pubnames, + pubtypes, + ranges, + str, + types, +}; + +std::string +to_string(section_type v); + +/** + * A DWARF file. 
This class is internally reference counted and can + * be efficiently copied. + * + * Objects retrieved from this object may depend on it; the caller is + * responsible for keeping this object live as long as any retrieved + * object may be in use. + */ +class dwarf +{ +public: + /** + * Construct a DWARF file that is backed by sections read from + * the given loader. + */ + explicit dwarf(const std::shared_ptr &l); + + /** + * Construct a DWARF file that is initially not valid. + */ + dwarf() = default; + dwarf(const dwarf&) = default; + dwarf(dwarf&&) = default; + ~dwarf(); + + dwarf& operator=(const dwarf &o) = default; + dwarf& operator=(dwarf &&o) = default; + + bool operator==(const dwarf &o) const + { + return m == o.m; + } + + bool operator!=(const dwarf &o) const + { + return m != o.m; + } + + /** + * Return true if this object represents a DWARF file. + * Default constructed dwarf objects are not valid. + */ + bool valid() const + { + return !!m; + } + + // XXX This allows the compilation units to be modified and + // ties us to a vector. Probably should return an opaque + // iterable collection over const references. + /** + * Return the list of compilation units in this DWARF file. + */ + const std::vector &compilation_units() const; + + /** + * Return the type unit with the given signature. If the + * signature does not correspond to a type unit, throws + * out_of_range. + */ + const type_unit &get_type_unit(uint64_t type_signature) const; + + /** + * \internal Retrieve the specified section from this file. + * If the section does not exist, throws format_error. + */ + std::shared_ptr
get_section(section_type type) const; + +private: + struct impl; + std::shared_ptr m; +}; + +/** + * An interface for lazily loading DWARF sections. + */ +class loader +{ +public: + virtual ~loader() { } + + /** + * Load the requested DWARF section into memory and return a + * pointer to the beginning of it. This memory must remain + * valid and unchanged until the loader is destroyed. If the + * requested section does not exist, this should return + * nullptr. If the section exists but cannot be loaded for + * any reason, this should throw an exception. + */ + virtual const void *load(section_type section, size_t *size_out) = 0; +}; + +/** + * The base class for a compilation unit or type unit within a DWARF + * file. A unit consists of a rooted tree of DIEs, plus additional + * metadata that depends on the type of unit. + */ +class unit +{ +public: + virtual ~unit() = 0; + + bool operator==(const unit &o) const + { + return m == o.m; + } + + bool operator!=(const unit &o) const + { + return m != o.m; + } + + /** + * Return true if this object is valid. Default constructed + * unit objects are not valid. + */ + bool valid() const + { + return !!m; + } + + /** + * Return the dwarf file this unit is in. + */ + const dwarf &get_dwarf() const; + + /** + * Return the byte offset of this unit's header in its + * section (.debug_info or .debug_types). + */ + section_offset get_section_offset() const; + + /** + * Return the root DIE of this unit. For a compilation unit, + * this should be a DW_TAG::compilation_unit or + * DW_TAG::partial_unit. + */ + const die &root() const; + + /** + * \internal Return the data for this unit. + */ + const std::shared_ptr
&data() const; + + /** + * \internal Return the abbrev for the specified abbrev + * code. + */ + const abbrev_entry &get_abbrev(std::uint64_t acode) const; + +protected: + friend struct ::std::hash; + struct impl; + std::shared_ptr m; +}; + +/** + * A compilation unit within a DWARF file. Most of the information + * in a DWARF file is divided up by compilation unit. This class is + * internally reference counted and can be efficiently copied. + */ +class compilation_unit : public unit +{ +public: + compilation_unit() = default; + compilation_unit(const compilation_unit &o) = default; + compilation_unit(compilation_unit &&o) = default; + + compilation_unit& operator=(const compilation_unit &o) = default; + compilation_unit& operator=(compilation_unit &&o) = default; + + /** + * \internal Construct a compilation unit whose header begins + * offset bytes into the .debug_info section of file. + */ + compilation_unit(const dwarf &file, section_offset offset); + + /** + * Return the line number table of this compilation unit. + * Returns an invalid line table if this unit has no line + * table. + */ + const line_table &get_line_table() const; +}; + +/** + * A type unit. Type units allow complex type information to be + * shared between compilation units. + */ +class type_unit : public unit +{ +public: + type_unit() = default; + type_unit(const type_unit &o) = default; + type_unit(type_unit &&o) = default; + + type_unit &operator=(const type_unit &o) = default; + type_unit &operator=(type_unit &&o) = default; + + /** + * \internal Construct a type unit whose header begins offset + * bytes into the .debug_types section of file. + */ + type_unit(const dwarf &file, section_offset offset); + + /** + * Return the 64-bit unique signature that identifies this + * type unit. This is how DIEs from other units refer to the type + * described by this unit. + */ + uint64_t get_type_signature() const; + + // XXX Can a type unit contain more than one top-level DIE?
+ // The description of type_offset makes it sound like it + // might. + + /** + * Return the DIE of the type described by this type unit. + * This may not be the root DIE of this unit if the type is + * nested in namespaces or other structures. + */ + const die &type() const; +}; + +////////////////////////////////////////////////////////////////// +// Debugging information entries (DIEs) +// + +/** + * A Debugging Information Entry, or DIE. The basic unit of + * information in a DWARF file. + */ +class die +{ + // XXX Make this class better for use in maps. Currently dies + // are fairly big and expensive to copy, but most of that + // information can be constructed lazily. This is also bad + // for use in caches since it will keep the DWARF file alive. + // OTOH, maybe caches need eviction anyway. +public: + DW_TAG tag; + + die() : cu(nullptr), abbrev(nullptr) { } + die(const die &o) = default; + die(die &&o) = default; + + die& operator=(const die &o) = default; + die& operator=(die &&o) = default; + + /** + * Return true if this object represents a DIE in a DWARF + * file. Default constructed objects are not valid and some + * methods return invalid DIEs to indicate failures. + */ + bool valid() const + { + return abbrev != nullptr; + } + + /** + * Return the unit containing this DIE. + */ + const unit &get_unit() const; + + /** + * Return this DIE's byte offset within its compilation unit. + */ + section_offset get_unit_offset() const + { + return offset; + } + + /** + * Return this DIE's byte offset within its section. + */ + section_offset get_section_offset() const; + + /** + * Return true if this DIE has the requested attribute. + */ + bool has(DW_AT attr) const; + + /** + * Return the value of attr. Throws out_of_range if this DIE + * does not have the specified attribute. It is generally + * better to use the type-safe attribute getters (the global + * functions beginning with at_*) when possible. 
+ */ + value operator[](DW_AT attr) const; + + /** + * Return the value of attr after resolving specification and + * abstract origin references. If the attribute cannot be + * resolved, returns an invalid value. Declaration DIEs can + * "complete" a previous non-defining declaration DIE and + * similarly inherit the non-defining declaration's attributes + * (DWARF4 section 2.13). Likewise, any DIE that is a child of + * a concrete inlined instance can specify another DIE as its + * "abstract origin" and the original DIE will inherit the + * attributes of its abstract origin (DWARF4 section 3.3.8.2). + */ + value resolve(DW_AT attr) const; + + class iterator; + + /** + * Return an iterator over the children of this DIE. Note + * that the DIEs returned by this iterator are temporary, so + * if you need to store a DIE for more than one loop + * iteration, you must copy it. + */ + iterator begin() const; + iterator end() const; + + /** + * Return a vector of the attributes of this DIE. + */ + const std::vector > attributes() const; + + bool operator==(const die &o) const; + bool operator!=(const die &o) const; + +private: + friend class unit; + friend class type_unit; + friend class value; + // XXX If we can get the CU, we don't need this + friend struct ::std::hash; + + const unit *cu; + // The abbrev of this DIE. By convention, if this DIE + // represents a sibling list terminator, this is null. This + // object is kept live by the CU. + const abbrev_entry *abbrev; + // The beginning of this DIE, relative to the CU. + section_offset offset; + // Offsets of attributes, relative to cu's subsection. The + // vast majority of DIEs tend to have six or fewer attributes, + // so we reserve space in the DIE itself for six attributes. + small_vector attrs; + // The offset of the next DIE, relative to cu's subsection. + // This is set even for sibling list terminators. + section_offset next; + + die(const unit *cu); + + /** + * Read this DIE from the given offset in cu.
+ */ + void read(section_offset off); +}; + +/** + * An iterator over a sequence of sibling DIEs. + */ +class die::iterator +{ +public: + iterator() = default; + iterator(const iterator &o) = default; + iterator(iterator &&o) = default; + + iterator& operator=(const iterator &o) = default; + iterator& operator=(iterator &&o) = default; + + const die &operator*() const + { + return d; + } + + const die *operator->() const + { + return &d; + } + + // XXX Make this less confusing by implementing operator== instead + bool operator!=(const iterator &o) const + { + // Quick test of abbrevs. In particular, this weeds + // out non-end against end, which is a common + // comparison while iterating, though it also weeds + // out many other things. + if (d.abbrev != o.d.abbrev) + return true; + + // Same, possibly NULL abbrev. If abbrev is NULL, + // then next's are uncomparable, so we need to stop + // now. We consider all ends to be the same, without + // comparing cu's. + if (d.abbrev == nullptr) + return false; + + // Comparing two non-end abbrevs. + return d.next != o.d.next || d.cu != o.d.cu; + } + + iterator &operator++(); + +private: + friend class die; + + iterator(const unit *cu, section_offset off); + + die d; +}; + +inline die::iterator +die::end() const +{ + return iterator(); +} + +/** + * An exception indicating that a value is not of the requested type. + */ +class value_type_mismatch : public std::logic_error +{ +public: + explicit value_type_mismatch(const std::string &what_arg) + : std::logic_error(what_arg) { } + explicit value_type_mismatch(const char *what_arg) + : std::logic_error(what_arg) { } +}; + +/** + * The value of a DIE attribute. + * + * This is logically a union of many different types. Each type has a + * corresponding as_* method that will return the value as that type + * or throw value_type_mismatch if the attribute is not of the + * requested type. + * + * Values of "constant" type are somewhat ambiguous and + * context-dependent.
Constant forms with specified signed-ness have + * type "uconstant" or "sconstant", while other constant forms have + * type "constant". If the value's type is "constant", it can be + * retrieved using either as_uconstant or as_sconstant. + * + * Some other types can also be coerced. These are documented on the + * individual as_* methods. + * + * There is no as_line; while there is an attribute for line tables, + * line tables are really associated with compilation units (and + * require additional context from the compilation unit). Use + * compilation_unit::get_line_table instead. + */ +class value +{ +public: + enum class type + { + invalid, + address, + block, + constant, + uconstant, + sconstant, + exprloc, + flag, + line, + loclist, + mac, + rangelist, + reference, + string + }; + + /** + * Construct a value with type `type::invalid`. + */ + value() : cu(nullptr), typ(type::invalid) { } + + value(const value &o) = default; + value(value &&o) = default; + + value& operator=(const value &o) = default; + value& operator=(value &&o) = default; + + /** + * Return true if this object represents a valid value. + * Default constructed values are not valid. + */ + bool valid() const + { + return typ != type::invalid; + } + + /** + * Return this value's byte offset within its compilation + * unit. + */ + section_offset get_unit_offset() const + { + return offset; + } + + /** + * Return this value's byte offset within its section. + */ + section_offset get_section_offset() const; + + type get_type() const + { + return typ; + } + + /** + * Return this value's attribute encoding. This automatically + * resolves indirect encodings, so this will never return + * DW_FORM::indirect. Note that the mapping from forms to + * types is non-trivial and often depends on the attribute + * (especially prior to DWARF 4). + */ + DW_FORM get_form() const + { + return form; + } + + /** + * Return this value as a target machine address.
+ */ + taddr as_address() const; + + /** + * Return this value as a block. The returned pointer points + * directly into the section data, so the caller must ensure + * that remains valid as long as the data is in use. + * *size_out is set to the length of the returned block, in + * bytes. + * + * This automatically coerces "exprloc" type values by + * returning the raw bytes of the encoded expression. + */ + const void *as_block(size_t *size_out) const; + + /** + * Return this value as an unsigned constant. This + * automatically coerces "constant" type values by + * interpreting their bytes as unsigned. + */ + uint64_t as_uconstant() const; + + /** + * Return this value as a signed constant. This automatically + * coerces "constant" type values by interpreting their bytes + * as twos-complement signed values. + */ + int64_t as_sconstant() const; + + /** + * Return this value as an expression. This automatically + * coerces "block" type values by interpreting the bytes in + * the block as an expression (prior to DWARF 4, exprlocs were + * always encoded as blocks, though the library automatically + * distinguishes these types based on context). + */ + expr as_exprloc() const; + + /** + * Return this value as a boolean flag. + */ + bool as_flag() const; + + // XXX loclistptr, macptr + + /** + * Return this value as a rangelist. + */ + rangelist as_rangelist() const; + + /** + * For a reference type value, return the referenced DIE. + * This DIE may be in a different compilation unit or could + * be a DIE in a type unit. + */ + die as_reference() const; + + /** + * Return this value as a string. + */ + std::string as_string() const; + + /** + * Fill the given string buffer with the string value of this + * value. This is useful to minimize allocation when reading + * several string-type values. + */ + void as_string(std::string &buf) const; + + /** + * Return this value as a NUL-terminated character string. 
+ * The returned pointer points directly into the section data, + * so the caller must ensure that remains valid as long as the + * data is in use. *size_out, if not NULL, is set to the + * length of the returned string without the NUL-terminator. + */ + const char *as_cstr(size_t *size_out = nullptr) const; + + /** + * Return this value as a section offset. This is applicable + * to lineptr, loclistptr, macptr, and rangelistptr. + */ + section_offset as_sec_offset() const; + +private: + friend class die; + + value(const unit *cu, + DW_AT name, DW_FORM form, type typ, section_offset offset); + + void resolve_indirect(DW_AT name); + + const unit *cu; + DW_FORM form; + type typ; + section_offset offset; +}; + +std::string +to_string(value::type v); + +std::string +to_string(const value &v); + +////////////////////////////////////////////////////////////////// +// Expressions and location descriptions +// + +/** + * An exception during expression evaluation. + */ +class expr_error : public std::runtime_error +{ +public: + explicit expr_error(const std::string &what_arg) + : std::runtime_error(what_arg) { } + explicit expr_error(const char *what_arg) + : std::runtime_error(what_arg) { } +}; + +/** + * A DWARF expression or location description. + */ +class expr +{ +public: + /** + * Short-hand for evaluate(ctx, {}). + */ + expr_result evaluate(expr_context *ctx) const; + + /** + * Short-hand for evaluate(ctx, {argument}). + */ + expr_result evaluate(expr_context *ctx, taddr argument) const; + + /** + * Return the result of evaluating this expression using the + * specified expression context. The expression stack will be + * initialized with the given arguments such that the first + * argument is at the top of the stack and the last argument + * at the bottom of the stack. + * + * Throws expr_error if there is an error evaluating the + * expression (such as an unknown operation, stack underflow, + * bounds error, etc.)
+ */ + expr_result evaluate(expr_context *ctx, const std::initializer_list &arguments) const; + +private: + // XXX This will need more information for some operations + expr(const unit *cu, + section_offset offset, section_length len); + + friend class value; + + const unit *cu; + section_offset offset; + section_length len; +}; + +/** + * An interface that provides contextual information for expression + * evaluation. Callers of expr::evaluate are expected to subclass + * this in order to provide this information to the expression + * evaluation engine. The default implementation throws expr_error + * for all methods. + */ +class expr_context +{ +public: + virtual ~expr_context() { } + + /** + * Return the value stored in register regnum. This is used + * to implement DW_OP_breg* operations. + */ + virtual taddr reg(unsigned regnum) + { + throw expr_error("DW_OP_breg* operations not supported"); + } + + /** + * Implement DW_OP_deref_size. + */ + virtual taddr deref_size(taddr address, unsigned size) + { + throw expr_error("DW_OP_deref_size operations not supported"); + } + + /** + * Implement DW_OP_xderef_size. + */ + virtual taddr xderef_size(taddr address, taddr asid, unsigned size) + { + throw expr_error("DW_OP_xderef_size operations not supported"); + } + + /** + * Implement DW_OP_form_tls_address. + */ + virtual taddr form_tls_address(taddr address) + { + throw expr_error("DW_OP_form_tls_address operations not supported"); + } +}; + +/** + * An instance of expr_context that throws expr_error for all methods. + * This is equivalent to the default construction of expr_context, but + * often more convenient to use. + */ +extern expr_context no_expr_context; + +// XXX Provide methods to check type and fetch value? +/** + * The result of evaluating a DWARF expression or location + * description. + */ +class expr_result +{ +public: + enum class type { + /** + * value specifies the address in memory of an object. 
+ * This is also the result type used for general + * expressions that do not refer to object locations. + */ + address, + /** + * value specifies a register storing an object. + */ + reg, + /** + * The object does not have a location. value is the + * value of the object. + */ + literal, + /** + * The object does not have a location. Its value is + * pointed to by the 'implicit' field. + */ + implicit, + /** + * The object is present in the source, but not in the + * object code, and hence does not have a location or + * a value. + */ + empty, + }; + + /** + * For location descriptions, the type of location this result + * describes. + */ + type location_type; + + /** + * For general-purpose expressions, the result of expression. + * For address location descriptions, the address in memory of + * the object. For register location descriptions, the + * register storing the object. For literal location + * descriptions, the value of the object. + */ + taddr value; + + /** + * For implicit location descriptions, a pointer to a block + * representing the value in the memory representation of the + * target machine. + */ + const char *implicit; + size_t implicit_len; + + // XXX Composite locations +}; + +std::string +to_string(expr_result::type v); + +////////////////////////////////////////////////////////////////// +// Range lists +// + +/** + * A DWARF range list describing a set of possibly non-contiguous + * addresses. + */ +class rangelist +{ +public: + /** + * \internal Construct a range list whose data begins at the + * given offset in sec. cu_addr_size is the address size of + * the associated compilation unit. cu_low_pc is the + * DW_AT::low_pc attribute of the compilation unit containing + * the referring DIE or 0 (this is used as the base address of + * the range list). + */ + rangelist(const std::shared_ptr
&sec, section_offset off, + unsigned cu_addr_size, taddr cu_low_pc); + + /** + * Construct a range list from a sequence of {low, high} + * pairs. + */ + rangelist(const std::initializer_list > &ranges); + + /** + * Construct an empty range list. + */ + rangelist() = default; + + /** Copy constructor */ + rangelist(const rangelist &o) = default; + /** Move constructor */ + rangelist(rangelist &&o) = default; + + rangelist& operator=(const rangelist &o) = default; + rangelist& operator=(rangelist &&o) = default; + + class entry; + typedef entry value_type; + + class iterator; + + /** + * Return an iterator over the entries in this range list. + * The ranges returned by this iterator are temporary, so if + * you need to store a range for more than one loop iteration, + * you must copy it. + */ + iterator begin() const; + + /** + * Return an iterator to one past the last entry in this range + * list. + */ + iterator end() const; + + /** + * Return true if this range list contains the given address. + */ + bool contains(taddr addr) const; + +private: + std::vector synthetic; + std::shared_ptr
sec; + taddr base_addr; +}; + +/** + * An entry in a range list. The range spans addresses [low, high). + */ +class rangelist::entry +{ +public: + taddr low, high; + + /** + * Return true if addr is within this entry's bounds. + */ + bool contains(taddr addr) const + { + return low <= addr && addr < high; + } +}; + +/** + * An iterator over a sequence of ranges in a range list. + */ +class rangelist::iterator +{ +public: + /** + * \internal Construct an end iterator. + */ + iterator() : sec(nullptr), base_addr(0), pos(0) { } + + /** + * \internal Construct an iterator that reads rangelist data + * from the beginning of the given section and starts with the + * given base address. + */ + iterator(const std::shared_ptr
&sec, taddr base_addr); + + /** Copy constructor */ + iterator(const iterator &o) = default; + /** Move constructor */ + iterator(iterator &&o) = default; + + iterator& operator=(const iterator &o) = default; + iterator& operator=(iterator &&o) = default; + + /** + * Return the current range list entry. This entry is reused + * internally, so the caller should copy it if it needs to + * persist past the next increment. + */ + const rangelist::entry &operator*() const + { + return entry; + } + + /** Dereference operator */ + const rangelist::entry *operator->() const + { + return &entry; + } + + /** Equality operator */ + bool operator==(const iterator &o) const + { + return sec == o.sec && pos == o.pos; + } + + /** Inequality operator */ + bool operator!=(const iterator &o) const + { + return !(*this == o); + } + + /** + * Increment this iterator to point to the next range list + * entry. + */ + iterator &operator++(); + +private: + std::shared_ptr
sec; + taddr base_addr; + section_offset pos; + rangelist::entry entry; +}; + +////////////////////////////////////////////////////////////////// +// Line number tables +// + +/** + * A DWARF line number table. A line number table is a list of line + * table entries, broken up into "sequences". Within a sequence, + * entries are in order of increasing program counter ("address") and + * an entry provides information for all program counters between the + * entry's address and the address of the next entry. Each sequence + * is terminated by a special entry with its + * line_table::entry::end_sequence flag set. The line number table + * also records the set of source files for a given compilation unit, + * which can be referred to from other DIE attributes. + */ +class line_table +{ +public: + /** + * \internal Construct a line number table whose header begins + * at the given offset in sec. cu_addr_size is the address + * size of the associated compilation unit. cu_comp_dir and + * cu_name give the DW_AT::comp_dir and DW_AT::name attributes + * of the associated compilation unit. + */ + line_table(const std::shared_ptr
&sec, section_offset offset, + unsigned cu_addr_size, const std::string &cu_comp_dir, + const std::string &cu_name); + + /** + * Construct an invalid, empty line table. + */ + line_table() = default; + + /** Copy constructor */ + line_table(const line_table &o) = default; + /** Move constructor */ + line_table(line_table &&o) = default; + + line_table &operator=(const line_table &o) = default; + line_table &operator=(line_table &&o) = default; + + /** + * Return true if this object represents an initialized line + * table. Default constructed line tables are not valid. + */ + bool valid() const + { + return !!m; + } + + class file; + class entry; + typedef entry value_type; + + class iterator; + + /** + * Return an iterator to the beginning of this line number + * table. If called on an invalid line table, this will + * return an iterator equal to end(). + */ + iterator begin() const; + + /** + * Return an iterator to one past the last entry in this line + * number table. + */ + iterator end() const; + + /** + * Return an iterator to the line table entry containing addr + * (roughly, the entry with the highest address less than or + * equal to addr, but accounting for end_sequence entries). + * Returns end() if there is no such entry. + */ + iterator find_address(taddr addr) const; + + /** + * Return the index'th file in the line table. These indexes + * are typically used by declaration and call coordinates. If + * index is out of range, throws out_of_range. + */ + const file *get_file(unsigned index) const; + +private: + friend class iterator; + + struct impl; + std::shared_ptr m; +}; + +/** + * A source file in a line table. + */ +class line_table::file +{ +public: + /** + * The absolute path of this source file. + */ + std::string path; + + /** + * The last modification time of this source file in an + * implementation-defined encoding or 0 if unknown. + */ + uint64_t mtime; + + /** + * The size in bytes of this source file or 0 if unknown. 
+ */ + uint64_t length; + + /** + * Construct a source file object. + */ + file(std::string path = "", uint64_t mtime = 0, uint64_t length = 0); +}; + +/** + * An entry in the line table. + */ +class line_table::entry +{ +public: + /** + * The program counter value corresponding to a machine + * instruction generated by the compiler. + */ + taddr address; + + /** + * The index of an operation within a VLIW instruction. The + * index of the first operation is 0. For non-VLIW + * architectures, this will always be 0. + */ + unsigned op_index; + + /** + * The source file containing this instruction. + */ + const line_table::file *file; + + /** + * The index of the source file containing this instruction. + */ + unsigned file_index; + + /** + * The source line number of this instruction, starting at 1. + * This may be 0 if this instruction cannot be attributed to + * any source line. + */ + unsigned line; + + /** + * The column number within this source line, starting at 1. + * The value 0 indicates that a statement begins at the "left + * edge" of the line, whatever that means. + */ + unsigned column; + + /** + * True if this instruction is a recommended breakpoint + * location. Typically this is the beginning of a statement. + */ + bool is_stmt; + + /** + * True if this instruction is the beginning of a basic block. + */ + bool basic_block; + + /** + * True if this address is the first byte after the end of a + * sequence of target machine instructions. In this case, all + * other fields besides address are not meaningful. + */ + bool end_sequence; + + /** + * True if this address is one where execution should be + * suspended for an entry breakpoint of a function. + */ + bool prologue_end; + + /** + * True if this address is one where execution should be + * suspended for an exit breakpoint of a function. + */ + bool epilogue_begin; + + /** + * The instruction set architecture of this instruction. 
The + * meaning of this field is generally defined by an + * architecture's ABI. + */ + unsigned isa; + + /** + * A number that identifies the block containing the current + * instruction if multiple blocks are associated with the same + * source file, line, and column. + */ + unsigned discriminator; + + /** + * Reset this line info object to the default initial values + * for all fields. is_stmt has no default value, so the + * caller must provide it. + */ + void reset(bool is_stmt); + + /** + * Return a descriptive string of the form + * "filename[:line[:column]]". + */ + std::string get_description() const; +}; + +/** + * An iterator over the entries in a line table. + */ +class line_table::iterator +{ +public: + /** + * \internal Construct an iterator for the given line table + * starting pos bytes into the table's section. + */ + iterator(const line_table *table, section_offset pos); + + /** Copy constructor */ + iterator(const iterator &o) = default; + /** Move constructor */ + iterator(iterator &&o) = default; + + iterator &operator=(const iterator &o) = default; + iterator &operator=(iterator &&o) = default; + + /** + * Return the current line table entry. This entry is reused + * internally, so the caller should copy it if it needs to + * persist past the next increment. + */ + const line_table::entry &operator*() const + { + return entry; + } + + /** Dereference operator */ + const line_table::entry *operator->() const + { + return &entry; + } + + /** Equality operator */ + bool operator==(const iterator &o) const + { + return o.pos == pos && o.table == table; + } + + /** Inequality operator */ + bool operator!=(const iterator &o) const + { + return !(*this == o); + } + + /** + * Increment this iterator to point to the next line table + * entry. 
+ */ + iterator &operator++(); + + /** Post-increment operator */ + iterator operator++(int) + { + iterator tmp(*this); + ++(*this); + return tmp; + } + +private: + const line_table *table; + line_table::entry entry, regs; + section_offset pos; + + /** + * Process the next opcode. If the opcode "adds a row to the + * table", update entry to reflect the row and return true. + */ + bool step(cursor *cur); +}; + +////////////////////////////////////////////////////////////////// +// Type-safe attribute getters +// + +// XXX More + +die at_abstract_origin(const die &d); +DW_ACCESS at_accessibility(const die &d); +uint64_t at_allocated(const die &d, expr_context *ctx); +bool at_artificial(const die &d); +uint64_t at_associated(const die &d, expr_context *ctx); +uint64_t at_bit_offset(const die &d, expr_context *ctx); +uint64_t at_bit_size(const die &d, expr_context *ctx); +uint64_t at_bit_stride(const die &d, expr_context *ctx); +uint64_t at_byte_size(const die &d, expr_context *ctx); +uint64_t at_byte_stride(const die &d, expr_context *ctx); +DW_CC at_calling_convention(const die &d); +die at_common_reference(const die &d); +std::string at_comp_dir(const die &d); +value at_const_value(const die &d); +bool at_const_expr(const die &d); +die at_containing_type(const die &d); +uint64_t at_count(const die &d, expr_context *ctx); +expr_result at_data_member_location(const die &d, expr_context *ctx, taddr base, taddr pc); +bool at_declaration(const die &d); +std::string at_description(const die &d); +die at_discr(const die &d); +value at_discr_value(const die &d); +bool at_elemental(const die &d); +DW_ATE at_encoding(const die &d); +DW_END at_endianity(const die &d); +taddr at_entry_pc(const die &d); +bool at_enum_class(const die &d); +bool at_explicit(const die &d); +die at_extension(const die &d); +bool at_external(const die &d); +die at_friend(const die &d); +taddr at_high_pc(const die &d); +DW_ID at_identifier_case(const die &d); +die at_import(const die &d); +DW_INL 
at_inline(const die &d); +bool at_is_optional(const die &d); +DW_LANG at_language(const die &d); +std::string at_linkage_name(const die &d); +taddr at_low_pc(const die &d); +uint64_t at_lower_bound(const die &d, expr_context *ctx); +bool at_main_subprogram(const die &d); +bool at_mutable(const die &d); +std::string at_name(const die &d); +die at_namelist_item(const die &d); +die at_object_pointer(const die &d); +DW_ORD at_ordering(const die &d); +std::string at_picture_string(const die &d); +die at_priority(const die &d); +std::string at_producer(const die &d); +bool at_prototyped(const die &d); +bool at_pure(const die &d); +rangelist at_ranges(const die &d); +bool at_recursive(const die &d); +die at_sibling(const die &d); +die at_signature(const die &d); +die at_small(const die &d); +die at_specification(const die &d); +bool at_threads_scaled(const die &d); +die at_type(const die &d); +uint64_t at_upper_bound(const die &d, expr_context *ctx); +bool at_use_UTF8(const die &d); +bool at_variable_parameter(const die &d); +DW_VIRTUALITY at_virtuality(const die &d); +DW_VIS at_visibility(const die &d); + +/** + * Return the PC range spanned by the code of a DIE. The DIE must + * either have DW_AT::ranges or DW_AT::low_pc. It may optionally have + * DW_AT::high_pc. + */ +rangelist die_pc_range(const die &d); + +////////////////////////////////////////////////////////////////// +// Utilities +// + +/** + * An index of sibling DIEs by some string attribute. This index is + * lazily constructed and space-efficient. + */ +class die_str_map +{ +public: + /** + * Construct the index of the attr attribute of all immediate + * children of parent whose tags are in accept. 
+ */ + die_str_map(const die &parent, DW_AT attr, + const std::initializer_list &accept); + + die_str_map() = default; + die_str_map(const die_str_map &o) = default; + die_str_map(die_str_map &&o) = default; + + die_str_map& operator=(const die_str_map &o) = default; + die_str_map& operator=(die_str_map &&o) = default; + + /** + * Construct a string map for the type names of parent's + * immediate children. + * + * XXX This should use .debug_pubtypes if parent is a compile + * unit's root DIE, but it currently does not. + */ + static die_str_map from_type_names(const die &parent); + + /** + * Return the DIE whose attribute matches val. If no such DIE + * exists, return an invalid die object. + */ + const die &operator[](const char *val) const; + + /** + * Short-hand for [value.c_str()]. + */ + const die &operator[](const std::string &val) const + { + return (*this)[val.c_str()]; + } + +private: + struct impl; + std::shared_ptr m; +}; + +////////////////////////////////////////////////////////////////// +// ELF support +// + +namespace elf +{ + /** + * Translate an ELF section name info a DWARF section type. + * If the section is a valid DWARF section name, sets *out to + * the type and returns true. If not, returns false. + */ + bool section_name_to_type(const char *name, section_type *out); + + /** + * Translate a DWARF section type into an ELF section name. + */ + const char *section_type_to_name(section_type type); + + template + class elf_loader : public loader + { + Elf f; + + public: + elf_loader(const Elf &file) : f(file) { } + + const void *load(section_type section, size_t *size_out) + { + auto sec = f.get_section(section_type_to_name(section)); + if (!sec.valid()) + return nullptr; + *size_out = sec.size(); + return sec.data(); + } + }; + + /** + * Create a DWARF section loader backed by the given ELF + * file. 
This is templatized to eliminate a static dependency + * between the libelf++ and libdwarf++, though it can only + * reasonably be used with elf::elf from libelf++. + */ + template + std::shared_ptr > create_loader(const Elf &f) + { + return std::make_shared >(f); + } +}; + +DWARFPP_END_NAMESPACE + +////////////////////////////////////////////////////////////////// +// Hash specializations +// + +namespace std +{ + template<> + struct hash + { + typedef size_t result_type; + typedef const dwarf::unit &argument_type; + result_type operator()(argument_type a) const + { + return hash()(a.m); + } + }; + + template<> + struct hash + { + typedef size_t result_type; + typedef const dwarf::die &argument_type; + result_type operator()(argument_type a) const; + }; +} + +#endif diff --git a/3party/libelfin/dwarf/dwarf.cc b/3party/libelfin/dwarf/dwarf.cc new file mode 100644 index 0000000..09d6fb0 --- /dev/null +++ b/3party/libelfin/dwarf/dwarf.cc @@ -0,0 +1,366 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#include "internal.hh" + +using namespace std; + +DWARFPP_BEGIN_NAMESPACE + +////////////////////////////////////////////////////////////////// +// class dwarf +// + +struct dwarf::impl +{ + impl(const std::shared_ptr &l) + : l(l), have_type_units(false) { } + + std::shared_ptr l; + + std::shared_ptr
sec_info; + std::shared_ptr
sec_abbrev; + + std::vector compilation_units; + + std::unordered_map type_units; + bool have_type_units; + + std::map > sections; +}; + +dwarf::dwarf(const std::shared_ptr &l) + : m(make_shared(l)) +{ + const void *data; + size_t size; + + // Get required sections + data = l->load(section_type::info, &size); + if (!data) + throw format_error("required .debug_info section missing"); + m->sec_info = make_shared
(section_type::info, data, size, byte_order::lsb); + + // Sniff the endianness from the version field of the first + // CU. This is always a small but non-zero integer. + cursor endcur(m->sec_info); + // Skip length. + section_length length = endcur.fixed(); + if (length == 0xffffffff) + endcur.fixed(); + // Get version in both little and big endian. + uhalf version = endcur.fixed(); + uhalf versionbe = (version >> 8) | ((version & 0xFF) << 8); + if (versionbe < version) { + m->sec_info = make_shared
(section_type::info, data, size, byte_order::msb); + } + + data = l->load(section_type::abbrev, &size); + if (!data) + throw format_error("required .debug_abbrev section missing"); + m->sec_abbrev = make_shared
(section_type::abbrev, data, size, m->sec_info->ord); + + // Get compilation units. Everything derives from these, so + // there's no point in doing it lazily. + cursor infocur(m->sec_info); + while (!infocur.end()) { + // XXX Circular reference. Given that we now require + // the dwarf object to stick around for DIEs, maybe we + // might as well require that for units, too. + m->compilation_units.emplace_back( + *this, infocur.get_section_offset()); + infocur.subsection(); + } +} + +dwarf::~dwarf() +{ +} + +const std::vector & +dwarf::compilation_units() const +{ + static std::vector empty; + if (!m) + return empty; + return m->compilation_units; +} + +const type_unit & +dwarf::get_type_unit(uint64_t type_signature) const +{ + if (!m->have_type_units) { + cursor tucur(get_section(section_type::types)); + while (!tucur.end()) { + // XXX Circular reference + type_unit tu(*this, tucur.get_section_offset()); + m->type_units[tu.get_type_signature()] = tu; + tucur.subsection(); + } + m->have_type_units = true; + } + if (!m->type_units.count(type_signature)) + throw out_of_range("type signature 0x" + to_hex(type_signature)); + return m->type_units[type_signature]; +} + +std::shared_ptr
+dwarf::get_section(section_type type) const +{ + if (type == section_type::info) + return m->sec_info; + if (type == section_type::abbrev) + return m->sec_abbrev; + + auto it = m->sections.find(type); + if (it != m->sections.end()) + return it->second; + + size_t size; + const void *data = m->l->load(type, &size); + if (!data) + throw format_error(std::string(elf::section_type_to_name(type)) + + " section missing"); + m->sections[type] = std::make_shared
(section_type::str, data, size, m->sec_info->ord); + return m->sections[type]; +} + +////////////////////////////////////////////////////////////////// +// class unit +// + +/** + * Implementation of a unit. + */ +struct unit::impl +{ + const dwarf file; + const section_offset offset; + const std::shared_ptr
subsec; + const section_offset debug_abbrev_offset; + const section_offset root_offset; + + // Type unit-only values + const uint64_t type_signature; + const section_offset type_offset; + + // Lazily constructed root and type DIEs + die root, type; + + // Lazily constructed line table + line_table lt; + + // Map from abbrev code to abbrev. If the map is dense, it + // will be stored in the vector; otherwise it will be stored + // in the map. + bool have_abbrevs; + std::vector abbrevs_vec; + std::unordered_map abbrevs_map; + + impl(const dwarf &file, section_offset offset, + const std::shared_ptr
&subsec, + section_offset debug_abbrev_offset, section_offset root_offset, + uint64_t type_signature = 0, section_offset type_offset = 0) + : file(file), offset(offset), subsec(subsec), + debug_abbrev_offset(debug_abbrev_offset), + root_offset(root_offset), type_signature(type_signature), + type_offset(type_offset), have_abbrevs(false) { } + + void force_abbrevs(); +}; + +unit::~unit() +{ +} + +const dwarf & +unit::get_dwarf() const +{ + return m->file; +} + +section_offset +unit::get_section_offset() const +{ + return m->offset; +} + +const die& +unit::root() const +{ + if (!m->root.valid()) { + m->force_abbrevs(); + m->root = die(this); + m->root.read(m->root_offset); + } + return m->root; +} + +const std::shared_ptr
& +unit::data() const +{ + return m->subsec; +} + +const abbrev_entry & +unit::get_abbrev(abbrev_code acode) const +{ + if (!m->have_abbrevs) + m->force_abbrevs(); + + if (!m->abbrevs_vec.empty()) { + if (acode >= m->abbrevs_vec.size()) + goto unknown; + const abbrev_entry &entry = m->abbrevs_vec[acode]; + if (entry.code == 0) + goto unknown; + return entry; + } else { + auto it = m->abbrevs_map.find(acode); + if (it == m->abbrevs_map.end()) + goto unknown; + return it->second; + } + +unknown: + throw format_error("unknown abbrev code 0x" + to_hex(acode)); +} + +void +unit::impl::force_abbrevs() +{ + // XXX Compilation units can share abbrevs. Parse each table + // at most once. + if (have_abbrevs) + return; + + // Section 7.5.3 + cursor c(file.get_section(section_type::abbrev), + debug_abbrev_offset); + abbrev_entry entry; + abbrev_code highest = 0; + while (entry.read(&c)) { + abbrevs_map[entry.code] = entry; + if (entry.code > highest) + highest = entry.code; + } + + // Typically, abbrev codes are assigned linearly, so it's more + // space efficient and time efficient to store the table in a + // vector. Convert to a vector if it's dense enough, by some + // rough estimate of "enough". + if (highest * 10 < abbrevs_map.size() * 15) { + // Move the map into the vector + abbrevs_vec.resize(highest + 1); + for (auto &entry : abbrevs_map) + abbrevs_vec[entry.first] = move(entry.second); + abbrevs_map.clear(); + } + + have_abbrevs = true; +} + +////////////////////////////////////////////////////////////////// +// class compilation_unit +// + +compilation_unit::compilation_unit(const dwarf &file, section_offset offset) +{ + // Read the CU header (DWARF4 section 7.5.1.1) + cursor cur(file.get_section(section_type::info), offset); + std::shared_ptr
subsec = cur.subsection(); + cursor sub(subsec); + sub.skip_initial_length(); + uhalf version = sub.fixed(); + (void)version; + if (version > 5) + throw format_error("unknown compilation unit version " + std::to_string(version)); + // .debug_abbrev-relative offset of this unit's abbrevs + section_offset debug_abbrev_offset; + if(version >= 5) + { + sub.skip_unit_type(); + ubyte address_size = sub.fixed(); + subsec->addr_size = address_size; + debug_abbrev_offset = sub.offset(); + } + else { + debug_abbrev_offset = sub.offset(); + ubyte address_size = sub.fixed(); + subsec->addr_size = address_size; + } + + m = make_shared(file, offset, subsec, debug_abbrev_offset, + sub.get_section_offset()); +} + +const line_table & +compilation_unit::get_line_table() const +{ + if (!m->lt.valid()) { + const die &d = root(); + if (!d.has(DW_AT::stmt_list) || !d.has(DW_AT::name)) + goto done; + + shared_ptr
sec; + try { + sec = m->file.get_section(section_type::line); + } catch (format_error &e) { + goto done; + } + + auto comp_dir = d.has(DW_AT::comp_dir) ? at_comp_dir(d) : ""; + + m->lt = line_table(sec, d[DW_AT::stmt_list].as_sec_offset(), + m->subsec->addr_size, comp_dir, + at_name(d)); + } +done: + return m->lt; +} + +////////////////////////////////////////////////////////////////// +// class type_unit +// + +type_unit::type_unit(const dwarf &file, section_offset offset) +{ + // Read the type unit header (DWARF4 section 7.5.1.2) + cursor cur(file.get_section(section_type::types), offset); + std::shared_ptr
subsec = cur.subsection(); + cursor sub(subsec); + sub.skip_initial_length(); + uhalf version = sub.fixed(); + if (version != 4) + throw format_error("unknown type unit version " + std::to_string(version)); + // .debug_abbrev-relative offset of this unit's abbrevs + section_offset debug_abbrev_offset = sub.offset(); + ubyte address_size = sub.fixed(); + subsec->addr_size = address_size; + uint64_t type_signature = sub.fixed(); + section_offset type_offset = sub.offset(); + + m = make_shared(file, offset, subsec, debug_abbrev_offset, + sub.get_section_offset(), type_signature, + type_offset); +} + +uint64_t +type_unit::get_type_signature() const +{ + return m->type_signature; +} + +const die & +type_unit::type() const +{ + if (!m->type.valid()) { + m->force_abbrevs(); + m->type = die(this); + m->type.read(m->type_offset); + } + return m->type; +} + +DWARFPP_END_NAMESPACE diff --git a/3party/libelfin/dwarf/elf.cc b/3party/libelfin/dwarf/elf.cc new file mode 100644 index 0000000..baf8e67 --- /dev/null +++ b/3party/libelfin/dwarf/elf.cc @@ -0,0 +1,54 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. 
+ +#include "dwarf++.hh" + +#include + +using namespace std; + +DWARFPP_BEGIN_NAMESPACE + +static const struct +{ + const char *name; + section_type type; +} sections[] = { + {".debug_abbrev", section_type::abbrev}, + {".debug_aranges", section_type::aranges}, + {".debug_frame", section_type::frame}, + {".debug_info", section_type::info}, + {".debug_line", section_type::line}, + {".debug_loc", section_type::loc}, + {".debug_macinfo", section_type::macinfo}, + {".debug_pubnames", section_type::pubnames}, + {".debug_pubtypes", section_type::pubtypes}, + {".debug_ranges", section_type::ranges}, + {".debug_str", section_type::str}, + {".debug_types", section_type::types}, +}; + +bool +elf::section_name_to_type(const char *name, section_type *out) +{ + for (auto &sec : sections) { + if (strcmp(sec.name, name) == 0) { + *out = sec.type; + return true; + } + } + return false; +} + +const char * +elf::section_type_to_name(section_type type) +{ + for (auto &sec : sections) { + if (sec.type == type) + return sec.name; + } + return nullptr; +} + +DWARFPP_END_NAMESPACE diff --git a/3party/libelfin/dwarf/expr.cc b/3party/libelfin/dwarf/expr.cc new file mode 100644 index 0000000..4783587 --- /dev/null +++ b/3party/libelfin/dwarf/expr.cc @@ -0,0 +1,423 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#include "internal.hh" + +using namespace std; + +DWARFPP_BEGIN_NAMESPACE + +expr_context no_expr_context; + +expr::expr(const unit *cu, + section_offset offset, section_length len) + : cu(cu), offset(offset), len(len) +{ +} + +expr_result +expr::evaluate(expr_context *ctx) const +{ + return evaluate(ctx, {}); +} + +expr_result +expr::evaluate(expr_context *ctx, taddr argument) const +{ + return evaluate(ctx, {argument}); +} + +expr_result +expr::evaluate(expr_context *ctx, const std::initializer_list &arguments) const +{ + // The stack machine's stack. 
The top of the stack is + // stack.back(). + // XXX This stack must be in target machine representation, + // since I see both (DW_OP_breg0 (eax): -28; DW_OP_stack_value) + // and (DW_OP_lit1; DW_OP_stack_value). + small_vector stack; + + // Create the initial stack. arguments are in reverse order + // (that is, element 0 is TOS), so reverse it. + stack.reserve(arguments.size()); + for (const taddr *elt = arguments.end() - 1; + elt >= arguments.begin(); elt--) + stack.push_back(*elt); + + // Create a subsection for just this expression so we can + // easily detect the end (including premature end). + auto cusec = cu->data(); + shared_ptr
subsec + (make_shared
(cusec->type, + cusec->begin + offset, len, + cusec->ord, cusec->fmt, + cusec->addr_size)); + cursor cur(subsec); + + // Prepare the expression result. Some location descriptions + // create the result directly, rather than using the top of + // stack. + expr_result result; + + // 2.6.1.1.4 Empty location descriptions + if (cur.end()) { + result.location_type = expr_result::type::empty; + result.value = 0; + return result; + } + + // Assume the result is an address for now and should be + // grabbed from the top of stack at the end. + result.location_type = expr_result::type::address; + + // Execute! + while (!cur.end()) { +#define CHECK() do { if (stack.empty()) goto underflow; } while (0) +#define CHECKN(n) do { if (stack.size() < n) goto underflow; } while (0) + union + { + uint64_t u; + int64_t s; + } tmp1, tmp2, tmp3; + static_assert(sizeof(tmp1) == sizeof(taddr), "taddr is not 64 bits"); + + // Tell GCC to warn us about missing switch cases, + // even though we have a default case. 
+#pragma GCC diagnostic push +#pragma GCC diagnostic warning "-Wswitch-enum" + DW_OP op = (DW_OP)cur.fixed(); + switch (op) { + // 2.5.1.1 Literal encodings + case DW_OP::lit0...DW_OP::lit31: + stack.push_back((unsigned)op - (unsigned)DW_OP::lit0); + break; + case DW_OP::addr: + stack.push_back(cur.address()); + break; + case DW_OP::const1u: + stack.push_back(cur.fixed()); + break; + case DW_OP::const2u: + stack.push_back(cur.fixed()); + break; + case DW_OP::const4u: + stack.push_back(cur.fixed()); + break; + case DW_OP::const8u: + stack.push_back(cur.fixed()); + break; + case DW_OP::const1s: + stack.push_back(cur.fixed()); + break; + case DW_OP::const2s: + stack.push_back(cur.fixed()); + break; + case DW_OP::const4s: + stack.push_back(cur.fixed()); + break; + case DW_OP::const8s: + stack.push_back(cur.fixed()); + break; + case DW_OP::constu: + stack.push_back(cur.uleb128()); + break; + case DW_OP::consts: + stack.push_back(cur.sleb128()); + break; + + // 2.5.1.2 Register based addressing + case DW_OP::fbreg: + // XXX + throw runtime_error("DW_OP_fbreg not implemented"); + case DW_OP::breg0...DW_OP::breg31: + tmp1.u = (unsigned)op - (unsigned)DW_OP::breg0; + tmp2.s = cur.sleb128(); + stack.push_back((int64_t)ctx->reg(tmp1.u) + tmp2.s); + break; + case DW_OP::bregx: + tmp1.u = cur.uleb128(); + tmp2.s = cur.sleb128(); + stack.push_back((int64_t)ctx->reg(tmp1.u) + tmp2.s); + break; + + // 2.5.1.3 Stack operations + case DW_OP::dup: + CHECK(); + stack.push_back(stack.back()); + break; + case DW_OP::drop: + CHECK(); + stack.pop_back(); + break; + case DW_OP::pick: + tmp1.u = cur.fixed(); + CHECKN(tmp1.u); + stack.push_back(stack.revat(tmp1.u)); + break; + case DW_OP::over: + CHECKN(2); + stack.push_back(stack.revat(1)); + break; + case DW_OP::swap: + CHECKN(2); + tmp1.u = stack.back(); + stack.back() = stack.revat(1); + stack.revat(1) = tmp1.u; + break; + case DW_OP::rot: + CHECKN(3); + tmp1.u = stack.back(); + stack.back() = stack.revat(1); + stack.revat(1) = 
stack.revat(2); + stack.revat(2) = tmp1.u; + break; + case DW_OP::deref: + tmp1.u = subsec->addr_size; + goto deref_common; + case DW_OP::deref_size: + tmp1.u = cur.fixed(); + if (tmp1.u > subsec->addr_size) + throw expr_error("DW_OP_deref_size operand exceeds address size"); + deref_common: + CHECK(); + stack.back() = ctx->deref_size(stack.back(), tmp1.u); + break; + case DW_OP::xderef: + tmp1.u = subsec->addr_size; + goto xderef_common; + case DW_OP::xderef_size: + tmp1.u = cur.fixed(); + if (tmp1.u > subsec->addr_size) + throw expr_error("DW_OP_xderef_size operand exceeds address size"); + xderef_common: + CHECKN(2); + tmp2.u = stack.back(); + stack.pop_back(); + stack.back() = ctx->xderef_size(tmp2.u, stack.back(), tmp1.u); + break; + case DW_OP::push_object_address: + // XXX + throw runtime_error("DW_OP_push_object_address not implemented"); + case DW_OP::form_tls_address: + CHECK(); + stack.back() = ctx->form_tls_address(stack.back()); + break; + case DW_OP::call_frame_cfa: + // XXX + throw runtime_error("DW_OP_call_frame_cfa not implemented"); + + // 2.5.1.4 Arithmetic and logical operations +#define UBINOP(binop) \ + do { \ + CHECKN(2); \ + tmp1.u = stack.back(); \ + stack.pop_back(); \ + tmp2.u = stack.back(); \ + stack.back() = tmp2.u binop tmp1.u; \ + } while (0) + case DW_OP::abs: + CHECK(); + tmp1.u = stack.back(); + if (tmp1.s < 0) + tmp1.s = -tmp1.s; + stack.back() = tmp1.u; + break; + case DW_OP::and_: + UBINOP(&); + break; + case DW_OP::div: + CHECKN(2); + tmp1.u = stack.back(); + stack.pop_back(); + tmp2.u = stack.back(); + tmp3.s = tmp1.s / tmp2.s; + stack.back() = tmp3.u; + break; + case DW_OP::minus: + UBINOP(-); + break; + case DW_OP::mod: + UBINOP(%); + break; + case DW_OP::mul: + UBINOP(*); + break; + case DW_OP::neg: + CHECK(); + tmp1.u = stack.back(); + tmp1.s = -tmp1.s; + stack.back() = tmp1.u; + break; + case DW_OP::not_: + CHECK(); + stack.back() = ~stack.back(); + break; + case DW_OP::or_: + UBINOP(|); + break; + case DW_OP::plus: + 
UBINOP(+); + break; + case DW_OP::plus_uconst: + tmp1.u = cur.uleb128(); + CHECK(); + stack.back() += tmp1.u; + break; + case DW_OP::shl: + CHECKN(2); + tmp1.u = stack.back(); + stack.pop_back(); + tmp2.u = stack.back(); + // C++ does not define what happens if you + // shift by more bits than the width of the + // type, so we handle this case specially + if (tmp1.u < sizeof(tmp2.u)*8) + stack.back() = tmp2.u << tmp1.u; + else + stack.back() = 0; + break; + case DW_OP::shr: + CHECKN(2); + tmp1.u = stack.back(); + stack.pop_back(); + tmp2.u = stack.back(); + // Same as above + if (tmp1.u < sizeof(tmp2.u)*8) + stack.back() = tmp2.u >> tmp1.u; + else + stack.back() = 0; + break; + case DW_OP::shra: + CHECKN(2); + tmp1.u = stack.back(); + stack.pop_back(); + tmp2.u = stack.back(); + // Shifting a negative number is + // implementation-defined in C++. + tmp3.u = (tmp2.s < 0); + if (tmp3.u) + tmp2.s = -tmp2.s; + if (tmp1.u < sizeof(tmp2.u)*8) + tmp2.u >>= tmp1.u; + else + tmp2.u = 0; + // DWARF implies that over-shifting a negative + // number should result in 0, not ~0. + if (tmp3.u) + tmp2.s = -tmp2.s; + stack.back() = tmp2.u; + break; + case DW_OP::xor_: + UBINOP(^); + break; +#undef UBINOP + + // 2.5.1.5 Control flow operations +#define SRELOP(relop) \ + do { \ + CHECKN(2); \ + tmp1.u = stack.back(); \ + stack.pop_back(); \ + tmp2.u = stack.back(); \ + stack.back() = (tmp2.s <= tmp1.s) ? 
1 : 0; \ + } while (0) + case DW_OP::le: + SRELOP(<=); + break; + case DW_OP::ge: + SRELOP(>=); + break; + case DW_OP::eq: + SRELOP(==); + break; + case DW_OP::lt: + SRELOP(<); + break; + case DW_OP::gt: + SRELOP(>); + break; + case DW_OP::ne: + SRELOP(!=); + break; + case DW_OP::skip: + tmp1.s = cur.fixed(); + goto skip_common; + case DW_OP::bra: + tmp1.s = cur.fixed(); + CHECK(); + tmp2.u = stack.back(); + stack.pop_back(); + if (tmp2.u == 0) + break; + skip_common: + cur = cursor(subsec, (int64_t)cur.get_section_offset() + tmp1.s); + break; + case DW_OP::call2: + case DW_OP::call4: + case DW_OP::call_ref: + // XXX + throw runtime_error(to_string(op) + " not implemented"); +#undef SRELOP + + // 2.5.1.6 Special operations + case DW_OP::nop: + break; + + // 2.6.1.1.2 Register location descriptions + case DW_OP::reg0...DW_OP::reg31: + result.location_type = expr_result::type::reg; + result.value = (unsigned)op - (unsigned)DW_OP::reg0; + break; + case DW_OP::regx: + result.location_type = expr_result::type::reg; + result.value = cur.uleb128(); + break; + + // 2.6.1.1.3 Implicit location descriptions + case DW_OP::implicit_value: + result.location_type = expr_result::type::implicit; + result.implicit_len = cur.uleb128(); + cur.ensure(result.implicit_len); + result.implicit = cur.pos; + break; + case DW_OP::stack_value: + CHECK(); + result.location_type = expr_result::type::literal; + result.value = stack.back(); + break; + + // 2.6.1.2 Composite location descriptions + case DW_OP::piece: + case DW_OP::bit_piece: + // XXX + throw runtime_error(to_string(op) + " not implemented"); + + case DW_OP::lo_user...DW_OP::hi_user: + // XXX We could let the context evaluate this, + // but it would need access to the cursor. 
+ throw expr_error("unknown user op " + to_string(op)); + + default: + throw expr_error("bad operation " + to_string(op)); + } +#pragma GCC diagnostic pop +#undef CHECK +#undef CHECKN + } + + if (result.location_type == expr_result::type::address) { + // The result type is still and address, so we should + // fetch it from the top of stack. + if (stack.empty()) + throw expr_error("final stack is empty; no result given"); + result.value = stack.back(); + } + + return result; + +underflow: + throw expr_error("stack underflow evaluating DWARF expression"); +} + +DWARFPP_END_NAMESPACE diff --git a/3party/libelfin/dwarf/internal.hh b/3party/libelfin/dwarf/internal.hh new file mode 100644 index 0000000..42679ca --- /dev/null +++ b/3party/libelfin/dwarf/internal.hh @@ -0,0 +1,297 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#ifndef _DWARFPP_INTERNAL_HH_ +#define _DWARFPP_INTERNAL_HH_ + +#include "dwarf++.hh" +#include "../elf/to_hex.hh" + +#include +#include +#include +#include + +DWARFPP_BEGIN_NAMESPACE + +enum class format +{ + unknown, + dwarf32, + dwarf64 +}; + +enum class byte_order +{ + lsb, + msb +}; + +/** + * Return this system's native byte order. + */ +static inline byte_order +native_order() +{ + static const union + { + int i; + char c[sizeof(int)]; + } test = {1}; + + return test.c[0] == 1 ? byte_order::lsb : byte_order::msb; +} + +/** + * A single DWARF section or a slice of a section. This also tracks + * dynamic information necessary to decode values in this section. 
+ */ +struct section +{ + section_type type; + const char *begin, *end; + const format fmt; + const byte_order ord; + unsigned addr_size; + + section(section_type type, const void *begin, + section_length length, + byte_order ord, format fmt = format::unknown, + unsigned addr_size = 0) + : type(type), begin((char*)begin), end((char*)begin + length), + fmt(fmt), ord(ord), addr_size(addr_size) { } + + section(const section &o) = default; + + std::shared_ptr
slice(section_offset start, section_length len, + format fmt = format::unknown, + unsigned addr_size = 0) + { + if (fmt == format::unknown) + fmt = this->fmt; + if (addr_size == 0) + addr_size = this->addr_size; + + return std::make_shared
( + type, begin+start, + std::min(len, (section_length)(end-begin)), + ord, fmt, addr_size); + } + + size_t size() const + { + return end - begin; + } +}; + +/** + * A cursor pointing into a DWARF section. Provides deserialization + * operations and bounds checking. + */ +struct cursor +{ + // XXX There's probably a lot of overhead to maintaining the + // shared pointer to the section from this. Perhaps the rule + // should be that all objects keep the dwarf::impl alive + // (directly or indirectly) and that keeps the loader alive, + // so a cursor just needs a regular section*. + + std::shared_ptr
sec; + const char *pos; + + cursor() + : pos(nullptr) { } + cursor(const std::shared_ptr
sec, section_offset offset = 0) + : sec(sec), pos(sec->begin + offset) { } + + /** + * Read a subsection. The cursor must be at an initial + * length. After, the cursor will point just past the end of + * the subsection. The returned section has the appropriate + * DWARF format and begins at the current location of the + * cursor (so this is usually followed by a + * skip_initial_length). + */ + std::shared_ptr
subsection(); + std::int64_t sleb128(); + section_offset offset(); + void string(std::string &out); + const char *cstr(size_t *size_out = nullptr); + + void + ensure(section_offset bytes) + { + if ((section_offset)(sec->end - pos) < bytes || pos >= sec->end) + underflow(); + } + + template + T fixed() + { + ensure(sizeof(T)); + static_assert(sizeof(T) <= 8, "T too big"); + uint64_t val = 0; + const unsigned char *p = (const unsigned char*)pos; + if (sec->ord == byte_order::lsb) { + for (unsigned i = 0; i < sizeof(T); i++) + val |= ((uint64_t)p[i]) << (i * 8); + } else { + for (unsigned i = 0; i < sizeof(T); i++) + val = (val << 8) | (uint64_t)p[i]; + } + pos += sizeof(T); + return (T)val; + } + + std::uint64_t uleb128() + { + // Appendix C + // XXX Pre-compute all two byte ULEB's + std::uint64_t result = 0; + int shift = 0; + while (pos < sec->end) { + uint8_t byte = *(uint8_t*)(pos++); + result |= (uint64_t)(byte & 0x7f) << shift; + if ((byte & 0x80) == 0) + return result; + shift += 7; + } + underflow(); + return 0; + } + + taddr address() + { + switch (sec->addr_size) { + case 1: + return fixed(); + case 2: + return fixed(); + case 4: + return fixed(); + case 8: + return fixed(); + default: + throw std::runtime_error("address size " + std::to_string(sec->addr_size) + " not supported"); + } + } + + void skip_initial_length(); + void skip_unit_type(); + void skip_form(DW_FORM form); + + cursor &operator+=(section_offset offset) + { + pos += offset; + return *this; + } + + cursor operator+(section_offset offset) const + { + return cursor(sec, pos + offset); + } + + bool operator<(const cursor &o) const + { + return pos < o.pos; + } + + bool end() const + { + return pos >= sec->end; + } + + bool valid() const + { + return !!pos; + } + + section_offset get_section_offset() const + { + return pos - sec->begin; + } + +private: + cursor(const std::shared_ptr
sec, const char *pos) + : sec(sec), pos(pos) { } + + void underflow(); +}; + +/** + * An attribute specification in an abbrev. + */ +struct attribute_spec +{ + DW_AT name; + DW_FORM form; + + // Computed information + value::type type; + + attribute_spec(DW_AT name, DW_FORM form); +}; + +typedef std::uint64_t abbrev_code; + +/** + * An entry in .debug_abbrev. + */ +struct abbrev_entry +{ + abbrev_code code; + DW_TAG tag; + bool children; + std::vector attributes; + + abbrev_entry() : code(0) { } + + bool read(cursor *cur); +}; + +/** + * A section header in .debug_pubnames or .debug_pubtypes. + */ +struct name_unit +{ + uhalf version; + section_offset debug_info_offset; + section_length debug_info_length; + // Cursor to the first name_entry in this unit. This cursor's + // section is limited to this unit. + cursor entries; + + void read(cursor *cur) + { + // Section 7.19 + std::shared_ptr
subsec = cur->subsection(); + cursor sub(subsec); + sub.skip_initial_length(); + version = sub.fixed(); + if (version != 2) + throw format_error("unknown name unit version " + std::to_string(version)); + debug_info_offset = sub.offset(); + debug_info_length = sub.offset(); + entries = sub; + } +}; + +/** + * An entry in a .debug_pubnames or .debug_pubtypes unit. + */ +struct name_entry +{ + section_offset offset; + std::string name; + + void read(cursor *cur) + { + offset = cur->offset(); + cur->string(name); + } +}; + +DWARFPP_END_NAMESPACE + +#endif diff --git a/3party/libelfin/dwarf/line.cc b/3party/libelfin/dwarf/line.cc new file mode 100644 index 0000000..d3cbc4e --- /dev/null +++ b/3party/libelfin/dwarf/line.cc @@ -0,0 +1,438 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#include "internal.hh" + +#include + +using namespace std; + +DWARFPP_BEGIN_NAMESPACE + +// The expected number of arguments for standard opcodes. This is +// used to check the opcode_lengths header field for compatibility. +static const int opcode_lengths[] = { + 0, + // DW_LNS::copy + 0, 1, 1, 1, 1, + // DW_LNS::negate_stmt + 0, 0, 0, 1, 0, + // DW_LNS::set_epilogue_begin + 0, 1 +}; + +struct line_table::impl +{ + shared_ptr
sec; + + // Header information + section_offset program_offset; + ubyte minimum_instruction_length; + ubyte maximum_operations_per_instruction; + bool default_is_stmt; + sbyte line_base; + ubyte line_range; + ubyte opcode_base; + vector standard_opcode_lengths; + vector include_directories; + vector file_names; + + // The offset in sec following the last read file name entry. + // File name entries can appear both in the line table header + // and in the line number program itself. Since we can + // iterate over the line number program repeatedly, this keeps + // track of how far we've gotten so we don't add the same + // entry twice. + section_offset last_file_name_end; + // If an iterator has traversed the entire program, then we + // know we've gathered all file names. + bool file_names_complete; + + impl() : last_file_name_end(0), file_names_complete(false) {}; + + bool read_file_entry(cursor *cur, bool in_header); +}; + +line_table::line_table(const shared_ptr
&sec, section_offset offset, + unsigned cu_addr_size, const string &cu_comp_dir, + const string &cu_name) + : m(make_shared()) +{ + // XXX DWARF2 and 3 give a weird specification for DW_AT_comp_dir + + string comp_dir, abs_path; + if (cu_comp_dir.empty() || cu_comp_dir.back() == '/') + comp_dir = cu_comp_dir; + else + comp_dir = cu_comp_dir + '/'; + + // Read the line table header (DWARF2 section 6.2.4, DWARF3 + // section 6.2.4, DWARF4 section 6.2.3) + cursor cur(sec, offset); + m->sec = cur.subsection(); + cur = cursor(m->sec); + cur.skip_initial_length(); + m->sec->addr_size = cu_addr_size; + + // Basic header information + uhalf version = cur.fixed(); + if (version < 2 || version > 4) + throw format_error("unknown line number table version " + + std::to_string(version)); + section_length header_length = cur.offset(); + m->program_offset = cur.get_section_offset() + header_length; + m->minimum_instruction_length = cur.fixed(); + m->maximum_operations_per_instruction = 1; + if (version >= 4) + m->maximum_operations_per_instruction = cur.fixed(); + if (m->maximum_operations_per_instruction == 0) + throw format_error("maximum_operations_per_instruction cannot" + " be 0 in line number table"); + m->default_is_stmt = cur.fixed(); + m->line_base = cur.fixed(); + m->line_range = cur.fixed(); + if (m->line_range == 0) + throw format_error("line_range cannot be 0 in line number table"); + m->opcode_base = cur.fixed(); + + static_assert(sizeof(opcode_lengths) / sizeof(opcode_lengths[0]) == 13, + "opcode_lengths table has wrong length"); + + // Opcode length table + m->standard_opcode_lengths.resize(m->opcode_base); + m->standard_opcode_lengths[0] = 0; + for (unsigned i = 1; i < m->opcode_base; i++) { + ubyte length = cur.fixed(); + if (length != opcode_lengths[i]) + // The spec never says what to do if the + // opcode length of a standard opcode doesn't + // match the header. Do the safe thing. 
+ throw format_error( + "expected " + + std::to_string(opcode_lengths[i]) + + " arguments for line number opcode " + + std::to_string(i) + ", got " + + std::to_string(length)); + m->standard_opcode_lengths[i] = length; + } + + // Include directories list + string incdir; + // Include directory 0 is implicitly the compilation unit + // current directory + m->include_directories.push_back(comp_dir); + while (true) { + cur.string(incdir); + if (incdir.empty()) + break; + if (incdir.back() != '/') + incdir += '/'; + if (incdir[0] == '/') + m->include_directories.push_back(move(incdir)); + else + m->include_directories.push_back(comp_dir + incdir); + } + + // File name list + string file_name; + // File name 0 is implicitly the compilation unit file name. + // cu_name can be relative to comp_dir or absolute. + if (!cu_name.empty() && cu_name[0] == '/') + m->file_names.emplace_back(cu_name); + else + m->file_names.emplace_back(comp_dir + cu_name); + while (m->read_file_entry(&cur, true)); +} + +line_table::iterator +line_table::begin() const +{ + if (!valid()) + return iterator(nullptr, 0); + return iterator(this, m->program_offset); +} + +line_table::iterator +line_table::end() const +{ + if (!valid()) + return iterator(nullptr, 0); + return iterator(this, m->sec->size()); +} + +line_table::iterator +line_table::find_address(taddr addr) const +{ + iterator prev = begin(), e = end(); + if (prev == e) + return prev; + + iterator it = prev; + for (++it; it != e; prev = it++) { + if (prev->address <= addr && it->address > addr && + !prev->end_sequence) + return prev; + } + prev = e; + return prev; +} + +const line_table::file * +line_table::get_file(unsigned index) const +{ + if (index >= m->file_names.size()) { + // It could be declared in the line table program. + // This is unlikely, so we don't have to be + // super-efficient about this. Just force our way + // through the whole line table program. 
+ if (!m->file_names_complete) { + for (auto &ent : *this) + (void)ent; + } + if (index >= m->file_names.size()) + throw out_of_range + ("file name index " + std::to_string(index) + + " exceeds file table size of " + + std::to_string(m->file_names.size())); + } + return &m->file_names[index]; +} + +bool +line_table::impl::read_file_entry(cursor *cur, bool in_header) +{ + assert(cur->sec == sec); + + string file_name; + cur->string(file_name); + if (in_header && file_name.empty()) + return false; + uint64_t dir_index = cur->uleb128(); + uint64_t mtime = cur->uleb128(); + uint64_t length = cur->uleb128(); + + // Have we already processed this file entry? + if (cur->get_section_offset() <= last_file_name_end) + return true; + last_file_name_end = cur->get_section_offset(); + + if (file_name[0] == '/') + file_names.emplace_back(move(file_name), mtime, length); + else if (dir_index < include_directories.size()) + file_names.emplace_back( + include_directories[dir_index] + file_name, + mtime, length); + else + throw format_error("file name directory index out of range: " + + std::to_string(dir_index)); + + return true; +} + +line_table::file::file(string path, uint64_t mtime, uint64_t length) + : path(path), mtime(mtime), length(length) +{ +} + +void +line_table::entry::reset(bool is_stmt) +{ + address = op_index = 0; + file = nullptr; + file_index = line = 1; + column = 0; + this->is_stmt = is_stmt; + basic_block = end_sequence = prologue_end = epilogue_begin = false; + isa = discriminator = 0; +} + +string +line_table::entry::get_description() const +{ + string res = file->path; + if (line) { + res.append(":").append(std::to_string(line)); + if (column) + res.append(":").append(std::to_string(column)); + } + return res; +} + +line_table::iterator::iterator(const line_table *table, section_offset pos) + : table(table), pos(pos) +{ + if (table) { + regs.reset(table->m->default_is_stmt); + ++(*this); + } +} + +line_table::iterator & +line_table::iterator::operator++() +{ 
+ cursor cur(table->m->sec, pos); + + // Execute opcodes until we reach the end of the stream or an + // opcode emits a line table row + bool stepped = false, output = false; + while (!cur.end() && !output) { + output = step(&cur); + stepped = true; + } + if (stepped && !output) + throw format_error("unexpected end of line table"); + if (stepped && cur.end()) { + // Record that all file names must be known now + table->m->file_names_complete = true; + } + if (output) { + // Resolve file name of entry + if (entry.file_index < table->m->file_names.size()) + entry.file = &table->m->file_names[entry.file_index]; + else + throw format_error("bad file index " + + std::to_string(entry.file_index) + + " in line table"); + } + + pos = cur.get_section_offset(); + return *this; +} + +bool +line_table::iterator::step(cursor *cur) +{ + struct line_table::impl *m = table->m.get(); + + // Read the opcode (DWARF4 section 6.2.3) + ubyte opcode = cur->fixed(); + if (opcode >= m->opcode_base) { + // Special opcode (DWARF4 section 6.2.5.1) + ubyte adjusted_opcode = opcode - m->opcode_base; + unsigned op_advance = adjusted_opcode / m->line_range; + signed line_inc = m->line_base + (signed)adjusted_opcode % m->line_range; + + regs.line += line_inc; + regs.address += m->minimum_instruction_length * + ((regs.op_index + op_advance) + / m->maximum_operations_per_instruction); + regs.op_index = (regs.op_index + op_advance) + % m->maximum_operations_per_instruction; + entry = regs; + + regs.basic_block = regs.prologue_end = + regs.epilogue_begin = false; + regs.discriminator = 0; + + return true; + } else if (opcode != 0) { + // Standard opcode (DWARF4 sections 6.2.3 and 6.2.5.2) + // + // According to the standard, any opcode between the + // highest defined opcode for a given DWARF version + // and opcode_base should be treated as a + // vendor-specific opcode. 
However, the de facto + // standard seems to be to process these as standard + // opcodes even if they're from a later version of the + // standard than the line table header claims. + uint64_t uarg; +#pragma GCC diagnostic push +#pragma GCC diagnostic warning "-Wswitch-enum" + switch ((DW_LNS)opcode) { + case DW_LNS::copy: + entry = regs; + regs.basic_block = regs.prologue_end = + regs.epilogue_begin = false; + regs.discriminator = 0; + break; + case DW_LNS::advance_pc: + // Opcode advance (as for special opcodes) + uarg = cur->uleb128(); + advance_pc: + regs.address += m->minimum_instruction_length * + ((regs.op_index + uarg) + / m->maximum_operations_per_instruction); + regs.op_index = (regs.op_index + uarg) + % m->maximum_operations_per_instruction; + break; + case DW_LNS::advance_line: + regs.line = (signed)regs.line + cur->sleb128(); + break; + case DW_LNS::set_file: + regs.file_index = cur->uleb128(); + break; + case DW_LNS::set_column: + regs.column = cur->uleb128(); + break; + case DW_LNS::negate_stmt: + regs.is_stmt = !regs.is_stmt; + break; + case DW_LNS::set_basic_block: + regs.basic_block = true; + break; + case DW_LNS::const_add_pc: + uarg = (255 - m->opcode_base) / m->line_range; + goto advance_pc; + case DW_LNS::fixed_advance_pc: + regs.address += cur->fixed(); + regs.op_index = 0; + break; + case DW_LNS::set_prologue_end: + regs.prologue_end = true; + break; + case DW_LNS::set_epilogue_begin: + regs.epilogue_begin = true; + break; + case DW_LNS::set_isa: + regs.isa = cur->uleb128(); + break; + default: + // XXX Vendor extensions + throw format_error("unknown line number opcode " + + to_string((DW_LNS)opcode)); + } + return ((DW_LNS)opcode == DW_LNS::copy); + } else { // opcode == 0 + // Extended opcode (DWARF4 sections 6.2.3 and 6.2.5.3) + assert(opcode == 0); + uint64_t length = cur->uleb128(); + section_offset end = cur->get_section_offset() + length; + opcode = cur->fixed(); + switch ((DW_LNE)opcode) { + case DW_LNE::end_sequence: + 
regs.end_sequence = true; + entry = regs; + regs.reset(m->default_is_stmt); + break; + case DW_LNE::set_address: + regs.address = cur->address(); + regs.op_index = 0; + break; + case DW_LNE::define_file: + m->read_file_entry(cur, false); + break; + case DW_LNE::set_discriminator: + // XXX Only DWARF4 + regs.discriminator = cur->uleb128(); + break; + case DW_LNE::lo_user...DW_LNE::hi_user: + // XXX Vendor extensions + throw runtime_error("vendor line number opcode " + + to_string((DW_LNE)opcode) + + " not implemented"); + default: + // XXX Prior to DWARF4, any opcode number + // could be a vendor extension + throw format_error("unknown line number opcode " + + to_string((DW_LNE)opcode)); + } +#pragma GCC diagnostic pop + if (cur->get_section_offset() > end) + throw format_error("extended line number opcode exceeded its size"); + cur += end - cur->get_section_offset(); + return ((DW_LNE)opcode == DW_LNE::end_sequence); + } +} + +DWARFPP_END_NAMESPACE diff --git a/3party/libelfin/dwarf/rangelist.cc b/3party/libelfin/dwarf/rangelist.cc new file mode 100644 index 0000000..f9eb8e9 --- /dev/null +++ b/3party/libelfin/dwarf/rangelist.cc @@ -0,0 +1,103 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#include "internal.hh" + +using namespace std; + +DWARFPP_BEGIN_NAMESPACE + +rangelist::rangelist(const std::shared_ptr
&sec, section_offset off, + unsigned cu_addr_size, taddr cu_low_pc) + : sec(sec->slice(off, ~0, format::unknown, cu_addr_size)), + base_addr(cu_low_pc) +{ +} + +rangelist::rangelist(const initializer_list > &ranges) +{ + synthetic.reserve(ranges.size() * 2 + 2); + for (auto &range : ranges) { + synthetic.push_back(range.first); + synthetic.push_back(range.second); + } + synthetic.push_back(0); + synthetic.push_back(0); + + sec = make_shared
( + section_type::ranges, (const char*)synthetic.data(), + synthetic.size() * sizeof(taddr), + native_order(), format::unknown, sizeof(taddr)); + + base_addr = 0; +} + +rangelist::iterator +rangelist::begin() const +{ + if (sec) + return iterator(sec, base_addr); + return end(); +} + +rangelist::iterator +rangelist::end() const +{ + return iterator(); +} + +bool +rangelist::contains(taddr addr) const +{ + for (auto ent : *this) + if (ent.contains(addr)) + return true; + return false; +} + +rangelist::iterator::iterator(const std::shared_ptr
&sec, taddr base_addr) + : sec(sec), base_addr(base_addr), pos(0) +{ + // Read in the first entry + ++(*this); +} + +rangelist::iterator & +rangelist::iterator::operator++() +{ + // DWARF4 section 2.17.3 + taddr largest_offset = ~(taddr)0; + if (sec->addr_size < sizeof(taddr)) + largest_offset += 1 << (8 * sec->addr_size); + + // Read in entries until we reach a regular entry of an + // end-of-list. Note that pos points to the beginning of the + // entry *following* the current entry, so that's where we + // start. + cursor cur(sec, pos); + while (true) { + entry.low = cur.address(); + entry.high = cur.address(); + + if (entry.low == 0 && entry.high == 0) { + // End of list + sec.reset(); + pos = 0; + break; + } else if (entry.low == largest_offset) { + // Base address change + base_addr = entry.high; + } else { + // Regular entry. Adjust by base address. + entry.low += base_addr; + entry.high += base_addr; + pos = cur.get_section_offset(); + break; + } + } + + return *this; +} + +DWARFPP_END_NAMESPACE diff --git a/3party/libelfin/dwarf/small_vector.hh b/3party/libelfin/dwarf/small_vector.hh new file mode 100644 index 0000000..e73ab60 --- /dev/null +++ b/3party/libelfin/dwarf/small_vector.hh @@ -0,0 +1,197 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#ifndef _DWARFPP_SMALL_VECTOR_HH_ +#define _DWARFPP_SMALL_VECTOR_HH_ + +DWARFPP_BEGIN_NAMESPACE + +/** + * A vector-like class that only heap allocates above a specified + * size. 
/**
 * A vector-like container that keeps up to Min elements in a buffer
 * embedded in the object and only heap allocates once it has to grow
 * beyond the current capacity.
 *
 * The live elements are the contiguous range [base, end).  cap marks
 * the end of the current storage, which is either the inline buffer
 * or a heap block owned by this object.
 *
 * Element access (operator[], at, revat, front, back) and pop_back
 * perform no bounds checking; the caller must ensure the index is
 * valid and the vector is non-empty.
 */
template<class T, unsigned Min>
class small_vector
{
public:
        typedef T value_type;
        typedef value_type& reference;
        typedef const value_type& const_reference;
        typedef size_t size_type;

        /** Construct an empty vector that uses the inline buffer. */
        small_vector()
                : base(inline_base()), end(base), cap(base + Min)
        {
        }

        /** Construct a vector holding copies of the elements of o. */
        small_vector(const small_vector &o)
                : base(inline_base()), end(base), cap(base + Min)
        {
                *this = o;
        }

        /**
         * Move-construct from o, leaving o empty and backed by its
         * own inline buffer.
         */
        small_vector(small_vector &&o)
                : base(inline_base()), end(base), cap(base + Min)
        {
                if (o.is_inline()) {
                        // The elements live inside o itself, so the
                        // storage cannot be stolen; move them over
                        // one by one.  There are at most Min of
                        // them, so they always fit inline here.
                        for (T *src = o.base; src < o.end; ++src, ++end)
                                new (end) T(std::move(*src));
                        o.clear();
                } else {
                        // The elements are on the heap; take over
                        // the block and reset o to its inline buffer.
                        base = o.base;
                        end = o.end;
                        cap = o.cap;

                        o.base = o.inline_base();
                        o.end = o.base;
                        o.cap = o.base + Min;
                }
        }

        ~small_vector()
        {
                clear();
                if (!is_inline())
                        delete[] (char*)base;
        }

        /** Replace the contents with copies of the elements of o. */
        small_vector &operator=(const small_vector &o)
        {
                // Without this guard, clear() below would destroy the
                // very elements we are about to copy from.
                if (this == &o)
                        return *this;

                size_type osize = o.size();
                clear();
                reserve(osize);
                for (size_type i = 0; i < osize; i++)
                        new (&base[i]) T(o[i]);
                end = base + osize;
                return *this;
        }

        /** Return the number of elements. */
        size_type size() const
        {
                return end - base;
        }

        /** Return true if there are no elements. */
        bool empty() const
        {
                return base == end;
        }

        /**
         * Ensure there is room for at least n elements.  Capacity
         * grows by doubling and never shrinks.  Growing moves the
         * elements to a new heap block, which invalidates all
         * references and pointers to them.
         *
         * If an element constructor throws while relocating, the new
         * block is not released.
         */
        void reserve(size_type n)
        {
                if (n <= (size_type)(cap - base))
                        return;

                size_type target = cap - base;
                if (target == 0)
                        target = 1;
                while (target < n)
                        target <<= 1;

                char *newbuf = new char[sizeof(T) * target];
                T *dest = (T*)newbuf;
                for (T *src = base; src < end; ++src, ++dest) {
                        new (dest) T(std::move(*src));
                        // Destroy the element in the OLD storage;
                        // the one just constructed at dest is the
                        // live object from now on.
                        src->~T();
                }
                if (!is_inline())
                        delete[] (char*)base;
                base = (T*)newbuf;
                end = dest;
                cap = base + target;
        }

        reference operator[](size_type n)
        {
                return base[n];
        }

        const_reference operator[](size_type n) const
        {
                return base[n];
        }

        /** Same as operator[]; no bounds check is performed. */
        reference at(size_type n)
        {
                return base[n];
        }

        const_reference at(size_type n) const
        {
                return base[n];
        }

        /**
         * "Reverse at".  revat(0) is equivalent to back().  revat(1)
         * is the element before back.  Etc.
         */
        reference revat(size_type n)
        {
                return *(end - 1 - n);
        }

        const_reference revat(size_type n) const
        {
                return *(end - 1 - n);
        }

        reference front()
        {
                return base[0];
        }

        const_reference front() const
        {
                return base[0];
        }

        reference back()
        {
                return *(end-1);
        }

        const_reference back() const
        {
                return *(end-1);
        }

        /**
         * Append a copy of x.  x may refer to an element of this
         * vector (e.g. v.push_back(v.back())).
         */
        void push_back(const T& x)
        {
                if (end == cap) {
                        // Growing relocates the elements and may
                        // free the storage x points into, so take
                        // the copy before touching the buffer.
                        T saved(x);
                        reserve(size() + 1);
                        new (end) T(std::move(saved));
                } else {
                        new (end) T(x);
                }
                end++;
        }

        /** Append x by moving from it. */
        void push_back(T&& x)
        {
                if (end == cap) {
                        // Same aliasing concern as the copying
                        // overload: secure the value before growing.
                        T saved(std::move(x));
                        reserve(size() + 1);
                        new (end) T(std::move(saved));
                } else {
                        new (end) T(std::move(x));
                }
                end++;
        }

        /** Remove the last element.  The vector must not be empty. */
        void pop_back()
        {
                end--;
                end->~T();
        }

        /** Destroy all elements.  Capacity is retained. */
        void clear()
        {
                for (T* p = base; p < end; ++p)
                        p->~T();
                end = base;
        }

private:
        /** Start of the inline buffer, viewed as element storage. */
        T *inline_base()
        {
                return reinterpret_cast<T*>(buf);
        }

        /** True while the elements live in the inline buffer. */
        bool is_inline() const
        {
                return reinterpret_cast<const char*>(base) == buf;
        }

        // Raw inline storage for Min elements.  It is aligned for T
        // because elements are placement-constructed directly in it.
        alignas(T) char buf[sizeof(T) * Min];
        T *base, *end, *cap;
};
+ +#include "internal.hh" + +#include + +using namespace std; + +DWARFPP_BEGIN_NAMESPACE + +value::value(const unit *cu, + DW_AT name, DW_FORM form, type typ, section_offset offset) + : cu(cu), form(form), typ(typ), offset(offset) { + if (form == DW_FORM::indirect) + resolve_indirect(name); +} + +section_offset +value::get_section_offset() const +{ + return cu->get_section_offset() + offset; +} + +taddr +value::as_address() const +{ + if (form != DW_FORM::addr) + throw value_type_mismatch("cannot read " + to_string(typ) + " as address"); + + cursor cur(cu->data(), offset); + return cur.address(); +} + +const void * +value::as_block(size_t *size_out) const +{ + // XXX Blocks can contain all sorts of things, including + // references, which couldn't be resolved by callers in the + // current minimal API. + cursor cur(cu->data(), offset); + switch (form) { + case DW_FORM::block1: + *size_out = cur.fixed(); + break; + case DW_FORM::block2: + *size_out = cur.fixed(); + break; + case DW_FORM::block4: + *size_out = cur.fixed(); + break; + case DW_FORM::block: + case DW_FORM::exprloc: + *size_out = cur.uleb128(); + break; + default: + throw value_type_mismatch("cannot read " + to_string(typ) + " as block"); + } + cur.ensure(*size_out); + return cur.pos; +} + +uint64_t +value::as_uconstant() const +{ + cursor cur(cu->data(), offset); + switch (form) { + case DW_FORM::data1: + return cur.fixed(); + case DW_FORM::data2: + return cur.fixed(); + case DW_FORM::data4: + return cur.fixed(); + case DW_FORM::data8: + return cur.fixed(); + case DW_FORM::udata: + return cur.uleb128(); + default: + throw value_type_mismatch("cannot read " + to_string(typ) + " as uconstant"); + } +} + +int64_t +value::as_sconstant() const +{ + cursor cur(cu->data(), offset); + switch (form) { + case DW_FORM::data1: + return cur.fixed(); + case DW_FORM::data2: + return cur.fixed(); + case DW_FORM::data4: + return cur.fixed(); + case DW_FORM::data8: + return cur.fixed(); + case DW_FORM::sdata: + return 
cur.sleb128(); + default: + throw value_type_mismatch("cannot read " + to_string(typ) + " as sconstant"); + } +} + +expr +value::as_exprloc() const +{ + cursor cur(cu->data(), offset); + size_t size; + // Prior to DWARF 4, exprlocs were encoded as blocks. + switch (form) { + case DW_FORM::exprloc: + case DW_FORM::block: + size = cur.uleb128(); + break; + case DW_FORM::block1: + size = cur.fixed(); + break; + case DW_FORM::block2: + size = cur.fixed(); + break; + case DW_FORM::block4: + size = cur.fixed(); + break; + default: + throw value_type_mismatch("cannot read " + to_string(typ) + " as exprloc"); + } + return expr(cu, cur.get_section_offset(), size); +} + +bool +value::as_flag() const +{ + switch (form) { + case DW_FORM::flag: { + cursor cur(cu->data(), offset); + return cur.fixed() != 0; + } + case DW_FORM::flag_present: + return true; + default: + throw value_type_mismatch("cannot read " + to_string(typ) + " as flag"); + } +} + +rangelist +value::as_rangelist() const +{ + section_offset off = as_sec_offset(); + + // The compilation unit may not have a base address. In this + // case, the first entry in the range list must be a base + // address entry, but we'll just assume 0 for the initial base + // address. + die cudie = cu->root(); + taddr cu_low_pc = cudie.has(DW_AT::low_pc) ? at_low_pc(cudie) : 0; + auto sec = cu->get_dwarf().get_section(section_type::ranges); + auto cusec = cu->data(); + return rangelist(sec, off, cusec->addr_size, cu_low_pc); +} + +die +value::as_reference() const +{ + section_offset off; + // XXX Would be nice if we could avoid this. The cursor is + // all overhead here. 
+ cursor cur(cu->data(), offset); + switch (form) { + case DW_FORM::ref1: + off = cur.fixed(); + break; + case DW_FORM::ref2: + off = cur.fixed(); + break; + case DW_FORM::ref4: + off = cur.fixed(); + break; + case DW_FORM::ref8: + off = cur.fixed(); + break; + case DW_FORM::ref_udata: + off = cur.uleb128(); + break; + + case DW_FORM::ref_addr: { + off = cur.offset(); + // These seem to be extremely rare in practice (I + // haven't been able to get gcc to produce a + // ref_addr), so it's not worth caching this lookup. + const compilation_unit *base_cu = nullptr; + for (auto &file_cu : cu->get_dwarf().compilation_units()) { + if (file_cu.get_section_offset() > off) + break; + base_cu = &file_cu; + } + die d(base_cu); + d.read(off - base_cu->get_section_offset()); + return d; + } + + case DW_FORM::ref_sig8: { + uint64_t sig = cur.fixed(); + try { + return cu->get_dwarf().get_type_unit(sig).type(); + } catch (std::out_of_range &e) { + throw format_error("unknown type signature 0x" + to_hex(sig)); + } + } + + default: + throw value_type_mismatch("cannot read " + to_string(typ) + " as reference"); + } + + die d(cu); + d.read(off); + return d; +} + +void +value::as_string(string &buf) const +{ + size_t size; + const char *p = as_cstr(&size); + // assign() is well-defined for size == 0, unlike &buf.front() on an empty string. + buf.assign(p, size); +} + +string +value::as_string() const +{ + size_t size; + const char *s = as_cstr(&size); + return string(s, size); +} + +const char * +value::as_cstr(size_t *size_out) const +{ + cursor cur(cu->data(), offset); + switch (form) { + case DW_FORM::string: + return cur.cstr(size_out); + case DW_FORM::strp: { + section_offset off = cur.offset(); + cursor scur(cu->get_dwarf().get_section(section_type::str), off); + return scur.cstr(size_out); + } + default: + throw value_type_mismatch("cannot read " + to_string(typ) + " as string"); + } +} + +section_offset +value::as_sec_offset() const +{ + // Prior to DWARF 4, sec_offsets were encoded as data4 or + // data8.
+ cursor cur(cu->data(), offset); + switch (form) { + case DW_FORM::data4: + return cur.fixed(); + case DW_FORM::data8: + return cur.fixed(); + case DW_FORM::sec_offset: + return cur.offset(); + default: + throw value_type_mismatch("cannot read " + to_string(typ) + " as sec_offset"); + } +} + +void +value::resolve_indirect(DW_AT name) +{ + if (form != DW_FORM::indirect) + return; + + cursor c(cu->data(), offset); + // Write the member `form` (no shadowing local) so later as_*() calls see the resolved form. + do { + form = (DW_FORM)c.uleb128(); + } while (form == DW_FORM::indirect); + typ = attribute_spec(name, form).type; + offset = c.get_section_offset(); +} + +string +to_string(const value &v) +{ + switch (v.get_type()) { + case value::type::invalid: + return ""; + case value::type::address: + return "0x" + to_hex(v.as_address()); + case value::type::block: { + size_t size; + const char *b = (const char*)v.as_block(&size); + string res = ::to_string(size) + " byte block:"; + for (size_t pos = 0; pos < size; ++pos) { + res += ' '; + res += to_hex(b[pos]); + } + return res; + } + case value::type::constant: + return "0x" + to_hex(v.as_uconstant()); + case value::type::uconstant: + return ::to_string(v.as_uconstant()); + case value::type::sconstant: + return ::to_string(v.as_sconstant()); + case value::type::exprloc: + // XXX + return ""; + case value::type::flag: + return v.as_flag() ?
"true" : "false"; + case value::type::line: + return ""; + case value::type::loclist: + return ""; + case value::type::mac: + return ""; + case value::type::rangelist: + return ""; + case value::type::reference: { + die d = v.as_reference(); + auto tu = dynamic_cast(&d.get_unit()); + if (tu) + return "<.debug_types+0x" + to_hex(d.get_section_offset()) + ">"; + return "<0x" + to_hex(d.get_section_offset()) + ">"; + } + case value::type::string: + return v.as_string(); + } + return ""; +} + +DWARFPP_END_NAMESPACE diff --git a/3party/libelfin/elf/.gitignore b/3party/libelfin/elf/.gitignore new file mode 100644 index 0000000..0166efc --- /dev/null +++ b/3party/libelfin/elf/.gitignore @@ -0,0 +1,6 @@ +*.o +to_string.cc +libelf++.a +libelf++.so +libelf++.so.* +libelf++.pc diff --git a/3party/libelfin/elf/common.hh b/3party/libelfin/elf/common.hh new file mode 100644 index 0000000..38b8f10 --- /dev/null +++ b/3party/libelfin/elf/common.hh @@ -0,0 +1,109 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#ifndef _ELFPP_COMMON_HH_ +#define _ELFPP_COMMON_HH_ + +#define ELFPP_BEGIN_NAMESPACE namespace elf { +#define ELFPP_END_NAMESPACE } +#define ELFPP_BEGIN_INTERNAL namespace internal { +#define ELFPP_END_INTERNAL } + +#include + +ELFPP_BEGIN_NAMESPACE + +/** + * A byte ordering. + */ +enum class byte_order +{ + native, + lsb, + msb +}; + +/** + * Return either byte_order::lsb or byte_order::msb. If the argument + * is byte_order::native, it will be resolved to whatever the native + * byte order is. + */ +static inline byte_order +resolve_order(byte_order o) +{ + static const union + { + int i; + char c[sizeof(int)]; + } test = {1}; + + if (o == byte_order::native) + return test.c[0] == 1 ? byte_order::lsb : byte_order::msb; + return o; +} + +/** + * Return v converted from one byte order to another. 
+ */ +template +T +swizzle(T v, byte_order from, byte_order to) +{ + static_assert(sizeof(T) == 1 || + sizeof(T) == 2 || + sizeof(T) == 4 || + sizeof(T) == 8, + "cannot swizzle type"); + + from = resolve_order(from); + to = resolve_order(to); + + if (from == to) + return v; + + switch (sizeof(T)) { + case 1: + return v; + case 2: { + std::uint16_t x = (std::uint16_t)v; + return (T)(((x&0xFF) << 8) | (x >> 8)); + } + case 4: + return (T)__builtin_bswap32((std::uint32_t)v); + case 8: + return (T)__builtin_bswap64((std::uint64_t)v); + } +} + +ELFPP_BEGIN_INTERNAL + +/** + * OrderPick selects between Native, LSB, and MSB based on ord. + */ +template +struct OrderPick; + +template +struct OrderPick +{ + typedef Native T; +}; + +template +struct OrderPick +{ + typedef LSB T; +}; + +template +struct OrderPick +{ + typedef MSB T; +}; + +ELFPP_END_INTERNAL + +ELFPP_END_NAMESPACE + +#endif diff --git a/3party/libelfin/elf/data.hh b/3party/libelfin/elf/data.hh new file mode 100644 index 0000000..4a60944 --- /dev/null +++ b/3party/libelfin/elf/data.hh @@ -0,0 +1,574 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. 
+ +#ifndef _ELFPP_DATA_HH_ +#define _ELFPP_DATA_HH_ + +#include "common.hh" + +#include +#include +#include + +ELFPP_BEGIN_NAMESPACE + +// Object file classes (ELF64 table 3) +enum class elfclass : unsigned char +{ + _32 = 1, // 32-bit objects + _64 = 2, // 64-bit objects +}; + +std::string +to_string(elfclass v); + +// Common basic data types +struct ElfTypes +{ + typedef std::uint16_t Half; + typedef std::uint32_t Word; + typedef std::int32_t Sword; +}; + +struct Elf32 : public ElfTypes +{ + // ELF class + static const elfclass cls = elfclass::_32; + + // Basic data types (ELF32 figure 1-2) + typedef std::uint32_t Addr; + typedef std::uint32_t Off; + + // Predicated types + typedef Word Word32_Xword64; + + template + struct pick + { + typedef t32 t; + }; +}; + +struct Elf64 : ElfTypes +{ + // ELF class + static const elfclass cls = elfclass::_64; + + // Basic data types (ELF64 table 1) + typedef std::uint64_t Addr; + typedef std::uint64_t Off; + typedef std::uint64_t Xword; + typedef std::int64_t Sxword; + + // Predicated types + typedef Xword Word32_Xword64; + + template + struct pick + { + typedef t64 t; + }; +}; + +// Data encodings (ELF64 table 4) +enum class elfdata : unsigned char +{ + lsb = 1, + msb = 2, +}; + +std::string +to_string(elfdata v); + +// Operating system and ABI identifiers (ELF64 table 5) +enum class elfosabi : unsigned char +{ + sysv = 0, + hpux = 1, + standalone = 255, +}; + +std::string +to_string(elfosabi v); + +// Object file types (ELF64 table 6) +enum class et : ElfTypes::Half +{ + none = 0, // No file type + rel = 1, // Relocatable object file + exec = 2, // Executable file + dyn = 3, // Shared object file + core = 4, // Core file + loos = 0xfe00, // Environment-specific use + hios = 0xfeff, + loproc = 0xff00, // Processor-specific use + hiproc = 0xffff, +}; + +std::string +to_string(et v); + +// ELF header (ELF32 figure 1-3, ELF64 figure 2) +template +struct Ehdr +{ + typedef E types; + static const byte_order order = Order; + + // 
ELF identification + unsigned char ei_magic[4]; + elfclass ei_class; + elfdata ei_data; + unsigned char ei_version; + elfosabi ei_osabi; + unsigned char ei_abiversion; + unsigned char ei_pad[7]; + + et type; // Object file type + typename E::Half machine; // Machine type + typename E::Word version; // Object file version + typename E::Addr entry; // Entry point address + typename E::Off phoff; // Program header offset + typename E::Off shoff; // Section header offset + typename E::Word flags; // Processor-specific flags + typename E::Half ehsize; // ELF header size + typename E::Half phentsize; // Size of program header entry + typename E::Half phnum; // Number of program header entries + typename E::Half shentsize; // Size of section header entry + typename E::Half shnum; // Number of section header entries + typename E::Half shstrndx; // Section name string table index + + template + void from(const E2 &o) + { + std::memcpy(ei_magic, o.ei_magic, sizeof(ei_magic)); + ei_class = swizzle(o.ei_class, o.order, order); + ei_data = swizzle(o.ei_data, o.order, order); + ei_version = swizzle(o.ei_version, o.order, order); + ei_osabi = swizzle(o.ei_osabi, o.order, order); + ei_abiversion = swizzle(o.ei_abiversion, o.order, order); + std::memcpy(ei_pad, o.ei_pad, sizeof(ei_pad)); + + type = swizzle(o.type, o.order, order); + machine = swizzle(o.machine, o.order, order); + version = swizzle(o.version, o.order, order); + entry = swizzle(o.entry, o.order, order); + phoff = swizzle(o.phoff, o.order, order); + shoff = swizzle(o.shoff, o.order, order); + flags = swizzle(o.flags, o.order, order); + ehsize = swizzle(o.ehsize, o.order, order); + phentsize = swizzle(o.phentsize, o.order, order); + phnum = swizzle(o.phnum, o.order, order); + shentsize = swizzle(o.shentsize, o.order, order); + shnum = swizzle(o.shnum, o.order, order); + shstrndx = swizzle(o.shstrndx, o.order, order); + } +}; + +// Special section indices (ELF32 figure 1-7, ELF64 table 7) +// +// This is an integer with 
a few special values, so this is a regular +// enum, rather than a type-safe enum. However, this is declared in a +// namespace and then used to avoid polluting the elf:: namespace. +namespace enums { +enum shn : ElfTypes::Half // This is a Word in Shdr and Half in Sym. +{ + undef = 0, // Undefined or meaningless + + loproc = 0xff00, // Processor-specific use + hiproc = 0xff1f, + loos = 0xff20, // Environment-specific use + hios = 0xff3f, + + abs = 0xfff1, // Reference is an absolute value + common = 0xfff2, // Symbol declared as a common block +}; + +std::string +to_string(shn v); +} + +using enums::shn; + +// Section types (ELF64 table 8) +enum class sht : ElfTypes::Word +{ + null = 0, // Marks an unseen section header + progbits = 1, // Contains information defined by the program + symtab = 2, // Contains a linker symbol table + strtab = 3, // Contains a string table + rela = 4, // Contains "Rela" type relocation entries + hash = 5, // Contains a symbol hash table + dynamic = 6, // Contains dynamic linking tables + note = 7, // Contains note information + nobits = 8, // Contains uninitialized space; + // does not occupy any space in the file + rel = 9, // Contains "Rel" type relocation entries + shlib = 10, // Reserved + dynsym = 11, // Contains a dynamic loader symbol table + loos = 0x60000000, // Environment-specific use + hios = 0x6FFFFFFF, + loproc = 0x70000000, // Processor-specific use + hiproc = 0x7FFFFFFF, +}; + +std::string +to_string(sht v); + +// Section attributes (ELF64 table 9). Note: This is an Elf32_Word in +// ELF32. We use the larger ELF64 type for the canonical +// representation and switch it out for a plain Elf32_Word in the +// ELF32 format. 
+enum class shf : Elf64::Xword +{ + write = 0x1, // Section contains writable data + alloc = 0x2, // Section is allocated in memory image of program + execinstr = 0x4, // Section contains executable instructions + maskos = 0x0F000000, // Environment-specific use + maskproc = 0xF0000000, // Processor-specific use +}; + +std::string +to_string(shf v); + +static inline constexpr shf operator&(shf a, shf b) +{ + return (shf)((std::uint64_t)a & (std::uint64_t)b); +} + +static inline constexpr shf operator|(shf a, shf b) +{ + return (shf)((std::uint64_t)a | (std::uint64_t)b); +} + +static inline constexpr shf operator^(shf a, shf b) +{ + return (shf)((std::uint64_t)a ^ (std::uint64_t)b); +} + +static inline constexpr shf operator~(shf a) +{ + return (shf)~((std::uint64_t)a); +} + +static inline shf& operator&=(shf &a, shf b) +{ + a = a & b; + return a; +} + +static inline shf& operator|=(shf &a, shf b) +{ + a = a | b; + return a; +} + +static inline shf& operator^=(shf &a, shf b) +{ + a = a ^ b; + return a; +} + +// Section header (ELF32 figure 1-8, ELF64 figure 3) +template +struct Shdr +{ + typedef E types; + static const byte_order order = Order; + // Section numbers are half-words in some structures and full + // words in others. Here we declare a local shn type that is + // elf::shn for the native byte order, but the full word for + // specific encoding byte orders. 
+ typedef typename internal::OrderPick::T shn; + + typename E::Word name; // Section name + sht type; // Section type + typename E::template pick::t flags; // Section attributes + typename E::Addr addr; // Virtual address in memory + typename E::Off offset; // Offset in file + typename E::Word32_Xword64 size; // Size of section + shn link; // Link to other section + typename E::Word info; // Miscellaneous information + typename E::Word32_Xword64 addralign; // Address alignment boundary + typename E::Word32_Xword64 entsize; // Size of entries, if section has table + + template + void from(const E2 &o) + { + name = swizzle(o.name, o.order, order); + type = swizzle(o.type, o.order, order); + flags = (decltype(flags))swizzle(o.flags, o.order, order); + addr = swizzle(o.addr, o.order, order); + offset = swizzle(o.offset, o.order, order); + size = swizzle(o.size, o.order, order); + link = (decltype(link))swizzle((typename E::Word)o.link, o.order, order); + info = swizzle(o.info, o.order, order); + addralign = swizzle(o.addralign, o.order, order); + entsize = swizzle(o.entsize, o.order, order); + } +}; + +// Segment types (ELF64 table 16) +enum class pt : ElfTypes::Word +{ + null = 0, // Unused entry + load = 1, // Loadable segment + dynamic = 2, // Dynamic linking tables + interp = 3, // Program interpreter path name + note = 4, // Note sections + shlib = 5, // Reserved + phdr = 6, // Program header table + loos = 0x60000000, // Environment-specific use + hios = 0x6FFFFFFF, + loproc = 0x70000000, // Processor-specific use + hiproc = 0x7FFFFFFF, +}; + +std::string +to_string(pt v); + +// Segment attributes +enum class pf : ElfTypes::Word +{ + x = 0x1, // Execute permission + w = 0x2, // Write permission + r = 0x4, // Read permission + maskos = 0x00FF0000, // Environment-specific use + maskproc = 0xFF000000, // Processor-specific use +}; + +std::string +to_string(pf v); + +static inline constexpr pf operator&(pf a, pf b) +{ + return (pf)((std::uint64_t)a & 
(std::uint64_t)b); +} + +static inline constexpr pf operator|(pf a, pf b) +{ + return (pf)((std::uint64_t)a | (std::uint64_t)b); +} + +static inline constexpr pf operator^(pf a, pf b) +{ + return (pf)((std::uint64_t)a ^ (std::uint64_t)b); +} + +static inline constexpr pf operator~(pf a) +{ + return (pf)~((std::uint64_t)a); +} + +static inline pf& operator&=(pf &a, pf b) +{ + a = a & b; + return a; +} + +static inline pf& operator|=(pf &a, pf b) +{ + a = a | b; + return a; +} + +static inline pf& operator^=(pf &a, pf b) +{ + a = a ^ b; + return a; +} + +// Program header (ELF32 figure 2-1, ELF64 figure 6) +template +struct Phdr; + +template +struct Phdr +{ + typedef Elf32 types; + static const byte_order order = Order; + + pt type; // Type of segment + Elf32::Off offset; // Offset in file + Elf32::Addr vaddr; // Virtual address in memory + Elf32::Addr paddr; // Reserved + Elf32::Word filesz; // Size of segment in file + Elf32::Word memsz; // Size of segment in memory + pf flags; // Segment attributes + Elf32::Word align; // Alignment of segment + + template + void from(const E2 &o) + { + type = swizzle(o.type, o.order, order); + offset = swizzle(o.offset, o.order, order); + vaddr = swizzle(o.vaddr, o.order, order); + paddr = swizzle(o.paddr, o.order, order); + filesz = swizzle(o.filesz, o.order, order); + memsz = swizzle(o.memsz, o.order, order); + flags = swizzle(o.flags, o.order, order); + align = swizzle(o.align, o.order, order); + } +}; + +template +struct Phdr +{ + typedef Elf64 types; + static const byte_order order = Order; + + pt type; // Type of segment + pf flags; // Segment attributes + Elf64::Off offset; // Offset in file + Elf64::Addr vaddr; // Virtual address in memory + Elf64::Addr paddr; // Reserved + Elf64::Xword filesz; // Size of segment in file + Elf64::Xword memsz; // Size of segment in memory + Elf64::Xword align; // Alignment of segment + + template + void from(const E2 &o) + { + type = swizzle(o.type, o.order, order); + offset = 
swizzle(o.offset, o.order, order); + vaddr = swizzle(o.vaddr, o.order, order); + paddr = swizzle(o.paddr, o.order, order); + filesz = swizzle(o.filesz, o.order, order); + memsz = swizzle(o.memsz, o.order, order); + flags = swizzle(o.flags, o.order, order); + align = swizzle(o.align, o.order, order); + } +}; + +// Symbol bindings (ELF32 figure 1-16, ELF64 table 14) +enum class stb : unsigned char +{ + local = 0, // Not visible outside the object file + global = 1, // Global symbol + weak = 2, // Global scope, but with lower + // precedence than global symbols + loos = 10, // Environment-specific use + hios = 12, + loproc = 13, // Processor-specific use + hiproc = 15, +}; + +std::string +to_string(stb v); + +// Symbol types (ELF32 figure 1-17, ELF64 table 15) +enum class stt : unsigned char +{ + notype = 0, // No type (e.g., absolute symbol) + object = 1, // Data object + func = 2, // Function entry point + section = 3, // Symbol is associated with a section + file = 4, // Source file associated with the + // object file + loos = 10, // Environment-specific use + hios = 12, + loproc = 13, // Processor-specific use + hiproc = 15, +}; + +std::string +to_string(stt v); + +// Symbol table (ELF32 figure 1-15, ELF64 figure 4) +template +struct Sym; + +template +struct Sym +{ + typedef Elf32 types; + static const byte_order order = Order; + + Elf32::Word name; // Symbol name (strtab offset) + Elf32::Addr value; // Symbol value (address) + Elf32::Word size; // Size of object + unsigned char info; // Type and binding attributes + unsigned char other; // Reserved + shn shnxd; // Section table index + + template + void from(const E2 &o) + { + name = swizzle(o.name, o.order, order); + value = swizzle(o.value, o.order, order); + size = swizzle(o.size, o.order, order); + info = o.info; + other = o.other; + shnxd = swizzle(o.shnxd, o.order, order); + } + + stb binding() const + { + return (stb)(info >> 4); + } + + void set_binding(stb v) + { + info = (info & 0x0F) | ((unsigned 
char)v << 4); + } + + stt type() const + { + return (stt)(info & 0xF); + } + + void set_type(stt v) + { + info = (info & 0xF0) | (unsigned char)v; + } +}; + +template +struct Sym +{ + typedef Elf64 types; + static const byte_order order = Order; + + Elf64::Word name; // Symbol name (strtab offset) + unsigned char info; // Type and binding attributes + unsigned char other; // Reserved + shn shnxd; // Section table index + Elf64::Addr value; // Symbol value (address) + Elf64::Xword size; // Size of object + + template + void from(const E2 &o) + { + name = swizzle(o.name, o.order, order); + value = swizzle(o.value, o.order, order); + size = swizzle(o.size, o.order, order); + info = o.info; + other = o.other; + shnxd = swizzle(o.shnxd, o.order, order); + } + + stb binding() const + { + return (stb)(info >> 4); + } + + void set_binding(stb v) + { + info = (info & 0xF) | ((unsigned char)v << 4); + } + + stt type() const + { + return (stt)(info & 0xF); + } + + void set_type(stt v) + { + info = (info & 0xF0) | (unsigned char)v; + } +}; + +ELFPP_END_NAMESPACE + +#endif diff --git a/3party/libelfin/elf/elf++.hh b/3party/libelfin/elf/elf++.hh new file mode 100644 index 0000000..ee59ed0 --- /dev/null +++ b/3party/libelfin/elf/elf++.hh @@ -0,0 +1,454 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#ifndef _ELFPP_HH_ +#define _ELFPP_HH_ + +#include "common.hh" +#include "data.hh" + +#include +#include +#include +#include + +ELFPP_BEGIN_NAMESPACE + +class elf; +class loader; +class section; +class strtab; +class symtab; +class segment; +// XXX Audit for binary compatibility + +// XXX Segments, other section types + +/** + * An exception indicating malformed ELF data. 
+ */ +class format_error : public std::runtime_error +{ +public: + explicit format_error(const std::string &what_arg) + : std::runtime_error(what_arg) { } + explicit format_error(const char *what_arg) + : std::runtime_error(what_arg) { } +}; + +/** + * An ELF file. + * + * This class is internally reference counted and efficiently + * copyable. + * + * Raw pointers to ELF data returned by any method of this object or + * any object derived from this object point directly into loaded + * section data. Hence, callers must ensure that the loader passed to + * this file remains live as long as any such pointer is in use. + * Keeping any object that can return such a pointer live is + * sufficient to keep the loader live. + */ +class elf +{ +public: + /** + * Construct an ELF file that is backed by data read from the + * given loader. + */ + explicit elf(const std::shared_ptr &l); + + /** + * Construct an ELF file that is initially not valid. Calling + * methods other than operator= and valid on this results in + * undefined behavior. + */ + elf() = default; + elf(const elf &o) = default; + elf(elf &&o) = default; + + elf& operator=(const elf &o) = default; + + bool valid() const + { + return !!m; + } + + /** + * Return the ELF file header in canonical form (ELF64 in + * native byte order). + */ + const Ehdr<> &get_hdr() const; + + /** + * Return the loader used by this file. + */ + std::shared_ptr get_loader() const; + + /** + * Return the segments in this file. + */ + const std::vector &segments() const; + + /** + * Return the segment at the given index. If no such segment + * is found, return an invalid segment. + */ + const segment &get_segment(unsigned index) const; + + /** + * Return the sections in this file. + */ + const std::vector
§ions() const; + + /** + * Return the section with the specified name. If no such + * section is found, return an invalid section. + */ + const section &get_section(const std::string &name) const; + + /** + * Return the section at the given index. If no such section + * is found, return an invalid section. + */ + const section &get_section(unsigned index) const; + +private: + struct impl; + std::shared_ptr m; +}; + +/** + * An interface for loading sections of an ELF file. + */ +class loader +{ +public: + virtual ~loader() { } + + /** + * Load the requested file section into memory and return a + * pointer to the beginning of it. This memory must remain + * valid and unchanged until the loader is destroyed. If the + * loader cannot satisfy the full request for any reason + * (including a premature EOF), it must throw an exception. + */ + virtual const void *load(off_t offset, size_t size) = 0; +}; + +/** + * An mmap-based loader that maps requested sections on demand. This + * will close fd when done, so the caller should dup the file + * descriptor if it intends to continue using it. + */ +std::shared_ptr create_mmap_loader(int fd); + +/** + * An exception indicating that a section is not of the requested type. + */ +class section_type_mismatch : public std::logic_error +{ +public: + explicit section_type_mismatch(const std::string &what_arg) + : std::logic_error(what_arg) { } + explicit section_type_mismatch(const char *what_arg) + : std::logic_error(what_arg) { } +}; + +/** + * An ELF segment. + * + * This class is internally reference counted and efficiently + * copyable. + */ +class segment +{ +public: + /** + * Construct a segment that is initially not valid. Calling + * methods other than operator= and valid on this results in + * undefined behavior. 
+ */ + segment() { } + + segment(const elf &f, const void *hdr); + segment(const segment &o) = default; + segment(segment &&o) = default; + + /** + * Return true if this segment is valid and corresponds to a + * segment in the ELF file. + */ + bool valid() const + { + return !!m; + } + + /** + * Return the ELF section header in canonical form (ELF64 in + * native byte order). + */ + const Phdr<> &get_hdr() const; + + /** + * Return this segment's data. The returned buffer will + * be file_size() bytes long. + */ + const void *data() const; + + /** + * Return the on disk size of this segment in bytes. + */ + size_t file_size() const; + + /** + * Return the in-memory size of this segment in bytes. + * Bytes between file_size() and mem_size() are implicitly zeroes. + */ + size_t mem_size() const; + +private: + struct impl; + std::shared_ptr m; +}; + +/** + * An ELF section. + * + * This class is internally reference counted and efficiently + * copyable. + */ +class section +{ +public: + /** + * Construct a section that is initially not valid. Calling + * methods other than operator= and valid on this results in + * undefined behavior. + */ + section() { } + + section(const elf &f, const void *hdr); + section(const section &o) = default; + section(section &&o) = default; + + /** + * Return true if this section is valid and corresponds to a + * section in the ELF file. + */ + bool valid() const + { + return !!m; + } + + /** + * Return the ELF section header in canonical form (ELF64 in + * native byte order). + */ + const Shdr<> &get_hdr() const; + + /** + * Return this section's name. + */ + const char *get_name(size_t *len_out) const; + /** + * Return this section's name. The returned string copies its + * data, so loader liveness requirements don't apply. + */ + std::string get_name() const; + + /** + * Return this section's data. If this is a NOBITS section, + * return nullptr. + */ + const void *data() const; + /** + * Return the size of this section in bytes. 
+ */ + size_t size() const; + + /** + * Return this section as a strtab. Throws + * section_type_mismatch if this section is not a string + * table. + */ + strtab as_strtab() const; + + /** + * Return this section as a symtab. Throws + * section_type_mismatch if this section is not a symbol + * table. + */ + symtab as_symtab() const; + +private: + struct impl; + std::shared_ptr m; +}; + +/** + * A string table. + * + * This class is internally reference counted and efficiently + * copyable. + */ +class strtab +{ +public: + /** + * Construct a strtab that is initially not valid. Calling + * methods other than operator= and valid on this results in + * undefined behavior. + */ + strtab() = default; + strtab(elf f, const void *data, size_t size); + + bool valid() const + { + return !!m; + } + + /** + * Return the string at the given offset in this string table. + * If the offset is out of bounds, throws std::range_error. + * This is very efficient since the returned pointer points + * directly into the loaded section, though this still + * verifies that the returned string is NUL-terminated. + */ + const char *get(Elf64::Off offset, size_t *len_out) const; + /** + * Return the string at the given offset in this string table. + */ + std::string get(Elf64::Off offset) const; + +private: + struct impl; + std::shared_ptr m; +}; + +/** + * A symbol from a symbol table. + */ +class sym +{ + const strtab strs; + Sym<> data; + +public: + sym(elf f, const void *data, strtab strs); + + /** + * Return this symbol's raw data. + */ + const Sym<> &get_data() const + { + return data; + } + + /** + * Return this symbol's name. + * + * This returns a pointer into the string table and, as such, + * is very efficient. If len_out is non-nullptr, *len_out + * will be set the length of the returned string. + */ + const char *get_name(size_t *len_out) const; + + /** + * Return this symbol's name as a string. + */ + std::string get_name() const; +}; + +/** + * A symbol table. 
+ * + * This class is internally reference counted and efficiently + * copyable. + */ +class symtab +{ +public: + /** + * Construct a symtab that is initially not valid. Calling + * methods other than operator= and valid on this results in + * undefined behavior. + */ + symtab() = default; + symtab(elf f, const void *data, size_t size, strtab strs); + + bool valid() const + { + return !!m; + } + + class iterator + { + const elf f; + const strtab strs; + const char *pos; + size_t stride; + + iterator(const symtab &tab, const char *pos); + friend class symtab; + + public: + sym operator*() const + { + return sym(f, pos, strs); + } + + iterator& operator++() + { + return *this += 1; + } + + iterator operator++(int) + { + iterator cur(*this); + *this += 1; + return cur; + } + + iterator& operator+=(std::ptrdiff_t x) + { + pos += x * stride; + return *this; + } + + iterator& operator-=(std::ptrdiff_t x) + { + pos -= x * stride; + return *this; + } + + bool operator==(iterator &o) const + { + return pos == o.pos; + } + + bool operator!=(iterator &o) const + { + return pos != o.pos; + } + }; + + /** + * Return an iterator to the first symbol. + */ + iterator begin() const; + + /** + * Return an iterator just past the last symbol. + */ + iterator end() const; + +private: + struct impl; + std::shared_ptr m; +}; + +ELFPP_END_NAMESPACE + +#endif diff --git a/3party/libelfin/elf/elf.cc b/3party/libelfin/elf/elf.cc new file mode 100644 index 0000000..61172ac --- /dev/null +++ b/3party/libelfin/elf/elf.cc @@ -0,0 +1,403 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. 
+ +#include "elf++.hh" + +#include + +using namespace std; + +ELFPP_BEGIN_NAMESPACE + +template class Hdr> +void canon_hdr(Hdr *out, const void *data, + elfclass ei_class, elfdata ei_data) +{ + switch (ei_class) { + case elfclass::_32: + switch (ei_data) { + case elfdata::lsb: + out->from(*(Hdr*)data); + break; + case elfdata::msb: + out->from(*(Hdr*)data); + break; + } + break; + case elfclass::_64: + switch (ei_data) { + case elfdata::lsb: + out->from(*(Hdr*)data); + break; + case elfdata::msb: + out->from(*(Hdr*)data); + return; + } + } +} + +////////////////////////////////////////////////////////////////// +// class elf +// + +struct elf::impl +{ + impl(const shared_ptr &l) + : l(l) { } + + const shared_ptr l; + Ehdr<> hdr; + vector
sections; + vector segments; + + section invalid_section; + segment invalid_segment; +}; + +elf::elf(const std::shared_ptr &l) + : m(make_shared(l)) +{ + // Read the first six bytes to check the magic number, ELF + // class, and byte order. + struct core_hdr + { + char ei_magic[4]; + elfclass ei_class; + elfdata ei_data; + unsigned char ei_version; + } *core_hdr = (struct core_hdr*)l->load(0, sizeof *core_hdr); + + // Check basic header + if (strncmp(core_hdr->ei_magic, "\x7f" "ELF", 4) != 0) + throw format_error("bad ELF magic number"); + if (core_hdr->ei_version != 1) + throw format_error("unknown ELF version"); + if (core_hdr->ei_class != elfclass::_32 && + core_hdr->ei_class != elfclass::_64) + throw format_error("bad ELF class"); + if (core_hdr->ei_data != elfdata::lsb && + core_hdr->ei_data != elfdata::msb) + throw format_error("bad ELF data order"); + + // Read in the real header and canonicalize it + size_t hdr_size = (core_hdr->ei_class == elfclass::_32 ? + sizeof(Ehdr) : sizeof(Ehdr)); + const void *hdr = l->load(0, hdr_size); + canon_hdr(&m->hdr, hdr, core_hdr->ei_class, core_hdr->ei_data); + + // More checks + if (m->hdr.version != 1) + throw format_error("bad section ELF version"); + if (m->hdr.shnum && m->hdr.shstrndx >= m->hdr.shnum) + throw format_error("bad section name string table index"); + + // Load segments + const void *seg_data = l->load(m->hdr.phoff, + m->hdr.phentsize * m->hdr.phnum); + for (unsigned i = 0; i < m->hdr.phnum; i++) { + const void *seg = ((const char*)seg_data) + i * m->hdr.phentsize; + m->segments.push_back(segment(*this, seg)); + } + + // Load sections + const void *sec_data = l->load(m->hdr.shoff, + m->hdr.shentsize * m->hdr.shnum); + for (unsigned i = 0; i < m->hdr.shnum; i++) { + const void *sec = ((const char*)sec_data) + i * m->hdr.shentsize; + // XXX Circular reference. Maybe this should be + // constructed on the fly? Canonicalizing the header + // isn't super-cheap. 
+ m->sections.push_back(section(*this, sec)); + } +} + +const Ehdr<> & +elf::get_hdr() const +{ + return m->hdr; +} + +shared_ptr +elf::get_loader() const +{ + return m->l; +} + +const std::vector
& +elf::sections() const +{ + return m->sections; +} + +const std::vector & +elf::segments() const +{ + return m->segments; +} + +const section & +elf::get_section(const std::string &name) const +{ + for (auto &sec : sections()) + if (name == sec.get_name(nullptr)) + return sec; + return m->invalid_section; +} + +const section & +elf::get_section(unsigned index) const +{ + if (index >= sections().size()) + return m->invalid_section; + return sections().at(index); +} + +const segment& +elf::get_segment(unsigned index) const +{ + if (index >= segments().size()) + return m->invalid_segment; + return segments().at(index); +} + +////////////////////////////////////////////////////////////////// +// class segment +// + +struct segment::impl { + impl(const elf &f) + : f(f), data(nullptr) { } + + const elf f; + Phdr<> hdr; + const void *data; +}; + +segment::segment(const elf &f, const void *hdr) + : m(make_shared(f)) { + canon_hdr(&m->hdr, hdr, f.get_hdr().ei_class, f.get_hdr().ei_data); +} + +const Phdr<> & +segment::get_hdr() const { + return m->hdr; +} + +const void * +segment::data() const { + if (!m->data) + m->data = m->f.get_loader()->load(m->hdr.offset, + m->hdr.filesz); + return m->data; +} + +size_t +segment::file_size() const { + return m->hdr.filesz; +} + +size_t +segment::mem_size() const { + return m->hdr.memsz; +} + +////////////////////////////////////////////////////////////////// +// class section +// + +std::string +enums::to_string(shn v) +{ + if (v == shn::undef) + return "undef"; + if (v == shn::abs) + return "abs"; + if (v == shn::common) + return "common"; + return std::to_string(v); +} + +struct section::impl +{ + impl(const elf &f) + : f(f), name(nullptr), data(nullptr) { } + + const elf f; + Shdr<> hdr; + const char *name; + size_t name_len; + const void *data; +}; + +section::section(const elf &f, const void *hdr) + : m(make_shared(f)) +{ + canon_hdr(&m->hdr, hdr, f.get_hdr().ei_class, f.get_hdr().ei_data); +} + +const Shdr<> & 
+section::get_hdr() const +{ + return m->hdr; +} + +const char * +section::get_name(size_t *len_out) const +{ + // XXX Should the section name strtab be cached? + if (!m->name) + m->name = m->f.get_section(m->f.get_hdr().shstrndx) + .as_strtab().get(m->hdr.name, &m->name_len); + if (len_out) + *len_out = m->name_len; + return m->name; +} + +string +section::get_name() const +{ + return get_name(nullptr); +} + +const void * +section::data() const +{ + if (m->hdr.type == sht::nobits) + return nullptr; + if (!m->data) + m->data = m->f.get_loader()->load(m->hdr.offset, m->hdr.size); + return m->data; +} + +size_t +section::size() const +{ + return m->hdr.size; +} + +strtab +section::as_strtab() const +{ + if (m->hdr.type != sht::strtab) + throw section_type_mismatch("cannot use section as strtab"); + return strtab(m->f, data(), size()); +} + +symtab +section::as_symtab() const +{ + if (m->hdr.type != sht::symtab && m->hdr.type != sht::dynsym) + throw section_type_mismatch("cannot use section as symtab"); + return symtab(m->f, data(), size(), + m->f.get_section(get_hdr().link).as_strtab()); +} + +////////////////////////////////////////////////////////////////// +// class strtab +// + +struct strtab::impl +{ + impl(const elf &f, const char *data, const char *end) + : f(f), data(data), end(end) { } + + const elf f; + const char *data, *end; +}; + +strtab::strtab(elf f, const void *data, size_t size) + : m(make_shared(f, (const char*)data, (const char *)data + size)) +{ +} + +const char * +strtab::get(Elf64::Off offset, size_t *len_out) const +{ + const char *start = m->data + offset; + + if (start >= m->end) + throw range_error("string offset " + std::to_string(offset) + " exceeds section size"); + + // Find the null terminator + const char *p = start; + while (p < m->end && *p) + p++; + if (p == m->end) + throw format_error("unterminated string"); + + if (len_out) + *len_out = p - start; + return start; +} + +std::string +strtab::get(Elf64::Off offset) const +{ + return 
get(offset, nullptr); +} + +////////////////////////////////////////////////////////////////// +// class sym +// + +sym::sym(elf f, const void *data, strtab strs) + : strs(strs) +{ + canon_hdr(&this->data, data, f.get_hdr().ei_class, f.get_hdr().ei_data); +} + +const char * +sym::get_name(size_t *len_out) const +{ + return strs.get(get_data().name, len_out); +} + +std::string +sym::get_name() const +{ + return strs.get(get_data().name); +} + +////////////////////////////////////////////////////////////////// +// class symtab +// + +struct symtab::impl +{ + impl(const elf &f, const char *data, const char *end, strtab strs) + : f(f), data(data), end(end), strs(strs) { } + + const elf f; + const char *data, *end; + const strtab strs; +}; + +symtab::symtab(elf f, const void *data, size_t size, strtab strs) + : m(make_shared(f, (const char*)data, (const char *)data + size, + strs)) +{ +} + +symtab::iterator::iterator(const symtab &tab, const char *pos) + : f(tab.m->f), strs(tab.m->strs), pos(pos) +{ + if (f.get_hdr().ei_class == elfclass::_32) + stride = sizeof(Sym); + else + stride = sizeof(Sym); +} + +symtab::iterator +symtab::begin() const +{ + return iterator(*this, m->data); +} + +symtab::iterator +symtab::end() const +{ + return iterator(*this, m->end); +} + +ELFPP_END_NAMESPACE diff --git a/3party/libelfin/elf/enum-print.py b/3party/libelfin/elf/enum-print.py new file mode 100644 index 0000000..e7e9f78 --- /dev/null +++ b/3party/libelfin/elf/enum-print.py @@ -0,0 +1,163 @@ +# Copyright (c) 2013 Austin T. Clements. All rights reserved. +# Use of this source code is governed by an MIT license +# that can be found in the LICENSE file. 
+ +import sys, re +from optparse import OptionParser + +def read_toks(): + data = sys.stdin.read() + while data: + data = data.lstrip() + if data.startswith("//") or data.startswith("#"): + data = data.split("\n",1)[1] + elif data.startswith("/*"): + data = data.split("*/",1)[1] + elif data.startswith("\"") or data.startswith("'"): + c = data[0] + m = re.match(r'%s([^\\%s]|\\.)*%s' % (c,c,c), data) + yield m.group(0) + data = data[m.end():] + else: + m = re.match(r"[_a-zA-Z0-9]+|[{}();]|[^_a-zA-Z0-9 \n\t\f]+", data) + yield m.group(0) + data = data[m.end():] + +enums = {} + +def do_top_level(toks, ns=[]): + while toks: + tok = toks.pop(0) + if tok == "enum" and toks[0] == "class": + toks.pop(0) + name = toks.pop(0) + # Get to the first token in the body + while toks.pop(0) != "{": + pass + # Consume body and close brace + do_enum_body("::".join(ns + [name]), toks) + elif tok == "class": + name = do_qname(toks) + # Find the class body, if there is one + while toks[0] != "{" and toks[0] != ";": + toks.pop(0) + # Enter the class's namespace + if toks[0] == "{": + toks.pop(0) + do_top_level(toks, ns + [name]) + elif tok == "{": + # Enter an unknown namespace + do_top_level(toks, ns + [None]) + elif tok == "}": + # Exit the namespace + assert len(ns) + return + elif not ns and tok == "string" and toks[:2] == ["to_string", "("]: + # Get the argument type and name + toks.pop(0) + toks.pop(0) + typ = do_qname(toks) + if typ not in enums: + continue + arg = toks.pop(0) + assert toks[0] == ")" + + if typ in options.mask: + make_to_string_mask(typ, arg) + else: + make_to_string(typ, arg) + +def fmt_value(typ, key): + if options.no_type: + val = key + else: + val = "%s%s%s" % (typ, options.separator, key) + if options.strip_underscore: + val = val.strip("_") + return val + +def expr_remainder(typ, arg): + if options.hex: + return "\"(%s)0x\" + to_hex((int)%s)" % (typ, arg) + else: + return "\"(%s)\" + std::to_string((int)%s)" % (typ, arg) + +def make_to_string(typ, arg): + 
print("std::string") + print("to_string(%s %s)" % (typ, arg)) + print("{") + print(" switch (%s) {" % arg) + for key in enums[typ]: + if key in options.exclude: + print(" case %s::%s: break;" % (typ, key)) + continue + print(" case %s::%s: return \"%s\";" % \ + (typ, key, fmt_value(typ, key))) + print(" }") + print(" return %s;" % expr_remainder(typ, arg)) + print("}") + print() + +def make_to_string_mask(typ, arg): + print("std::string") + print("to_string(%s %s)" % (typ, arg)) + print("{") + print(" std::string res;") + for key in enums[typ]: + if key in options.exclude: + continue + print(" if ((%s & %s::%s) == %s::%s) { res += \"%s|\"; %s &= ~%s::%s; }" % \ + (arg, typ, key, typ, key, fmt_value(typ, key), arg, typ, key)) + print(" if (res.empty() || %s != (%s)0) res += %s;" % \ + (arg, typ, expr_remainder(typ, arg))) + print(" else res.pop_back();") + print(" return res;") + print("}") + print() + +def do_enum_body(name, toks): + keys = [] + while True: + key = toks.pop(0) + if key == "}": + assert toks.pop(0) == ";" + enums[name] = keys + return + keys.append(key) + if toks[0] == "=": + toks.pop(0) + toks.pop(0) + if toks[0] == ",": + toks.pop(0) + else: + assert toks[0] == "}" + +def do_qname(toks): + # Get a nested-name-specifier followed by an identifier + res = [] + while True: + res.append(toks.pop(0)) + if toks[0] != "::": + return "::".join(res) + toks.pop(0) + +parser = OptionParser() +parser.add_option("-x", "--exclude", dest="exclude", action="append", + help="exclude FIELD", metavar="FIELD", default=[]) +parser.add_option("-u", "--strip-underscore", dest="strip_underscore", + action="store_true", + help="strip leading and trailing underscores") +parser.add_option("-s", "--separator", dest="separator", + help="use SEP between type and field", metavar="SEP", + default="::") +parser.add_option("--hex", dest="hex", action="store_true", + help="return unknown values in hex", default=False) +parser.add_option("--no-type", dest="no_type", 
action="store_true", + help="omit type") +parser.add_option("--mask", dest="mask", action="append", + help="treat TYPE as a bit-mask", metavar="TYPE", default=[]) +(options, args) = parser.parse_args() +if args: + parser.error("expected 0 arguments") + +do_top_level(list(read_toks())) diff --git a/3party/libelfin/elf/mmap_loader.cc b/3party/libelfin/elf/mmap_loader.cc new file mode 100644 index 0000000..875d7bd --- /dev/null +++ b/3party/libelfin/elf/mmap_loader.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +#include "elf++.hh" + +#include + +#include +#include +#include +#include +#include +#include + +using namespace std; + +ELFPP_BEGIN_NAMESPACE + +class mmap_loader : public loader +{ + void *base; + size_t lim; + +public: + mmap_loader(int fd) + { + off_t end = lseek(fd, 0, SEEK_END); + if (end == (off_t)-1) + throw system_error(errno, system_category(), + "finding file length"); + lim = end; + + base = mmap(nullptr, lim, PROT_READ, MAP_SHARED, fd, 0); + if (base == MAP_FAILED) + throw system_error(errno, system_category(), + "mmap'ing file"); + close(fd); + } + + ~mmap_loader() + { + munmap(base, lim); + } + + const void *load(off_t offset, size_t size) + { + if (offset + size > lim) + throw range_error("offset exceeds file size"); + return (const char*)base + offset; + } +}; + +std::shared_ptr +create_mmap_loader(int fd) +{ + return make_shared(fd); +} + +ELFPP_END_NAMESPACE diff --git a/3party/libelfin/elf/to_hex.hh b/3party/libelfin/elf/to_hex.hh new file mode 100644 index 0000000..f8b9442 --- /dev/null +++ b/3party/libelfin/elf/to_hex.hh @@ -0,0 +1,34 @@ +// Copyright (c) 2013 Austin T. Clements. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. 
+ +#ifndef _ELFPP_TO_HEX_HH_ +#define _ELFPP_TO_HEX_HH_ + +#include +#include + +template +std::string +to_hex(T v) +{ + static_assert(std::is_integral::value, + "to_hex applied to non-integral type"); + if (v == 0) + return std::string("0"); + char buf[sizeof(T)*2 + 1]; + char *pos = &buf[sizeof(buf)-1]; + *pos-- = '\0'; + while (v && pos >= buf) { + int digit = v & 0xf; + if (digit < 10) + *pos = '0' + digit; + else + *pos = 'a' + (digit - 10); + pos--; + v >>= 4; + } + return std::string(pos + 1); +} + +#endif // _ELFPP_TO_HEX_HH_ diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f06e63..520bbb4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,8 @@ project( set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_EXTENSIONS OFF) +# set(CMAKE_C_FLAGS "-gdwarf-3 -gstrict-dwarf") set(CMAKE_CXX_FLAGS "-gdwarf-3 +# -gstrict-dwarf") option(SLED_BUILD_BENCHMARK "Build benchmark" OFF) option(SLED_BUILD_TESTS "Build tests" OFF) @@ -24,6 +26,7 @@ target_include_directories(benchmark_main PUBLIC src/) add_library(sled STATIC "") +add_subdirectory(3party/libelfin EXCLUDE_FROM_ALL) add_subdirectory(3party/minilua EXCLUDE_FROM_ALL) # add_subdirectory(3party/gperftools EXCLUDE_FROM_ALL) add_subdirectory(3party/asyncplusplus EXCLUDE_FROM_ALL) @@ -50,8 +53,7 @@ target_include_directories(sled PUBLIC src/ 3party/eigen 3party/inja 3party/rxcpp) target_sources( sled - PRIVATE - src/sled/async/async.cc + PRIVATE src/sled/async/async.cc src/sled/debugging/demangle.cc src/sled/debugging/symbolize.cc src/sled/event_bus/event_bus.cc @@ -88,20 +90,12 @@ target_sources( src/sled/time_utils.cc src/sled/units/time_delta.cc src/sled/units/timestamp.cc - src/sled/uri.cc -) -# set(BUILD_RTTR_DYNAMIC OFF) set(BUILD_UNIT_TESTS OFF) -# set(BUILD_WITH_STATIC_RUNTIME_LIBS ON) set(BUILD_WITH_DOCUMENTATION OFF) -# add_subdirectory(3party/rttr EXCLUDE_FROM_ALL) include(CheckCCompilerFlag) -# check_c_compiler_flag("-Wl,--whole-archive" 
SUPPORT_COMPILE_WHOLE_ARCHIVE) -# if(SUPPORT_COMPILE_WHOLE_ARCHIVE) set(WHOLE_ARCHIVE_WRAPPER_START -# "-Wl,--whole-archive") set(WHOLE_ARCHIVE_WRAPPER_END "-Wl,--no-whole-archive") -# endif() + src/sled/uri.cc) target_link_libraries( sled PUBLIC rpc_core fmt marl Async++ minilua - PRIVATE dl + PRIVATE dl elf # protobuf::libprotobuf ${WHOLE_ARCHIVE_WRAPPER_START} # tcmalloc_and_profiler_static ${WHOLE_ARCHIVE_WRAPPER_END} ) diff --git a/src/sled/debugging/symbolize.cc b/src/sled/debugging/symbolize.cc index 6d70f7c..7f58fce 100644 --- a/src/sled/debugging/symbolize.cc +++ b/src/sled/debugging/symbolize.cc @@ -2,6 +2,7 @@ #include "sled/lang/attributes.h" #ifdef _WIN32 +namespace sled { void InitializeSymbolizer(const char *argv0) {} @@ -11,6 +12,7 @@ Symbolize(const void *pc, char *out, int out_size) { return false; } +}// namespace sled #elif defined(__APPLE__) #include "symbolize_darwin.inc" #elif defined(__linux__) diff --git a/src/sled/debugging/symbolize_elf.inc b/src/sled/debugging/symbolize_elf.inc index ab51283..c917a18 100644 --- a/src/sled/debugging/symbolize_elf.inc +++ b/src/sled/debugging/symbolize_elf.inc @@ -1,140 +1,61 @@ -#include -#include -#include -#include -#include +#include "sled/log/log.h" +#include +#include #include -#include -#include -#include -#include -#include #include -#include -#include #include namespace sled { -static constexpr char kTag[] = "symbolizer"; -static char *g_exe = nullptr; - -static const uint64_t -GetHex(const char *start, const char *end) -{ - uint64_t hex = 0; - for (const char *p = start; p < end; ++p) { - int ch = *p; - if (ch >= '0' && ch <= '9') { - hex = hex * 16 + (ch - '0'); - } else if (ch >= 'a' && ch <= 'f') { - hex = hex * 16 + (ch - 'a' + 10); - } else if (ch >= 'A' && ch <= 'F') { - hex = hex * 16 + (ch - 'A' + 10); - } else { - break; - } - } - - return hex; -} - -static const uint64_t -GetHex(const std::string &str) -{ - return GetHex(str.c_str(), str.c_str() + str.size()); -} - -struct ObjFile { - 
ObjFile() : filename(nullptr), start_addr(nullptr), end_addr(nullptr), offset(0), fd(-1), elf_type(-1) - { - memset(&elf_header, 0, sizeof(elf_header)); - memset(phdrs.data(), 0, sizeof(ElfW(Phdr)) * phdrs.size()); - } - - char *filename; - const void *start_addr; - const void *end_addr; - uint64_t offset; - - int fd; - int elf_type; - ElfW(Ehdr) elf_header; - - std::array phdrs; -}; - -std::string -ReadFullFile(int fd) -{ - std::stringstream ss; - char buf[4096]; - ssize_t n; - while ((n = read(fd, buf, sizeof(buf))) > 0) { ss.write(buf, n); } - return ss.str(); -} - -bool -ReadAddrMap() -{ - char maps_path[80]; - snprintf(maps_path, sizeof(maps_path), "/proc/self/task/%d/maps", getpid()); - int maps_fd; - do { - maps_fd = open(maps_path, O_RDONLY); - } while (maps_fd < 0 && errno == EINTR); - - if (maps_fd < 0) { - LOGE(kTag, "open {} failed, {}", maps_path, strerror(errno)); - return false; - } - - std::string full_file = ReadFullFile(maps_fd); - auto lines = sled::StrSplit(full_file, "\n"); - - // maps start_addr-end_addr permission offset dev inode pathname - for (const auto &line : lines) { - auto fields = sled::StrSplit(line, " \n\t", true); - if (fields.size() < 6) { continue; } - auto addrs = sled::StrSplit(fields[0], "-", false); - - uint64_t start_addr = GetHex(addrs[0]); - uint64_t end_addr = GetHex(addrs[1]); - uint64_t offset = GetHex(fields[2]); - LOGD(kTag, "addr: {}-{} {} {} {}", start_addr, end_addr, offset, fields[1], fields[5]); - } - - return true; -} - -class Symbolizer {}; +static elf::elf *g_elf = nullptr; +static dwarf::dwarf *g_dwarf = nullptr; void InitializeSymbolizer(const char *argv0) { - if (g_exe) { - free(g_exe); - g_exe = nullptr; - } + if (g_elf) { return; } + int self_fd = open(argv0, O_RDONLY); + if (self_fd < 0) { return; } + g_elf = new elf::elf(elf::create_mmap_loader(self_fd)); + g_dwarf = new dwarf::dwarf(dwarf::elf::create_loader(*g_elf)); +} - if (argv0 && argv0[0] != '\0') { g_exe = strdup(argv0); } +static bool 
+FindPC(dwarf::taddr pc) +{ + return false; } bool Symbolize(const void *pc, char *out, int out_size) { - Dl_info info; - if (!dladdr(pc, &info)) { return false; } - if (info.dli_fname && out_size > 0) { - strncpy(out, info.dli_fname, out_size); + if (!g_elf || !g_dwarf) { return false; } + if (out_size <= 0) { return false; } + out[0] = '\0'; - const auto end_pos = static_cast(out_size) - 1; - if (out[end_pos] != '\0') { - // add ... - out[end_pos] = '\0'; - } + union { + const void *void_pc; + dwarf::taddr pc_addr; + } u; + + u.void_pc = pc; + bool found = false; + for (auto &cu : g_dwarf->compilation_units()) { + try { + if (dwarf::die_pc_range(cu.root()).contains(u.pc_addr)) { + auto < = cu.get_line_table(); + auto iter = lt.find_address(u.pc_addr); + if (iter == lt.end()) { + return false; + } else { + LOGD("found line {}", iter->get_description()); + } + found = true; + break; + } + } catch (...) {} } - return true; + return found; } }// namespace sled diff --git a/src/sled/debugging/symbolize_test.cc b/src/sled/debugging/symbolize_test.cc index 7040831..ef4d84b 100644 --- a/src/sled/debugging/symbolize_test.cc +++ b/src/sled/debugging/symbolize_test.cc @@ -64,7 +64,7 @@ void_cast(TRet (TClass::*mem_func)(Args...)) } // TODO: Support Linux -#if defined(__APPLE__) +// #if defined(__APPLE__) TEST_SUITE("Symbolize") { TEST_CASE("Trivial Function") @@ -87,7 +87,8 @@ TEST_SUITE("Symbolize") CHECK_EQ(doctest::String("Class::StaticFunc()"), TrySymbolize(void_cast(&Class::StaticFunc))); } } -#endif + +// #endif int main(int argc, char *argv[]) diff --git a/src/sled/experimental/design_patterns/dispatcher.h b/src/sled/experimental/design_patterns/dispatcher.h new file mode 100644 index 0000000..53871df --- /dev/null +++ b/src/sled/experimental/design_patterns/dispatcher.h @@ -0,0 +1,59 @@ +#ifndef SLED_EXP_DESIGN_PATTERNS_DISPATCHER_H +#define SLED_EXP_DESIGN_PATTERNS_DISPATCHER_H + +#include "sled/synchronization/mutex.h" +#include +#include + +namespace sled { 
+namespace experimental { + +template +class Dispatcher { +public: + enum class DispatchResult { kHandled, kNotFound }; + + struct Handler { + virtual ~Handler() = default; + virtual bool HandleMessage(const T &) = 0; + virtual bool OnMessage(T &&) = 0; + }; + + virtual ~Dispatcher() = default; + + void AddHandler(std::shared_ptr handler) + { + if (!handler) { return; } + // sled::MutexLock lock(&mutex_); + sled::SharedMutexWriteLock lock(&rwlock_); + handlers_.insert(handler); + } + + void RemoveHandler(std::shared_ptr handler) + { + if (!handler) { return; } + // sled::MutexLock lock(&mutex_); + sled::SharedMutexWriteLock lock(&rwlock_); + handlers_.erase(handler); + } + + DispatchResult Dispatch(const T &message) + { + // sled::MutexLock lock(&mutex_); + sled::SharedMutexReadLock lock(&rwlock_); + for (auto &handler : handlers_) { + if (handler->HandleMessage(message)) { return DispatchResult::kHandled; } + } + return DispatchResult::kNotFound; + } + +private: + std::set> handlers_; + sled::SharedMutex rwlock_; + // sled::Mutex mutex_; +}; + +}// namespace experimental + +}// namespace sled +#endif// SLED_EXP_DESIGN_PATTERNS_DISPATCHER_H diff --git a/src/sled/uri_fuzz.cc b/src/sled/uri_fuzz.cc index 7821d08..6952c6e 100644 --- a/src/sled/uri_fuzz.cc +++ b/src/sled/uri_fuzz.cc @@ -4,6 +4,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size > 15) { return 1; } auto uri_or = sled::URI::ParseURI(std::string(reinterpret_cast(data), size)); if (!uri_or.ok()) { return 0; } auto uri = std::move(uri_or.value());