feat add dispatcher

This commit is contained in:
tqcq
2024-04-07 02:42:12 +00:00
parent 7807eecc08
commit cc7d131d95
32 changed files with 7240 additions and 133 deletions

7
3party/libelfin/dwarf/.gitignore vendored Normal file
View File

@@ -0,0 +1,7 @@
*.o
to_string.cc
libdwarf++.a
libdwarf++.so
libdwarf++.so.*
libdwarf++.pc
/doc/

View File

@@ -0,0 +1,176 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#include "internal.hh"
using namespace std;
DWARFPP_BEGIN_NAMESPACE
// Decide which value::type an attribute decodes to, given the
// attribute name and form taken from an abbreviation declaration.
//
// Most forms determine the type by themselves.  Block forms and
// data4/data8 are additionally disambiguated by the attribute name
// (see the per-case comments), and DW_FORM::sec_offset is always
// disambiguated by the attribute name.
//
// Returns value::type::invalid for DW_FORM::indirect and for a
// section offset carried by an attribute in the
// [DW_AT::lo_user, DW_AT::hi_user] range.
// Throws format_error for a form not handled here, or for a section
// offset on an attribute that is not expected to carry one.
static value::type
resolve_type(DW_AT name, DW_FORM form)
{
    switch (form) {
    case DW_FORM::addr:
        return value::type::address;

    case DW_FORM::block:
    case DW_FORM::block1:
    case DW_FORM::block2:
    case DW_FORM::block4:
        // Prior to DWARF 4, exprlocs didn't have their own
        // form and were represented as blocks.
        // XXX Should this be predicated on version?
        switch (name) {
        case DW_AT::location:
        case DW_AT::byte_size:
        case DW_AT::bit_offset:
        case DW_AT::bit_size:
        case DW_AT::string_length:
        case DW_AT::lower_bound:
        case DW_AT::return_addr:
        case DW_AT::bit_stride:
        case DW_AT::upper_bound:
        case DW_AT::count:
        case DW_AT::data_member_location:
        case DW_AT::frame_base:
        case DW_AT::segment:
        case DW_AT::static_link:
        case DW_AT::use_location:
        case DW_AT::vtable_elem_location:
        case DW_AT::allocated:
        case DW_AT::associated:
        case DW_AT::data_location:
        case DW_AT::byte_stride:
            return value::type::exprloc;
        default:
            return value::type::block;
        }

    case DW_FORM::data4:
    case DW_FORM::data8:
        // Prior to DWARF 4, section offsets didn't have their
        // own form and were represented as data4 or data8.
        // DWARF 3 clarified that types that accepted both
        // constants and section offsets were to treat data4
        // and data8 as section offsets and other constant
        // forms as constants.
        // XXX Should this be predicated on version?
        switch (name) {
        case DW_AT::location:
        case DW_AT::stmt_list:
        case DW_AT::string_length:
        case DW_AT::return_addr:
        case DW_AT::start_scope:
        case DW_AT::data_member_location:
        case DW_AT::frame_base:
        case DW_AT::macro_info:
        case DW_AT::segment:
        case DW_AT::static_link:
        case DW_AT::use_location:
        case DW_AT::vtable_elem_location:
        case DW_AT::ranges:
            goto sec_offset;
        default:
            // Not a section-offset attribute: leave the inner
            // switch and share the plain-constant case below.
            break;
        }
        // Fall through
    case DW_FORM::data1:
    case DW_FORM::data2:
        return value::type::constant;

    case DW_FORM::udata:
        return value::type::uconstant;

    case DW_FORM::sdata:
        return value::type::sconstant;

    case DW_FORM::exprloc:
        return value::type::exprloc;

    case DW_FORM::flag:
    case DW_FORM::flag_present:
        return value::type::flag;

    case DW_FORM::ref1:
    case DW_FORM::ref2:
    case DW_FORM::ref4:
    case DW_FORM::ref8:
    case DW_FORM::ref_addr:
    case DW_FORM::ref_sig8:
    case DW_FORM::ref_udata:
        return value::type::reference;

    case DW_FORM::string:
    case DW_FORM::strp:
        return value::type::string;

    case DW_FORM::indirect:
        // There's nothing meaningful we can do
        return value::type::invalid;

    case DW_FORM::sec_offset:
    sec_offset:
        // The type of this form depends on the attribute
        switch (name) {
        case DW_AT::stmt_list:
            return value::type::line;
        case DW_AT::location:
        case DW_AT::string_length:
        case DW_AT::return_addr:
        case DW_AT::data_member_location:
        case DW_AT::frame_base:
        case DW_AT::segment:
        case DW_AT::static_link:
        case DW_AT::use_location:
        case DW_AT::vtable_elem_location:
            return value::type::loclist;
        case DW_AT::macro_info:
            return value::type::mac;
        case DW_AT::start_scope:
        case DW_AT::ranges:
            return value::type::rangelist;
        default:
            // HACK: ignore vendor extensions.  This is a plain
            // range test rather than a GNU "case lo ... hi:"
            // label so the file stays standard C++.
            if (name >= DW_AT::lo_user && name <= DW_AT::hi_user)
                return value::type::invalid;
            throw format_error("DW_FORM_sec_offset not expected for attribute " +
                               to_string(name));
        }
    }
    throw format_error("unknown attribute form " + to_string(form));
}
// Build an attribute specification from its name and form.  The value
// type is computed once here by resolve_type(name, form), so this
// constructor propagates any exception resolve_type throws.
attribute_spec::attribute_spec(DW_AT name, DW_FORM form)
: name(name), form(form), type(resolve_type(name, form))
{
}
// Read one abbreviation declaration (Section 7.5.3) from *cur into
// this entry, discarding any attributes read previously.
//
// Returns false if the abbreviation code read is 0 (nothing further
// is consumed in that case), true once a complete declaration,
// including its terminating (0, 0) name/form pair, has been read.
bool
abbrev_entry::read(cursor *cur)
{
    attributes.clear();

    code = cur->uleb128();
    if (code == 0)
        return false;

    tag = static_cast<DW_TAG>(cur->uleb128());
    children = (cur->fixed<DW_CHILDREN>() == DW_CHILDREN::yes);

    for (;;) {
        // Name is encoded first, then form; read them in that order.
        const DW_AT at_name = static_cast<DW_AT>(cur->uleb128());
        const DW_FORM at_form = static_cast<DW_FORM>(cur->uleb128());
        const bool is_terminator =
            at_name == static_cast<DW_AT>(0) &&
            at_form == static_cast<DW_FORM>(0);
        if (is_terminator)
            break;
        attributes.push_back(attribute_spec(at_name, at_form));
    }

    attributes.shrink_to_fit();
    return true;
}
DWARFPP_END_NAMESPACE

View File

@@ -0,0 +1,267 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#include "dwarf++.hh"
using namespace std;
DWARFPP_BEGIN_NAMESPACE
// Helper macros that stamp out the at_<name>() attribute accessors
// below.  Each macro ends in "static_assert(true, "")" without a
// semicolon so that every use must be written with a trailing ';'.

// at_<name>(d): return the raw value of DW_AT::<name> without
// converting it.
#define AT_ANY(name) \
value at_##name(const die &d) \
{ \
return d[DW_AT::name]; \
} \
static_assert(true, "")
// at_<name>(d): return DW_AT::<name> converted with as_address().
#define AT_ADDRESS(name) \
taddr at_##name(const die &d) \
{ \
return d[DW_AT::name].as_address(); \
} \
static_assert(true, "")
// at_<name>(d): read DW_AT::<name> with as_uconstant() and cast the
// result to the enumeration `type`.
#define AT_ENUM(name, type) \
type at_##name(const die &d) \
{ \
return (type)d[DW_AT::name].as_uconstant(); \
} \
static_assert(true, "")
// at_<name>(d): return DW_AT::<name> converted with as_flag().
#define AT_FLAG(name) \
bool at_##name(const die &d) \
{ \
return d[DW_AT::name].as_flag(); \
} \
static_assert(true, "")
// Same as AT_FLAG, but the DW_AT enumerator is spelled <name>_ (used
// for names that are C++ keywords) while the accessor stays
// at_<name>.
#define AT_FLAG_(name) \
bool at_##name(const die &d) \
{ \
return d[DW_AT::name##_].as_flag(); \
} \
static_assert(true, "")
// at_<name>(d): return the DIE that DW_AT::<name> refers to, via
// as_reference().
#define AT_REFERENCE(name) \
die at_##name(const die &d) \
{ \
return d[DW_AT::name].as_reference(); \
} \
static_assert(true, "")
// at_<name>(d): return DW_AT::<name> converted with as_string().
#define AT_STRING(name) \
string at_##name(const die &d) \
{ \
return d[DW_AT::name].as_string(); \
} \
static_assert(true, "")
// at_<name>(d, ctx): resolve DW_AT::<name> to an unsigned integer
// through _at_udynamic, which accepts constants, references and
// expressions.
#define AT_UDYNAMIC(name) \
uint64_t at_##name(const die &d, expr_context *ctx) \
{ \
return _at_udynamic(DW_AT::name, d, ctx); \
} \
static_assert(true, "")
// Resolve attribute `attr` of DIE `d` to an unsigned integer
// (DWARF4 section 2.19).
//
// - constant / uconstant: the value itself;
// - reference: the same attribute is looked up on the referenced DIE
//   (depth counts the hops and is limited to 16);
// - exprloc: the expression is evaluated with `ctx`.
//
// Throws format_error when the reference chain is too deep or the
// attribute has any other type.
static uint64_t _at_udynamic(DW_AT attr, const die &d, expr_context *ctx, int depth = 0)
{
    if (depth > 16)
        throw format_error("reference depth exceeded for " + to_string(attr));

    value v(d[attr]);
    const value::type kind = v.get_type();

    if (kind == value::type::constant || kind == value::type::uconstant)
        return v.as_uconstant();

    if (kind == value::type::reference)
        return _at_udynamic(attr, v.as_reference(), ctx, depth + 1);

    if (kind == value::type::exprloc)
        return v.as_exprloc().evaluate(ctx).value;

    throw format_error(to_string(attr) + " has unexpected type " +
                       to_string(kind));
}
// Accessor definitions, grouped by the high nibble of the attribute
// code.  Each AT_* line defines one at_<name>() function through the
// macros defined earlier in this file.  "XXX <attr>" lines name
// attributes that have no accessor in this part of the file.
//////////////////////////////////////////////////////////////////
// 0x0X
//
AT_REFERENCE(sibling);
// XXX location
AT_STRING(name);
AT_ENUM(ordering, DW_ORD);
AT_UDYNAMIC(byte_size);
AT_UDYNAMIC(bit_offset);
AT_UDYNAMIC(bit_size);
//////////////////////////////////////////////////////////////////
// 0x1X
//
// XXX stmt_list
AT_ADDRESS(low_pc);
// Return the high PC of DIE `d` as an address.
//
// DW_AT::high_pc may be stored either as an address, which is
// returned unchanged, or as an unsigned constant, which is added to
// at_low_pc(d).  Any other type raises format_error.
taddr
at_high_pc(const die &d)
{
    value v(d[DW_AT::high_pc]);
    const value::type kind = v.get_type();

    if (kind == value::type::address)
        return v.as_address();

    if (kind == value::type::constant || kind == value::type::uconstant)
        return at_low_pc(d) + v.as_uconstant();

    throw format_error(to_string(DW_AT::high_pc) + " has unexpected type " +
                       to_string(kind));
}
// Remaining 0x1X accessors.  discr_value and const_value use AT_ANY
// and therefore return the unconverted value.
AT_ENUM(language, DW_LANG);
AT_REFERENCE(discr);
AT_ANY(discr_value); // XXX Signed or unsigned
AT_ENUM(visibility, DW_VIS);
AT_REFERENCE(import);
// XXX string_length
AT_REFERENCE(common_reference);
AT_STRING(comp_dir);
AT_ANY(const_value);
AT_REFERENCE(containing_type);
// XXX default_value
//////////////////////////////////////////////////////////////////
// 0x2X
//
// Return DW_AT::inline_ of DIE `d` as a DW_INL value.
//
// XXX Missing attribute is equivalent to DW_INL_not_inlined
// (DWARF4 section 3.3.8); that case is not handled here, the
// attribute is always looked up.
DW_INL at_inline(const die &d)
{
    value v(d[DW_AT::inline_]);
    return static_cast<DW_INL>(v.as_uconstant());
}
// Remaining 0x2X accessors.  The AT_UDYNAMIC ones take an extra
// expr_context* used if the attribute turns out to be an expression.
AT_FLAG(is_optional);
AT_UDYNAMIC(lower_bound); // XXX Language-based default?
AT_STRING(producer);
AT_FLAG(prototyped);
// XXX return_addr
// XXX start_scope
AT_UDYNAMIC(bit_stride);
AT_UDYNAMIC(upper_bound);
//////////////////////////////////////////////////////////////////
// 0x3X
//
// One accessor per AT_* line; "XXX" lines name attributes without an
// accessor here.
AT_REFERENCE(abstract_origin);
AT_ENUM(accessibility, DW_ACCESS);
// XXX const address_class
AT_FLAG(artificial);
// XXX base_types
AT_ENUM(calling_convention, DW_CC);
AT_UDYNAMIC(count);
// Compute the location of a data member described by DIE `d`.
//
// - constant / uconstant: the value is an offset added to `base`, and
//   the result is tagged as an address;
// - exprloc: the expression is evaluated with `ctx` and `base` as its
//   argument;
// - loclist: not implemented (throws std::runtime_error);
// - anything else: throws format_error.
//
// `pc` is accepted but not used by this implementation.
expr_result
at_data_member_location(const die &d, expr_context *ctx, taddr base, taddr pc)
{
    value v(d[DW_AT::data_member_location]);
    const value::type kind = v.get_type();

    if (kind == value::type::constant || kind == value::type::uconstant)
        return {expr_result::type::address, base + v.as_uconstant()};

    if (kind == value::type::exprloc)
        return v.as_exprloc().evaluate(ctx, base);

    if (kind == value::type::loclist) {
        // XXX
        throw std::runtime_error("not implemented");
    }

    throw format_error("DW_AT_data_member_location has unexpected type " +
                       to_string(kind));
}
// Remaining 0x3X accessors.
// XXX decl_column decl_file decl_line
AT_FLAG(declaration);
// XXX discr_list
AT_ENUM(encoding, DW_ATE);
AT_FLAG(external);
//////////////////////////////////////////////////////////////////
// 0x4X
//
// XXX frame_base
// Return the DIE referenced by DW_AT::friend_ of `d`.  Written out by
// hand because the enumerator carries a trailing underscore (friend
// is a keyword) and there is no reference macro for that spelling.
die at_friend(const die &d)
{
    value target(d[DW_AT::friend_]);
    return target.as_reference();
}
// Remaining 0x4X accessors, followed by the first 0x5X ones.
AT_ENUM(identifier_case, DW_ID);
// XXX macro_info
AT_REFERENCE(namelist_item);
AT_REFERENCE(priority); // XXX Computed might be useful
// XXX segment
AT_REFERENCE(specification);
// XXX static_link
AT_REFERENCE(type);
// XXX use_location
AT_FLAG(variable_parameter);
// XXX 7.11 The value DW_VIRTUALITY_none is equivalent to the absence
// of the DW_AT_virtuality attribute.
AT_ENUM(virtuality, DW_VIRTUALITY);
// XXX vtable_elem_location
AT_UDYNAMIC(allocated);
AT_UDYNAMIC(associated);
//////////////////////////////////////////////////////////////////
// 0x5X
//
// XXX data_location
AT_UDYNAMIC(byte_stride);
AT_ADDRESS(entry_pc);
AT_FLAG(use_UTF8);
AT_REFERENCE(extension);
// Return DW_AT::ranges of DIE `d` converted to a rangelist.
rangelist
at_ranges(const die &d)
{
    value ranges(d[DW_AT::ranges]);
    return ranges.as_rangelist();
}
// Remaining 0x5X accessors, followed by the 0x6X ones.  mutable and
// explicit go through AT_FLAG_ because their DW_AT enumerators are
// spelled mutable_ and explicit_.
// XXX trampoline
// XXX const call_column, call_file, call_line
AT_STRING(description);
// XXX const binary_scale
// XXX const decimal_scale
AT_REFERENCE(small);
// XXX const decimal_sign
// XXX const digit_count
//////////////////////////////////////////////////////////////////
// 0x6X
//
AT_STRING(picture_string);
AT_FLAG_(mutable);
AT_FLAG(threads_scaled);
AT_FLAG_(explicit);
AT_REFERENCE(object_pointer);
AT_ENUM(endianity, DW_END);
AT_FLAG(elemental);
AT_FLAG(pure);
AT_FLAG(recursive);
AT_REFERENCE(signature); // XXX Computed might be useful
AT_FLAG(main_subprogram);
// XXX const data_bit_offset
AT_FLAG(const_expr);
AT_FLAG(enum_class);
AT_STRING(linkage_name);
// Return the PC ranges covered by DIE `d` (DWARF4 section 2.17).
//
// If the DIE has DW_AT::ranges, that list is returned.  Otherwise a
// single range [low_pc, high_pc) is built; when high_pc is absent the
// range covers just the one address at low_pc.
rangelist
die_pc_range(const die &d)
{
    if (d.has(DW_AT::ranges))
        return at_ranges(d);

    taddr low = at_low_pc(d);
    taddr high;
    if (d.has(DW_AT::high_pc))
        high = at_high_pc(d);
    else
        high = low + 1;

    return rangelist({{low, high}});
}
DWARFPP_END_NAMESPACE

View File

@@ -0,0 +1,207 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#include "internal.hh"
#include <stdexcept>
#include <cstring>
using namespace std;
DWARFPP_BEGIN_NAMESPACE
// Decode a signed LEB128 integer at the cursor and advance past it
// (DWARF Appendix C).
//
// Bytes are consumed until one without the continuation bit (0x80) is
// found; if the section ends first, underflow() is called.  Encodings
// longer than 64 bits are still consumed in full, but payload bits
// beyond bit 63 are discarded instead of being shifted in.
int64_t
cursor::sleb128()
{
    uint64_t result = 0;
    unsigned shift = 0;
    const unsigned width = sizeof(result) * 8;

    while (pos < sec->end) {
        uint8_t byte = *(uint8_t*)(pos++);
        // Shifting a 64-bit value by 64 or more is undefined, so
        // stop accumulating (and stop growing shift) once every
        // result bit has been supplied.
        if (shift < width) {
            result |= (uint64_t)(byte & 0x7f) << shift;
            shift += 7;
        }
        if ((byte & 0x80) == 0) {
            // Sign-extend when the final group's sign bit (0x40)
            // is set and there are result bits left to fill.
            if (shift < width && (byte & 0x40))
                result |= -((uint64_t)1 << shift);
            return result;
        }
    }
    underflow();
    return 0;
}
// Read an initial-length field at the cursor (Section 7.4) and return
// a new section covering that unit: the length field itself plus the
// number of bytes it announces.  The cursor is advanced to the first
// byte after the unit.
//
// A 32-bit length below 0xfffffff0 selects the 32-bit DWARF format;
// 0xffffffff is an escape followed by a 64-bit length and selects the
// 64-bit format.
//
// Throws format_error if the length uses a reserved value or if the
// unit would extend past the end of the enclosing section.
shared_ptr<section>
cursor::subsection()
{
    const char *begin = pos;
    section_length length = fixed<uword>();
    section_length header = sizeof(uword);
    format fmt;

    if (length < 0xfffffff0) {
        fmt = format::dwarf32;
    } else if (length == 0xffffffff) {
        length = fixed<uint64_t>();
        fmt = format::dwarf64;
        header += sizeof(uint64_t);
    } else {
        throw format_error("initial length has reserved value");
    }

    // Validate against the enclosing section before trusting the
    // length; checking before the addition also avoids overflow for
    // huge 64-bit lengths.
    const section_length avail = (section_length)(sec->end - begin);
    if (avail < header || length > avail - header)
        throw format_error("unit length exceeds section size");

    length += header;
    pos = begin + length;
    return make_shared<section>(sec->type, begin, length, sec->ord, fmt);
}
void
cursor::skip_initial_length()
{
switch (sec->fmt) {
case format::dwarf32:
pos += sizeof(uword);
break;
case format::dwarf64:
pos += sizeof(uword) + sizeof(uint64_t);
break;
default:
throw logic_error("cannot skip initial length with unknown format");
}
}
// Advance the cursor by sizeof(sbyte), i.e. one byte, to step over a
// unit-type field.  No bounds check is performed here.
void
cursor::skip_unit_type()
{
pos += sizeof(sbyte);
}
// Read a section offset at the cursor.  Its width follows the
// section's DWARF format: 4 bytes for the 32-bit format, 8 bytes for
// the 64-bit format.
// Throws logic_error if the section's format is neither of those.
section_offset
cursor::offset()
{
    const format fmt = sec->fmt;

    if (fmt == format::dwarf32)
        return fixed<uint32_t>();
    if (fmt == format::dwarf64)
        return fixed<uint64_t>();

    throw logic_error("cannot read offset with unknown format");
}
// Read the NUL-terminated string at the cursor into `out`, replacing
// its previous contents, and advance past the terminator.
// Propagates the format_error thrown by cstr() for an unterminated
// string.
void
cursor::string(std::string &out)
{
    size_t size;
    const char *p = this->cstr(&size);
    // assign() copes with size == 0; taking &out.front() of an empty
    // string, as a resize + memmove would, is undefined behaviour.
    out.assign(p, size);
}
// Return a pointer to the NUL-terminated string at the cursor and
// advance the cursor past its terminator.  The pointer refers to the
// section data itself; nothing is copied.
//
// If size_out is non-null it receives the string length, not counting
// the terminator.  Throws format_error if the scan stops exactly at
// the end of the section without finding a terminator.
const char *
cursor::cstr(size_t *size_out)
{
    const char *const start = pos;

    // Scan for the terminator, staying inside the section.
    for (; pos < sec->end; pos++) {
        if (*pos == '\0')
            break;
    }

    if (pos == sec->end)
        throw format_error("unterminated string");

    if (size_out != nullptr)
        *size_out = pos - start;

    // Step over the terminator.
    pos++;
    return start;
}
// Advance the cursor past one attribute value encoded with `form`
// (Section 7.5.4) without decoding it.
//
// Throws logic_error if an offset-sized form is met in a section of
// unknown format, and format_error for a form not handled here.
// Apart from the reads of length prefixes, the cursor is advanced
// without checking against the end of the section.
void
cursor::skip_form(DW_FORM form)
{
    section_offset tmp;

    switch (form) {
    case DW_FORM::addr:
        pos += sec->addr_size;
        break;

    // Forms whose size follows the section's DWARF format
    case DW_FORM::sec_offset:
    case DW_FORM::ref_addr:
    case DW_FORM::strp:
        switch (sec->fmt) {
        case format::dwarf32:
            pos += 4;
            break;
        case format::dwarf64:
            pos += 8;
            break;
        case format::unknown:
            throw logic_error("cannot read form with unknown format");
        }
        break;

    // size+data forms
    case DW_FORM::block1:
        tmp = fixed<ubyte>();
        pos += tmp;
        break;
    case DW_FORM::block2:
        tmp = fixed<uhalf>();
        pos += tmp;
        break;
    case DW_FORM::block4:
        tmp = fixed<uword>();
        pos += tmp;
        break;
    case DW_FORM::block:
    case DW_FORM::exprloc:
        tmp = uleb128();
        pos += tmp;
        break;

    // fixed-length forms
    case DW_FORM::flag_present:
        break;
    case DW_FORM::flag:
    case DW_FORM::data1:
    case DW_FORM::ref1:
        pos += 1;
        break;
    case DW_FORM::data2:
    case DW_FORM::ref2:
        pos += 2;
        break;
    case DW_FORM::data4:
    case DW_FORM::ref4:
        pos += 4;
        break;
    case DW_FORM::data8:
    // ref8 is an 8-byte reference; without this label a DIE using
    // it would be rejected as "unknown form".
    case DW_FORM::ref8:
    case DW_FORM::ref_sig8:
        pos += 8;
        break;

    // variable-length forms
    case DW_FORM::sdata:
    case DW_FORM::udata:
    case DW_FORM::ref_udata:
        // LEB128: skip bytes with the continuation bit, then the
        // final byte.
        while (pos < sec->end && (*(uint8_t*)pos & 0x80))
            pos++;
        pos++;
        break;
    case DW_FORM::string:
        // Skip up to the NUL terminator, then the terminator.
        while (pos < sec->end && *pos)
            pos++;
        pos++;
        break;

    case DW_FORM::indirect:
        // The actual form is stored inline as a ULEB128.
        skip_form((DW_FORM)uleb128());
        break;

    default:
        throw format_error("unknown form " + to_string(form));
    }
}
// Report an attempt to read beyond the section by throwing
// std::underflow_error.  This function never returns normally.
void
cursor::underflow()
{
throw underflow_error("cannot read past end of DWARF section");
}
DWARFPP_END_NAMESPACE

View File

@@ -0,0 +1,539 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#ifndef _DWARFPP_DW_HH_
#define _DWARFPP_DW_HH_
#include <cstdint>
#include <string>
DWARFPP_BEGIN_NAMESPACE
// Fixed-width integer representations (Section 7.26)
using sbyte = std::int8_t;
using ubyte = std::uint8_t;
using uhalf = std::uint16_t;
using uword = std::uint32_t;

// Offsets into, and lengths of, sections; always held as 64 bits
using section_offset = std::uint64_t;
using section_length = std::uint64_t;

// A target machine address.  Targets may use smaller addresses; this
// represents the largest supported address type.
using taddr = std::uint64_t;
// DIE tags (Section 7, figure 18). typedef, friend, and namespace
// have a trailing underscore because they are reserved words.
// The numbering has gaps (e.g. no 0x06, 0x07, 0x3e); lo_user and
// hi_user are range bounds rather than tags of their own.
enum class DW_TAG {
array_type = 0x01,
class_type = 0x02,
entry_point = 0x03,
enumeration_type = 0x04,
formal_parameter = 0x05,
imported_declaration = 0x08,
label = 0x0a,
lexical_block = 0x0b,
member = 0x0d,
pointer_type = 0x0f,
reference_type = 0x10,
compile_unit = 0x11,
string_type = 0x12,
structure_type = 0x13,
subroutine_type = 0x15,
typedef_ = 0x16,
union_type = 0x17,
unspecified_parameters = 0x18,
variant = 0x19,
common_block = 0x1a,
common_inclusion = 0x1b,
inheritance = 0x1c,
inlined_subroutine = 0x1d,
module = 0x1e,
ptr_to_member_type = 0x1f,
set_type = 0x20,
subrange_type = 0x21,
with_stmt = 0x22,
access_declaration = 0x23,
base_type = 0x24,
catch_block = 0x25,
const_type = 0x26,
constant = 0x27,
enumerator = 0x28,
file_type = 0x29,
friend_ = 0x2a,
namelist = 0x2b,
namelist_item = 0x2c,
packed_type = 0x2d,
subprogram = 0x2e,
template_type_parameter = 0x2f,
template_value_parameter = 0x30,
thrown_type = 0x31,
try_block = 0x32,
variant_part = 0x33,
variable = 0x34,
volatile_type = 0x35,
dwarf_procedure = 0x36,
restrict_type = 0x37,
interface_type = 0x38,
namespace_ = 0x39,
imported_module = 0x3a,
unspecified_type = 0x3b,
partial_unit = 0x3c,
imported_unit = 0x3d,
condition = 0x3f,
shared_type = 0x40,
type_unit = 0x41,
rvalue_reference_type = 0x42,
template_alias = 0x43,
lo_user = 0x4080,
hi_user = 0xffff,
};
// String form of a DW_TAG value (defined elsewhere).
std::string to_string(DW_TAG v);
// Child determination (Section 7, figure 19).
// The underlying type is ubyte, so a value occupies exactly one byte.
enum class DW_CHILDREN : ubyte {
no = 0x00,
yes = 0x01,
};
// String form of a DW_CHILDREN value (defined elsewhere).
std::string to_string(DW_CHILDREN v);
// Attribute names (Section 7, figure 20). inline, friend, mutable,
// and explicit have a trailing underscore because they are reserved
// words.
// The trailing comment on each enumerator lists the value classes the
// attribute may take.  lo_user and hi_user are range bounds rather
// than attributes of their own.
enum class DW_AT {
sibling = 0x01,// reference
location = 0x02,// exprloc, loclistptr
name = 0x03,// string
ordering = 0x09,// constant
byte_size = 0x0b,// constant, exprloc, reference
bit_offset = 0x0c,// constant, exprloc, reference
bit_size = 0x0d,// constant, exprloc, reference
stmt_list = 0x10,// lineptr
low_pc = 0x11,// address
high_pc = 0x12,// address, constant
language = 0x13,// constant
discr = 0x15,// reference
discr_value = 0x16,// constant
visibility = 0x17,// constant
import = 0x18,// reference
string_length = 0x19,// exprloc, loclistptr
common_reference = 0x1a,// reference
comp_dir = 0x1b,// string
const_value = 0x1c,// block, constant, string
containing_type = 0x1d,// reference
default_value = 0x1e,// reference
inline_ = 0x20,// constant
is_optional = 0x21,// flag
lower_bound = 0x22,// constant, exprloc, reference
producer = 0x25,// string
prototyped = 0x27,// flag
return_addr = 0x2a,// exprloc, loclistptr
start_scope = 0x2c,// constant, rangelistptr
bit_stride = 0x2e,// constant, exprloc, reference
upper_bound = 0x2f,// constant, exprloc, reference
abstract_origin = 0x31,// reference
accessibility = 0x32,// constant
address_class = 0x33,// constant
artificial = 0x34,// flag
base_types = 0x35,// reference
calling_convention = 0x36,// constant
count = 0x37,// constant, exprloc, reference
data_member_location = 0x38,// constant, exprloc, loclistptr
decl_column = 0x39,// constant
decl_file = 0x3a,// constant
decl_line = 0x3b,// constant
declaration = 0x3c,// flag
discr_list = 0x3d,// block
encoding = 0x3e,// constant
external = 0x3f,// flag
frame_base = 0x40,// exprloc, loclistptr
friend_ = 0x41,// reference
identifier_case = 0x42,// constant
macro_info = 0x43,// macptr
namelist_item = 0x44,// reference
priority = 0x45,// reference
segment = 0x46,// exprloc, loclistptr
specification = 0x47,// reference
static_link = 0x48,// exprloc, loclistptr
type = 0x49,// reference
use_location = 0x4a,// exprloc, loclistptr
variable_parameter = 0x4b,// flag
virtuality = 0x4c,// constant
vtable_elem_location = 0x4d,// exprloc, loclistptr
// DWARF 3
allocated = 0x4e,// constant, exprloc, reference
associated = 0x4f,// constant, exprloc, reference
data_location = 0x50,// exprloc
byte_stride = 0x51,// constant, exprloc, reference
entry_pc = 0x52,// address
use_UTF8 = 0x53,// flag
extension = 0x54,// reference
ranges = 0x55,// rangelistptr
trampoline = 0x56,// address, flag, reference, string
call_column = 0x57,// constant
call_file = 0x58,// constant
call_line = 0x59,// constant
description = 0x5a,// string
binary_scale = 0x5b,// constant
decimal_scale = 0x5c,// constant
small = 0x5d,// reference
decimal_sign = 0x5e,// constant
digit_count = 0x5f,// constant
picture_string = 0x60,// string
mutable_ = 0x61,// flag
threads_scaled = 0x62,// flag
explicit_ = 0x63,// flag
object_pointer = 0x64,// reference
endianity = 0x65,// constant
elemental = 0x66,// flag
pure = 0x67,// flag
recursive = 0x68,// flag
// DWARF 4
signature = 0x69,// reference
main_subprogram = 0x6a,// flag
data_bit_offset = 0x6b,// constant
const_expr = 0x6c,// flag
enum_class = 0x6d,// flag
linkage_name = 0x6e,// string
lo_user = 0x2000,
hi_user = 0x3fff,
};
// String form of a DW_AT value (defined elsewhere).
std::string to_string(DW_AT v);
// Attribute form encodings (Section 7, figure 21; DWARF 5 forms from
// DWARF 5 section 7.5.6, table 7.6).
// The trailing comment on each enumerator gives the value class the
// form encodes.  Enumerators are listed in numeric order.
enum class DW_FORM {
    addr = 0x01,// address
    block2 = 0x03,// block
    block4 = 0x04,// block
    data2 = 0x05,// constant
    data4 = 0x06,// constant
    data8 = 0x07,// constant
    string = 0x08,// string
    block = 0x09,// block
    block1 = 0x0a,// block
    data1 = 0x0b,// constant
    flag = 0x0c,// flag
    sdata = 0x0d,// constant
    strp = 0x0e,// string
    udata = 0x0f,// constant
    ref_addr = 0x10,// reference
    ref1 = 0x11,// reference
    ref2 = 0x12,// reference
    ref4 = 0x13,// reference
    ref8 = 0x14,// reference
    ref_udata = 0x15,// reference
    indirect = 0x16,// (Section 7.5.3)

    // DWARF 4
    sec_offset = 0x17,// lineptr, loclistptr, macptr, rangelistptr
    exprloc = 0x18,// exprloc
    flag_present = 0x19,// flag
    ref_sig8 = 0x20,// reference

    // DWARF 5
    strx = 0x1a,// string
    addrx = 0x1b,// address
    ref_sup4 = 0x1c,// reference
    strp_sup = 0x1d,// string
    data16 = 0x1e,// constant
    line_strp = 0x1f,// string
    implicit_const = 0x21,// constant
    loclistx = 0x22,// loclist
    rnglistx = 0x23,// rnglist
    ref_sup8 = 0x24,// reference
    strx1 = 0x25,// string
    strx2 = 0x26,// string
    strx3 = 0x27,// string
    strx4 = 0x28,// string
    addrx1 = 0x29,// address
    addrx2 = 0x2a,// address
    addrx3 = 0x2b,// address
    addrx4 = 0x2c,// address
};
// String form of a DW_FORM value (defined elsewhere).
std::string to_string(DW_FORM v);
// DWARF operation encodings (Section 7.7.1 and figure 24)
// The underlying type is ubyte.  Bracketed comments describe the
// operands that follow the opcode.  For the lit/reg/breg families only
// the first and last opcode are named; the rest are reached by adding
// an index to the first.  skip (0x2f) is listed ahead of bra (0x28),
// so this part of the list is not in numeric order.
enum class DW_OP : ubyte {
addr = 0x03,// [constant address (size target specific)]
deref = 0x06,
const1u = 0x08,// [1-byte constant]
const1s = 0x09,// [1-byte constant]
const2u = 0x0a,// [2-byte constant]
const2s = 0x0b,// [2-byte constant]
const4u = 0x0c,// [4-byte constant]
const4s = 0x0d,// [4-byte constant]
const8u = 0x0e,// [8-byte constant]
const8s = 0x0f,// [8-byte constant]
constu = 0x10,// [ULEB128 constant]
consts = 0x11,// [SLEB128 constant]
dup = 0x12,
drop = 0x13,
over = 0x14,
pick = 0x15,// [1-byte stack index]
swap = 0x16,
rot = 0x17,
xderef = 0x18,
abs = 0x19,
and_ = 0x1a,
div = 0x1b,
minus = 0x1c,
mod = 0x1d,
mul = 0x1e,
neg = 0x1f,
not_ = 0x20,
or_ = 0x21,
plus = 0x22,
plus_uconst = 0x23,// [ULEB128 addend]
shl = 0x24,
shr = 0x25,
shra = 0x26,
xor_ = 0x27,
skip = 0x2f,// [signed 2-byte constant]
bra = 0x28,// [signed 2-byte constant]
eq = 0x29,
ge = 0x2a,
gt = 0x2b,
le = 0x2c,
lt = 0x2d,
ne = 0x2e,
// Literals 0..31 = (lit0 + literal)
lit0 = 0x30,
lit31 = 0x4f,
// Registers 0..31 = (reg0 + regnum)
reg0 = 0x50,
reg31 = 0x6f,
// Base register 0..31 = (breg0 + regnum)
breg0 = 0x70,// [SLEB128 offset]
breg31 = 0x8f,// [SLEB128 offset]
regx = 0x90,// [ULEB128 register]
fbreg = 0x91,// [SLEB128 offset]
bregx = 0x92,// [ULEB128 register, SLEB128 offset]
piece = 0x93,// [ULEB128 size of piece addressed]
deref_size = 0x94,// [1-byte size of data retrieved]
xderef_size = 0x95,// [1-byte size of data retrieved]
nop = 0x96,
// DWARF 3
push_object_address = 0x97,
call2 = 0x98,// [2-byte offset of DIE]
call4 = 0x99,// [4-byte offset of DIE]
call_ref = 0x9a,// [4- or 8-byte offset of DIE]
form_tls_address = 0x9b,
call_frame_cfa = 0x9c,
bit_piece = 0x9d,// [ULEB128 size, ULEB128 offset]
// DWARF 4
implicit_value = 0x9e,// [ULEB128 size, block of that size]
stack_value = 0x9f,
lo_user = 0xe0,
hi_user = 0xff,
};
// String form of a DW_OP value (defined elsewhere).
std::string to_string(DW_OP v);
// DW_AT::encoding constants (DWARF4 section 7.8 figure 25)
// float, signed and unsigned carry a trailing underscore because the
// plain names are C++ keywords.
enum class DW_ATE {
address = 0x01,
boolean = 0x02,
complex_float = 0x03,
float_ = 0x04,
signed_ = 0x05,
signed_char = 0x06,
unsigned_ = 0x07,
unsigned_char = 0x08,
imaginary_float = 0x09,
packed_decimal = 0x0a,
numeric_string = 0x0b,
edited = 0x0c,
signed_fixed = 0x0d,
unsigned_fixed = 0x0e,
decimal_float = 0x0f,
// DWARF 4
UTF = 0x10,
lo_user = 0x80,
hi_user = 0xff,
};
// String form of a DW_ATE value (defined elsewhere).
std::string to_string(DW_ATE v);
// DW_AT::decimal_sign constants (DWARF4 section 7.8 figure 26)
// unsigned_ has a trailing underscore because unsigned is a keyword.
enum class DW_DS {
unsigned_ = 0x01,
leading_overpunch = 0x02,
trailing_overpunch = 0x03,
leading_separate = 0x04,
trailing_separate = 0x05,
};
// String form of a DW_DS value (defined elsewhere).
std::string to_string(DW_DS v);
// DW_AT::endianity constants (DWARF4 section 7.8 figure 27)
// default_ has a trailing underscore because default is a keyword.
enum class DW_END {
default_ = 0x00,
big = 0x01,
little = 0x02,
lo_user = 0x40,
hi_user = 0xff,
};
// String form of a DW_END value (defined elsewhere).
std::string to_string(DW_END v);
// DW_AT::accessibility constants (DWARF4 section 7.9 figure 28)
// Names carry a trailing underscore because public, protected and
// private are keywords.
enum class DW_ACCESS {
public_ = 0x01,
protected_ = 0x02,
private_ = 0x03,
};
// String form of a DW_ACCESS value (defined elsewhere).
std::string to_string(DW_ACCESS v);
// DW_AT::visibility constants (DWARF4 section 7.10 figure 29)
enum class DW_VIS {
local = 0x01,
exported = 0x02,
qualified = 0x03,
};
// String form of a DW_VIS value (defined elsewhere).
std::string to_string(DW_VIS v);
// DW_AT::virtuality constants (DWARF4 section 7.11 figure 30)
// virtual_ has a trailing underscore because virtual is a keyword.
enum class DW_VIRTUALITY {
none = 0x00,
virtual_ = 0x01,
pure_virtual = 0x02,
};
// String form of a DW_VIRTUALITY value (defined elsewhere).
std::string to_string(DW_VIRTUALITY v);
// DW_AT::language constants (DWARF4 section 7.12 figure 31)
// The trailing comment on each enumerator records the language's
// default array lower bound (0 or 1).
enum class DW_LANG {
C89 = 0x0001,// Lower bound 0
C = 0x0002,// Lower bound 0
Ada83 = 0x0003,// Lower bound 1
C_plus_plus = 0x0004,// Lower bound 0
Cobol74 = 0x0005,// Lower bound 1
Cobol85 = 0x0006,// Lower bound 1
Fortran77 = 0x0007,// Lower bound 1
Fortran90 = 0x0008,// Lower bound 1
Pascal83 = 0x0009,// Lower bound 1
Modula2 = 0x000a,// Lower bound 1
Java = 0x000b,// Lower bound 0
C99 = 0x000c,// Lower bound 0
Ada95 = 0x000d,// Lower bound 1
Fortran95 = 0x000e,// Lower bound 1
PLI = 0x000f,// Lower bound 1
ObjC = 0x0010,// Lower bound 0
ObjC_plus_plus = 0x0011,// Lower bound 0
UPC = 0x0012,// Lower bound 0
D = 0x0013,// Lower bound 0
Python = 0x0014,// Lower bound 0
lo_user = 0x8000,
hi_user = 0xffff,
};
// String form of a DW_LANG value (defined elsewhere).
std::string to_string(DW_LANG v);
// DW_AT::identifier_case constants (DWARF4 section 7.14 figure 32)
enum class DW_ID {
case_sensitive = 0x00,
up_case = 0x01,
down_case = 0x02,
case_insensitive = 0x03,
};
// String form of a DW_ID value (defined elsewhere).
std::string to_string(DW_ID v);
// DW_AT::calling_convention constants (DWARF4 section 7.15 figure 33)
// lo_user and hi_user are range bounds rather than conventions of
// their own.
enum class DW_CC {
normal = 0x01,
program = 0x02,
nocall = 0x03,
lo_user = 0x40,
hi_user = 0xff,
};
// String form of a DW_CC value (defined elsewhere).
std::string to_string(DW_CC v);
// DW_AT::inline constants (DWARF4 section 7.16 figure 34)
enum class DW_INL {
not_inlined = 0x00,
inlined = 0x01,
declared_not_inlined = 0x02,
declared_inlined = 0x03,
};
// String form of a DW_INL value (defined elsewhere).
std::string to_string(DW_INL v);
// DW_AT::ordering constants (DWARF4 section 7.17 figure 35)
enum class DW_ORD {
row_major = 0x00,
col_major = 0x01,
};
// String form of a DW_ORD value (defined elsewhere).
std::string to_string(DW_ORD v);
// DW_AT::discr_list constants (DWARF4 section 7.18 figure 36)
enum class DW_DSC {
label = 0x00,
range = 0x01,
};
// String form of a DW_DSC value (defined elsewhere).
std::string to_string(DW_DSC v);
// Line number standard opcodes (DWARF4 section 7.21 figure 37)
// Opcodes 0x0a-0x0c are marked below as DWARF 3 additions.
enum class DW_LNS {
copy = 0x01,
advance_pc = 0x02,
advance_line = 0x03,
set_file = 0x04,
set_column = 0x05,
negate_stmt = 0x06,
set_basic_block = 0x07,
const_add_pc = 0x08,
fixed_advance_pc = 0x09,
// DWARF 3
set_prologue_end = 0x0a,
set_epilogue_begin = 0x0b,
set_isa = 0x0c,
};
// String form of a DW_LNS value (defined elsewhere).
std::string to_string(DW_LNS v);
// Line number extended opcodes (DWARF4 section 7.21 figure 38)
// lo_user and hi_user are range bounds rather than opcodes of their
// own.
enum class DW_LNE {
end_sequence = 0x01,
set_address = 0x02,
define_file = 0x03,
// DWARF 4
set_discriminator = 0x04,
// DWARF 3
lo_user = 0x80,
hi_user = 0xff,
};
// String form of a DW_LNE value (defined elsewhere).
std::string to_string(DW_LNE v);
DWARFPP_END_NAMESPACE
#endif

View File

@@ -0,0 +1,202 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#include "internal.hh"
using namespace std;
DWARFPP_BEGIN_NAMESPACE
// Create a DIE bound to unit `cu` with no abbreviation attached; the
// remaining fields are filled in by read().  Only cu and abbrev are
// initialized here.
die::die(const unit *cu)
: cu(cu), abbrev(nullptr)
{
}
// Return the unit this DIE belongs to.  Dereferences cu without a
// null check, so the DIE must have been created with a unit.
const unit &
die::get_unit() const
{
return *cu;
}
// Return this DIE's offset within the section: the unit's own
// section offset plus the DIE's offset inside the unit.
section_offset
die::get_section_offset() const
{
return cu->get_section_offset() + offset;
}
// Load the DIE found at unit-relative offset `off`.
//
// The abbreviation code is read first.  Code 0 marks a null entry:
// abbrev becomes nullptr and only `next` is updated.  Otherwise the
// abbreviation is looked up in the unit, the tag is copied from it,
// and the offset of every attribute value is recorded in attrs while
// the values themselves are skipped.  In both cases `next` ends up at
// the offset just past what was read.
void
die::read(section_offset off)
{
    cursor cur(cu->data(), off);
    offset = off;

    abbrev_code acode = cur.uleb128();
    if (acode != 0) {
        abbrev = &cu->get_abbrev(acode);
        tag = abbrev->tag;

        // XXX We can pre-compute almost all of this work in the
        // abbrev_entry.
        attrs.clear();
        attrs.reserve(abbrev->attributes.size());
        for (auto &spec : abbrev->attributes) {
            attrs.push_back(cur.get_section_offset());
            cur.skip_form(spec.form);
        }
    } else {
        abbrev = nullptr;
    }

    next = cur.get_section_offset();
}
// Report whether this DIE carries attribute `attr`.  A DIE without an
// abbreviation has no attributes.  The abbreviation's attribute list
// is searched linearly.
bool
die::has(DW_AT attr) const
{
    if (abbrev == nullptr)
        return false;

    // XXX Totally lame
    auto it = abbrev->attributes.begin();
    const auto last = abbrev->attributes.end();
    for (; it != last; ++it) {
        if (it->name == attr)
            return true;
    }
    return false;
}
// Return the value of attribute `attr`.  The first matching entry in
// the abbreviation's attribute list is used, paired with the value
// offset recorded for the same position by read().
// Throws out_of_range if the DIE has no such attribute.
value
die::operator[](DW_AT attr) const
{
    // XXX We can pre-compute almost all of this work in the
    // abbrev_entry.
    if (abbrev != nullptr) {
        const auto &specs = abbrev->attributes;
        for (size_t idx = 0; idx < specs.size(); ++idx) {
            const auto &spec = specs[idx];
            if (spec.name == attr)
                return value(cu, spec.name, spec.form, spec.type, attrs[idx]);
        }
    }
    throw out_of_range("DIE does not have attribute " + to_string(attr));
}
// Look up attribute `attr` on this DIE, following
// DW_AT::abstract_origin and DW_AT::specification links when the DIE
// does not carry it directly (DWARF4 section 2.13, DWARF4 section
// 3.3.8).
//
// DWARF4 is unclear about what to do when there's both a
// DW_AT::specification and a DW_AT::abstract_origin.  Conceptually,
// though, a concrete inlined instance cannot itself complete an
// external function that wasn't first completed by its abstract
// instance, so abstract_origin is tried first (including the
// origin's own specification); this DIE's specification is only
// consulted when it has no abstract_origin.
//
// Returns a default-constructed value if the attribute is not found.
value
die::resolve(DW_AT attr) const
{
    // XXX This traverses the abbrevs at least twice and
    // potentially several more times
    if (has(attr))
        return (*this)[attr];

    if (has(DW_AT::abstract_origin)) {
        die origin = (*this)[DW_AT::abstract_origin].as_reference();
        if (origin.has(attr))
            return origin[attr];
        if (!origin.has(DW_AT::specification))
            return value();

        die origin_spec = origin[DW_AT::specification].as_reference();
        if (origin_spec.has(attr))
            return origin_spec[attr];
        return value();
    }

    if (has(DW_AT::specification)) {
        die spec = (*this)[DW_AT::specification].as_reference();
        if (spec.has(attr))
            return spec[attr];
    }

    return value();
}
// Return an iterator to this DIE's first child, which starts at
// `next`.  A DIE without an abbreviation, or whose abbreviation says
// it has no children, yields end().
die::iterator
die::begin() const
{
    const bool has_children = abbrev && abbrev->children;
    if (has_children)
        return iterator(cu, next);
    return end();
}
// Create an iterator positioned on the DIE at unit-relative offset
// `off` of unit `cu`; the DIE is read immediately.
die::iterator::iterator(const unit *cu, section_offset off)
: d(cu)
{
d.read(off);
}
// Advance to the next sibling DIE.
//
// An iterator already sitting on a null entry (no abbreviation) does
// not move.  Otherwise the successor is found in one of three ways:
// directly after this DIE if it has no children, through its
// DW_AT::sibling reference if it has one, or by walking over its
// children until their terminating null entry.
die::iterator &
die::iterator::operator++()
{
    if (!d.abbrev)
        return *this;

    if (!d.abbrev->children) {
        // The DIE has no children, so its successor follows
        // immediately
        d.read(d.next);
        return *this;
    }

    if (d.has(DW_AT::sibling)) {
        // They made it easy on us.  Follow the sibling
        // pointer.  XXX Probably worth optimizing
        d = d[DW_AT::sibling].as_reference();
        return *this;
    }

    // It's a hard-knock life.  We have to iterate through
    // the children to find the next DIE.
    // XXX Particularly unfortunate if the user is doing a
    // DFS, since this will result in N^2 behavior.  Maybe
    // a small cache of terminator locations in the CU?
    iterator child(d.cu, d.next);
    while (child->abbrev)
        ++child;
    d.read(child->next);
    return *this;
}
// Return every attribute of this DIE as (name, value) pairs, in the
// order the abbreviation declares them.  A DIE without an
// abbreviation yields an empty vector.
const vector<pair<DW_AT, value> >
die::attributes() const
{
    vector<pair<DW_AT, value> > res;

    if (!abbrev)
        return res;

    // XXX Quite slow, especially when using this to traverse an
    // entire DIE tree since each DIE will produce a new vector
    // (whereas other vectors get reused).  Might be worth a
    // custom iterator.
    const auto &specs = abbrev->attributes;
    // The final size is known, so allocate once up front.
    res.reserve(specs.size());
    for (size_t i = 0; i < specs.size(); ++i) {
        const auto &a = specs[i];
        res.push_back(make_pair(a.name, value(cu, a.name, a.form, a.type, attrs[i])));
    }
    return res;
}
// Two DIEs are equal when they belong to the same unit object and sit
// at the same offset within it.
bool
die::operator==(const die &o) const
{
return cu == o.cu && offset == o.offset;
}
// Negation of operator==.
bool
die::operator!=(const die &o) const
{
return !(*this == o);
}
DWARFPP_END_NAMESPACE
// Hash a DIE by combining, with XOR, the hash of its unit pointer and
// the hash of its offset within the unit.
size_t
std::hash<dwarf::die>::operator()(const dwarf::die &a) const
{
    const size_t unit_hash = hash<decltype(a.cu)>()(a.cu);
    const size_t offset_hash =
        hash<decltype(a.get_unit_offset())>()(a.get_unit_offset());
    return unit_hash ^ offset_hash;
}

View File

@@ -0,0 +1,118 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#include "internal.hh"
#include <cstring>
#include <unordered_set>
using namespace std;
// XXX Make this more readily available?
namespace std {
    // Hash support for DW_TAG so it can be used as an unordered
    // container key: the hash is the tag's numeric value.
    template<>
    struct hash<dwarf::DW_TAG>
    {
        typedef size_t result_type;
        typedef dwarf::DW_TAG argument_type;

        result_type operator()(argument_type a) const
        {
            return static_cast<result_type>(a);
        }
    };
}
DWARFPP_BEGIN_NAMESPACE
// Hash functor for NUL-terminated C strings.  The hash depends on the
// characters, not on the pointer value, so equal strings stored at
// different addresses hash alike.  The empty string hashes to 0.
struct string_hash
{
    typedef size_t result_type;
    typedef const char *argument_type;

    result_type operator()(const char *s) const
    {
        result_type acc = 0;
        while (*s != '\0') {
            acc += 33 * acc + *s;
            ++s;
        }
        return acc;
    }
};
// Equality functor for NUL-terminated C strings: compares contents
// with strcmp rather than comparing the pointers.
struct string_eq
{
    typedef bool result_type;
    typedef const char *first_argument_type;
    typedef const char *second_argument_type;

    bool operator()(const char *x, const char *y) const
    {
        const int order = strcmp(x, y);
        return order == 0;
    }
};
// Shared state behind a die_str_map.
struct die_str_map::impl
{
// Record the attribute to index by and the accepted tags, and set up
// iteration over the children of `parent`.  Nothing is indexed yet;
// str_map and invalid are default-constructed.
impl(const die &parent, DW_AT attr,
const initializer_list<DW_TAG> &accept)
: attr(attr), accept(accept.begin(), accept.end()),
pos(parent.begin()), end(parent.end()) { }
// DIEs indexed so far, keyed by attribute string.  Keys are compared
// and hashed by content (string_hash / string_eq).
unordered_map<const char*, die, string_hash, string_eq> str_map;
// Attribute whose string value is used as the key.
DW_AT attr;
// Tags of the DIEs that are eligible for indexing.
unordered_set<DW_TAG> accept;
// Next child still to be examined, and the end of the children.
die::iterator pos, end;
// Returned by lookups that find nothing.
die invalid;
};
// Create a map over the children of `parent` keyed by the string
// value of `attr`, restricted to DIEs whose tag is in `accept`.  The
// state lives in a shared impl object.
die_str_map::die_str_map(const die &parent, DW_AT attr,
const initializer_list<DW_TAG> &accept)
: m(make_shared<impl>(parent, attr, accept))
{
}
// Build a die_str_map over `parent` keyed by DW_AT::name and limited
// to DIEs carrying one of the DWARF type tags.
die_str_map
die_str_map::from_type_names(const die &parent)
{
    // All DWARF type tags (this is everything that ends
    // with _type except thrown_type).
    const initializer_list<DW_TAG> type_tags = {
        DW_TAG::array_type, DW_TAG::class_type,
        DW_TAG::enumeration_type, DW_TAG::pointer_type,
        DW_TAG::reference_type, DW_TAG::string_type,
        DW_TAG::structure_type, DW_TAG::subroutine_type,
        DW_TAG::union_type, DW_TAG::ptr_to_member_type,
        DW_TAG::set_type, DW_TAG::subrange_type,
        DW_TAG::base_type, DW_TAG::const_type,
        DW_TAG::file_type, DW_TAG::packed_type,
        DW_TAG::volatile_type, DW_TAG::restrict_type,
        DW_TAG::interface_type, DW_TAG::unspecified_type,
        DW_TAG::shared_type, DW_TAG::rvalue_reference_type
    };
    return die_str_map(parent, DW_AT::name, type_tags);
}
// Look up the DIE whose key attribute equals `val`.
//
// Already-indexed entries are served from the map.  Otherwise the scan
// resumes where the previous lookup stopped, indexing every eligible
// DIE it passes, until the value is found or the range is exhausted.
// Returns a reference to the stored DIE, or to the impl's `invalid`
// DIE when no match exists.
const die &
die_str_map::operator[](const char *val) const
{
    // Do we have this value?
    auto it = m->str_map.find(val);
    if (it != m->str_map.end())
        return it->second;

    // Read more until we find the value or the end
    while (m->pos != m->end) {
        // Copy the DIE *before* advancing.  operator* hands back a
        // reference; if that reference points at state owned by the
        // iterator, ++ would change what it denotes and we would
        // inspect (and index) the wrong DIE.
        const die d = *m->pos;
        ++m->pos;

        if (!m->accept.count(d.tag) || !d.has(m->attr))
            continue;
        value dval(d[m->attr]);
        if (dval.get_type() != value::type::string)
            continue;

        const char *dstr = dval.as_cstr();
        die &slot = m->str_map[dstr];
        slot = d;
        if (strcmp(val, dstr) == 0)
            return slot;
    }

    // Not found
    return m->invalid;
}
DWARFPP_END_NAMESPACE

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,366 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#include "internal.hh"
using namespace std;
DWARFPP_BEGIN_NAMESPACE
//////////////////////////////////////////////////////////////////
// class dwarf
//
// Shared state behind a dwarf object.
struct dwarf::impl
{
    // Loader used to fetch section data.
    std::shared_ptr<loader> l;

    // The two required sections; set by the dwarf constructor.
    std::shared_ptr<section> sec_info;
    std::shared_ptr<section> sec_abbrev;

    std::vector<compilation_unit> compilation_units;

    // Type units keyed by type signature.  Only populated once
    // have_type_units has been set by get_type_unit.
    std::unordered_map<uint64_t, type_unit> type_units;
    bool have_type_units;

    // Cache of the other sections loaded so far by get_section.
    std::map<section_type, std::shared_ptr<section> > sections;

    impl(const std::shared_ptr<loader> &ldr)
        : l(ldr), have_type_units(false)
    {
    }
};
// Open a DWARF file through loader `l`.
//
// Loads the required .debug_info and .debug_abbrev sections (throwing
// format_error if either is missing), detects the byte order, and
// eagerly creates one compilation_unit per unit found in .debug_info.
dwarf::dwarf(const std::shared_ptr<loader> &l)
    : m(make_shared<impl>(l))
{
    // Required section: .debug_info.  Start out assuming little endian.
    size_t info_size;
    const void *info_data = l->load(section_type::info, &info_size);
    if (!info_data)
        throw format_error("required .debug_info section missing");
    m->sec_info = make_shared<section>(section_type::info, info_data, info_size, byte_order::lsb);

    // Sniff the endianness from the version field of the first
    // CU.  This is always a small but non-zero integer.
    cursor probe(m->sec_info);
    // Skip length (including the 64-bit extension, if present).
    section_length unit_length = probe.fixed<uword>();
    if (unit_length == 0xffffffff)
        probe.fixed<uint64_t>();
    // Read the version as little endian and byte-swap it to get the
    // big-endian interpretation; the smaller of the two is the real one.
    uhalf version_le = probe.fixed<uhalf>();
    uhalf version_be = (version_le >> 8) | ((version_le & 0xFF) << 8);
    if (version_be < version_le)
        m->sec_info = make_shared<section>(section_type::info, info_data, info_size, byte_order::msb);

    // Required section: .debug_abbrev, using the detected byte order.
    size_t abbrev_size;
    const void *abbrev_data = l->load(section_type::abbrev, &abbrev_size);
    if (!abbrev_data)
        throw format_error("required .debug_abbrev section missing");
    m->sec_abbrev = make_shared<section>(section_type::abbrev, abbrev_data, abbrev_size, m->sec_info->ord);

    // Get compilation units.  Everything derives from these, so
    // there's no point in doing it lazily.
    for (cursor infocur(m->sec_info); !infocur.end(); infocur.subsection()) {
        // XXX Circular reference.  Given that we now require
        // the dwarf object to stick around for DIEs, maybe we
        // might as well require that for units, too.
        m->compilation_units.emplace_back(
            *this, infocur.get_section_offset());
    }
}
// Out-of-line destructor; there is no explicit cleanup to perform.
dwarf::~dwarf()
{
}
// Return the compilation units of this file.  An object without an
// impl yields a reference to a shared, function-local empty vector.
const std::vector<compilation_unit> &
dwarf::compilation_units() const
{
    static std::vector<compilation_unit> empty;
    return m ? m->compilation_units : empty;
}
// Return the type unit with the given signature.
//
// The first call parses every unit in the types section and caches
// them by signature.  Throws out_of_range if no unit has the requested
// signature.
const type_unit &
dwarf::get_type_unit(uint64_t type_signature) const
{
    if (!m->have_type_units) {
        for (cursor tucur(get_section(section_type::types));
             !tucur.end(); tucur.subsection()) {
            // XXX Circular reference
            type_unit tu(*this, tucur.get_section_offset());
            m->type_units[tu.get_type_signature()] = tu;
        }
        m->have_type_units = true;
    }

    auto found = m->type_units.find(type_signature);
    if (found == m->type_units.end())
        throw out_of_range("type signature 0x" + to_hex(type_signature));
    return found->second;
}
// Return the section of the given type, loading and caching it on
// first use.
//
// .debug_info and .debug_abbrev are served from the fields set up by
// the constructor; every other section is loaded through the loader
// and remembered in m->sections.  Throws format_error if the loader
// cannot provide the section.
std::shared_ptr<section>
dwarf::get_section(section_type type) const
{
    if (type == section_type::info)
        return m->sec_info;
    if (type == section_type::abbrev)
        return m->sec_abbrev;

    auto it = m->sections.find(type);
    if (it != m->sections.end())
        return it->second;

    size_t size = 0;
    const void *data = m->l->load(type, &size);
    if (!data) {
        // section_type_to_name returns nullptr for a type it does not
        // know; constructing a std::string from nullptr is undefined.
        const char *name = elf::section_type_to_name(type);
        throw format_error(std::string(name ? name : "unknown")
                           + " section missing");
    }

    // Tag the section with the type that was actually requested (this
    // used to be hard-coded to section_type::str).
    std::shared_ptr<section> sec =
        std::make_shared<section>(type, data, size, m->sec_info->ord);
    m->sections[type] = sec;
    return sec;
}
//////////////////////////////////////////////////////////////////
// class unit
//
/**
* Implementation of a unit.
*/
struct unit::impl
{
// The dwarf object this unit was created from (held by value).
const dwarf file;
// Offset this unit was constructed with (returned by
// unit::get_section_offset).
const section_offset offset;
// Sub-section covering just this unit.
const std::shared_ptr<section> subsec;
// Offset of this unit's abbrevs within the abbrev section.
const section_offset debug_abbrev_offset;
// Offset passed to die::read when the root DIE is first requested.
const section_offset root_offset;
// Type unit-only values
const uint64_t type_signature;
const section_offset type_offset;
// Lazily constructed root and type DIEs
die root, type;
// Lazily constructed line table
line_table lt;
// Map from abbrev code to abbrev. If the map is dense, it
// will be stored in the vector; otherwise it will be stored
// in the map.
bool have_abbrevs;
std::vector<abbrev_entry> abbrevs_vec;
std::unordered_map<abbrev_code, abbrev_entry> abbrevs_map;
// The type-unit parameters default to 0 so compilation units can
// omit them.
impl(const dwarf &file, section_offset offset,
const std::shared_ptr<section> &subsec,
section_offset debug_abbrev_offset, section_offset root_offset,
uint64_t type_signature = 0, section_offset type_offset = 0)
: file(file), offset(offset), subsec(subsec),
debug_abbrev_offset(debug_abbrev_offset),
root_offset(root_offset), type_signature(type_signature),
type_offset(type_offset), have_abbrevs(false) { }
// Parse this unit's abbrev table if that has not been done yet.
void force_abbrevs();
};
// Out-of-line destructor; there is no explicit cleanup to perform.
unit::~unit()
{
}
// Return the dwarf object stored in this unit's impl.
const dwarf &
unit::get_dwarf() const
{
return m->file;
}
// Return the offset this unit was constructed with.
section_offset
unit::get_section_offset() const
{
return m->offset;
}
// Return the root DIE of this unit, reading it on first use and
// caching it afterwards.
const die&
unit::root() const
{
    if (m->root.valid())
        return m->root;

    // First request: make sure the abbrevs are parsed, then read the
    // DIE at the recorded root offset.
    m->force_abbrevs();
    m->root = die(this);
    m->root.read(m->root_offset);
    return m->root;
}
// Return the sub-section that covers this unit.
const std::shared_ptr<section> &
unit::data() const
{
return m->subsec;
}
// Look up the abbrev entry for `acode`, parsing the abbrev table first
// if necessary.  Throws format_error for a code that is not defined.
const abbrev_entry &
unit::get_abbrev(abbrev_code acode) const
{
    if (!m->have_abbrevs)
        m->force_abbrevs();

    const abbrev_entry *found = nullptr;
    if (!m->abbrevs_vec.empty()) {
        // Dense table: index directly.  A slot whose code is 0 was
        // never filled in.
        if (acode < m->abbrevs_vec.size() &&
            m->abbrevs_vec[acode].code != 0)
            found = &m->abbrevs_vec[acode];
    } else {
        // Sparse table: hash lookup.
        auto it = m->abbrevs_map.find(acode);
        if (it != m->abbrevs_map.end())
            found = &it->second;
    }

    if (!found)
        throw format_error("unknown abbrev code 0x" + to_hex(acode));
    return *found;
}
void
unit::impl::force_abbrevs()
{
// XXX Compilation units can share abbrevs. Parse each table
// at most once.
if (have_abbrevs)
return;
// Section 7.5.3
cursor c(file.get_section(section_type::abbrev),
debug_abbrev_offset);
abbrev_entry entry;
abbrev_code highest = 0;
while (entry.read(&c)) {
abbrevs_map[entry.code] = entry;
if (entry.code > highest)
highest = entry.code;
}
// Typically, abbrev codes are assigned linearly, so it's more
// space efficient and time efficient to store the table in a
// vector. Convert to a vector if it's dense enough, by some
// rough estimate of "enough".
if (highest * 10 < abbrevs_map.size() * 15) {
// Move the map into the vector
abbrevs_vec.resize(highest + 1);
for (auto &entry : abbrevs_map)
abbrevs_vec[entry.first] = move(entry.second);
abbrevs_map.clear();
}
have_abbrevs = true;
}
//////////////////////////////////////////////////////////////////
// class compilation_unit
//
// Parse the compilation unit header at `offset` in the info section.
// Throws format_error for a unit version greater than 5.
compilation_unit::compilation_unit(const dwarf &file, section_offset offset)
{
    // Read the CU header (DWARF4 section 7.5.1.1)
    cursor outer(file.get_section(section_type::info), offset);
    std::shared_ptr<section> unit_sec = outer.subsection();
    cursor hdr(unit_sec);
    hdr.skip_initial_length();

    const uhalf version = hdr.fixed<uhalf>();
    if (version > 5)
        throw format_error("unknown compilation unit version " + std::to_string(version));

    // .debug_abbrev-relative offset of this unit's abbrevs.  The
    // field order differs between versions: before version 5 the
    // abbrev offset precedes the address size; from version 5 on a
    // unit type and the address size come first.
    section_offset abbrev_off;
    if (version < 5) {
        abbrev_off = hdr.offset();
        const ubyte address_size = hdr.fixed<ubyte>();
        unit_sec->addr_size = address_size;
    } else {
        hdr.skip_unit_type();
        const ubyte address_size = hdr.fixed<ubyte>();
        unit_sec->addr_size = address_size;
        abbrev_off = hdr.offset();
    }

    // Whatever follows the header is where the root DIE starts.
    m = make_shared<impl>(file, offset, unit_sec, abbrev_off,
                          hdr.get_section_offset());
}
// Return this unit's line table, building it on first use.
//
// If the root DIE lacks DW_AT::stmt_list or DW_AT::name, or the line
// section cannot be obtained, the stored (still invalid) table is
// returned unchanged.
const line_table &
compilation_unit::get_line_table() const
{
    if (m->lt.valid())
        return m->lt;

    const die &d = root();
    if (!d.has(DW_AT::stmt_list) || !d.has(DW_AT::name))
        return m->lt;

    shared_ptr<section> sec;
    try {
        sec = m->file.get_section(section_type::line);
    } catch (format_error &) {
        // No line section: leave the table invalid.
        return m->lt;
    }

    auto comp_dir = d.has(DW_AT::comp_dir) ? at_comp_dir(d) : "";
    m->lt = line_table(sec, d[DW_AT::stmt_list].as_sec_offset(),
                       m->subsec->addr_size, comp_dir,
                       at_name(d));
    return m->lt;
}
//////////////////////////////////////////////////////////////////
// class type_unit
//
// Parse the type unit header at `offset` in the types section.
// Throws format_error unless the unit version is 4.
type_unit::type_unit(const dwarf &file, section_offset offset)
{
    // Read the type unit header (DWARF4 section 7.5.1.2)
    cursor outer(file.get_section(section_type::types), offset);
    std::shared_ptr<section> unit_sec = outer.subsection();
    cursor hdr(unit_sec);
    hdr.skip_initial_length();

    const uhalf version = hdr.fixed<uhalf>();
    if (version != 4)
        throw format_error("unknown type unit version " + std::to_string(version));

    // .debug_abbrev-relative offset of this unit's abbrevs
    const section_offset abbrev_off = hdr.offset();
    const ubyte address_size = hdr.fixed<ubyte>();
    unit_sec->addr_size = address_size;
    const uint64_t signature = hdr.fixed<uint64_t>();
    const section_offset type_off = hdr.offset();

    // The cursor now sits just past the header.
    m = make_shared<impl>(file, offset, unit_sec, abbrev_off,
                          hdr.get_section_offset(), signature,
                          type_off);
}
// Return the type signature read from this unit's header.
uint64_t
type_unit::get_type_signature() const
{
return m->type_signature;
}
// Return the DIE at this unit's type offset, reading it on first use
// and caching it afterwards.
const die &
type_unit::type() const
{
    if (m->type.valid())
        return m->type;

    m->force_abbrevs();
    m->type = die(this);
    m->type.read(m->type_offset);
    return m->type;
}
DWARFPP_END_NAMESPACE

View File

@@ -0,0 +1,54 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#include "dwarf++.hh"
#include <cstring>
using namespace std;
DWARFPP_BEGIN_NAMESPACE
// Table pairing each ELF section name with its section_type.  Both
// lookup functions below do a linear scan over it.
static const struct
{
const char *name;
section_type type;
} sections[] = {
{".debug_abbrev", section_type::abbrev},
{".debug_aranges", section_type::aranges},
{".debug_frame", section_type::frame},
{".debug_info", section_type::info},
{".debug_line", section_type::line},
{".debug_loc", section_type::loc},
{".debug_macinfo", section_type::macinfo},
{".debug_pubnames", section_type::pubnames},
{".debug_pubtypes", section_type::pubtypes},
{".debug_ranges", section_type::ranges},
{".debug_str", section_type::str},
{".debug_types", section_type::types},
};
// Translate a section name into its section_type.  On success stores
// the type in *out and returns true; returns false (leaving *out
// untouched) for a name that is not in the table.
bool
elf::section_name_to_type(const char *name, section_type *out)
{
    const size_t count = sizeof(sections) / sizeof(sections[0]);
    for (size_t i = 0; i < count; ++i) {
        if (strcmp(sections[i].name, name) != 0)
            continue;
        *out = sections[i].type;
        return true;
    }
    return false;
}
// Translate a section_type into its section name.  Returns nullptr for
// a type that is not in the table.
const char *
elf::section_type_to_name(section_type type)
{
    const size_t count = sizeof(sections) / sizeof(sections[0]);
    for (size_t i = 0; i < count; ++i) {
        if (sections[i].type == type)
            return sections[i].name;
    }
    return nullptr;
}
DWARFPP_END_NAMESPACE

View File

@@ -0,0 +1,423 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#include "internal.hh"
using namespace std;
DWARFPP_BEGIN_NAMESPACE
// Global default-constructed expression context.
// NOTE(review): presumably meant for evaluating expressions that need
// no target state; confirm against the expr_context declaration.
expr_context no_expr_context;
// Describe an expression occupying `len` bytes at `offset` within the
// data of unit `cu`.  Nothing is decoded until evaluate() is called.
expr::expr(const unit *cu,
section_offset offset, section_length len)
: cu(cu), offset(offset), len(len)
{
}
// Evaluate this expression starting from an empty stack.
expr_result
expr::evaluate(expr_context *ctx) const
{
return evaluate(ctx, {});
}
// Evaluate this expression with `argument` as the only initial stack
// entry.
expr_result
expr::evaluate(expr_context *ctx, taddr argument) const
{
return evaluate(ctx, {argument});
}
// Evaluate this DWARF expression on a small stack machine.
//
// ctx       supplies target state (registers, memory, TLS) for the
//           operations that need it.
// arguments initial stack contents; element 0 ends up on top.
//
// Returns the location or value the expression describes.  Throws
// expr_error for malformed expressions (stack underflow, bad opcode,
// division by zero, branch outside the expression, empty final stack)
// and runtime_error for operations that are not implemented.
expr_result
expr::evaluate(expr_context *ctx, const std::initializer_list<taddr> &arguments) const
{
    // The stack machine's stack.  The top of the stack is
    // stack.back().
    // XXX This stack must be in target machine representation,
    // since I see both (DW_OP_breg0 (eax): -28; DW_OP_stack_value)
    // and (DW_OP_lit1; DW_OP_stack_value).
    small_vector<taddr, 8> stack;

    // Create the initial stack.  arguments are in reverse order
    // (that is, element 0 is TOS), so push them back to front.
    // Walking down from end() with a pre-decrement never forms a
    // pointer before begin(), even for an empty list.
    stack.reserve(arguments.size());
    for (const taddr *elt = arguments.end(); elt != arguments.begin(); )
        stack.push_back(*--elt);

    // Create a subsection for just this expression so we can
    // easily detect the end (including premature end).
    auto cusec = cu->data();
    shared_ptr<section> subsec
        (make_shared<section>(cusec->type,
                              cusec->begin + offset, len,
                              cusec->ord, cusec->fmt,
                              cusec->addr_size));
    cursor cur(subsec);

    // Prepare the expression result.  Some location descriptions
    // create the result directly, rather than using the top of
    // stack.
    expr_result result;

    // 2.6.1.1.4 Empty location descriptions
    if (cur.end()) {
        result.location_type = expr_result::type::empty;
        result.value = 0;
        return result;
    }

    // Assume the result is an address for now and should be
    // grabbed from the top of stack at the end.
    result.location_type = expr_result::type::address;

    // Execute!
    while (!cur.end()) {
#define CHECK() do { if (stack.empty()) goto underflow; } while (0)
#define CHECKN(n) do { if (stack.size() < (n)) goto underflow; } while (0)
        union
        {
            uint64_t u;
            int64_t s;
        } tmp1, tmp2, tmp3;
        static_assert(sizeof(tmp1) == sizeof(taddr), "taddr is not 64 bits");

        // Tell GCC to warn us about missing switch cases,
        // even though we have a default case.
#pragma GCC diagnostic push
#pragma GCC diagnostic warning "-Wswitch-enum"
        DW_OP op = (DW_OP)cur.fixed<ubyte>();
        switch (op) {
            // 2.5.1.1 Literal encodings
        case DW_OP::lit0...DW_OP::lit31:
            stack.push_back((unsigned)op - (unsigned)DW_OP::lit0);
            break;
        case DW_OP::addr:
            stack.push_back(cur.address());
            break;
        case DW_OP::const1u:
            stack.push_back(cur.fixed<uint8_t>());
            break;
        case DW_OP::const2u:
            stack.push_back(cur.fixed<uint16_t>());
            break;
        case DW_OP::const4u:
            stack.push_back(cur.fixed<uint32_t>());
            break;
        case DW_OP::const8u:
            stack.push_back(cur.fixed<uint64_t>());
            break;
        case DW_OP::const1s:
            stack.push_back(cur.fixed<int8_t>());
            break;
        case DW_OP::const2s:
            stack.push_back(cur.fixed<int16_t>());
            break;
        case DW_OP::const4s:
            stack.push_back(cur.fixed<int32_t>());
            break;
        case DW_OP::const8s:
            stack.push_back(cur.fixed<int64_t>());
            break;
        case DW_OP::constu:
            stack.push_back(cur.uleb128());
            break;
        case DW_OP::consts:
            stack.push_back(cur.sleb128());
            break;

            // 2.5.1.2 Register based addressing
        case DW_OP::fbreg:
            // XXX
            throw runtime_error("DW_OP_fbreg not implemented");
        case DW_OP::breg0...DW_OP::breg31:
            tmp1.u = (unsigned)op - (unsigned)DW_OP::breg0;
            tmp2.s = cur.sleb128();
            stack.push_back((int64_t)ctx->reg(tmp1.u) + tmp2.s);
            break;
        case DW_OP::bregx:
            tmp1.u = cur.uleb128();
            tmp2.s = cur.sleb128();
            stack.push_back((int64_t)ctx->reg(tmp1.u) + tmp2.s);
            break;

            // 2.5.1.3 Stack operations
        case DW_OP::dup:
            CHECK();
            stack.push_back(stack.back());
            break;
        case DW_OP::drop:
            CHECK();
            stack.pop_back();
            break;
        case DW_OP::pick:
            tmp1.u = cur.fixed<uint8_t>();
            // Index 0 is the top of stack, so picking index n
            // needs at least n + 1 entries.
            CHECKN(tmp1.u + 1);
            stack.push_back(stack.revat(tmp1.u));
            break;
        case DW_OP::over:
            CHECKN(2);
            stack.push_back(stack.revat(1));
            break;
        case DW_OP::swap:
            CHECKN(2);
            tmp1.u = stack.back();
            stack.back() = stack.revat(1);
            stack.revat(1) = tmp1.u;
            break;
        case DW_OP::rot:
            CHECKN(3);
            tmp1.u = stack.back();
            stack.back() = stack.revat(1);
            stack.revat(1) = stack.revat(2);
            stack.revat(2) = tmp1.u;
            break;
        case DW_OP::deref:
            tmp1.u = subsec->addr_size;
            goto deref_common;
        case DW_OP::deref_size:
            tmp1.u = cur.fixed<uint8_t>();
            if (tmp1.u > subsec->addr_size)
                throw expr_error("DW_OP_deref_size operand exceeds address size");
        deref_common:
            CHECK();
            stack.back() = ctx->deref_size(stack.back(), tmp1.u);
            break;
        case DW_OP::xderef:
            tmp1.u = subsec->addr_size;
            goto xderef_common;
        case DW_OP::xderef_size:
            tmp1.u = cur.fixed<uint8_t>();
            if (tmp1.u > subsec->addr_size)
                throw expr_error("DW_OP_xderef_size operand exceeds address size");
        xderef_common:
            CHECKN(2);
            tmp2.u = stack.back();
            stack.pop_back();
            stack.back() = ctx->xderef_size(tmp2.u, stack.back(), tmp1.u);
            break;
        case DW_OP::push_object_address:
            // XXX
            throw runtime_error("DW_OP_push_object_address not implemented");
        case DW_OP::form_tls_address:
            CHECK();
            stack.back() = ctx->form_tls_address(stack.back());
            break;
        case DW_OP::call_frame_cfa:
            // XXX
            throw runtime_error("DW_OP_call_frame_cfa not implemented");

            // 2.5.1.4 Arithmetic and logical operations
            // UBINOP pops the top entry (right operand) and
            // replaces the new top (left operand) with the result.
#define UBINOP(binop)                                   \
            do {                                        \
                CHECKN(2);                              \
                tmp1.u = stack.back();                  \
                stack.pop_back();                       \
                tmp2.u = stack.back();                  \
                stack.back() = tmp2.u binop tmp1.u;     \
            } while (0)
        case DW_OP::abs:
            CHECK();
            tmp1.u = stack.back();
            if (tmp1.s < 0)
                tmp1.s = -tmp1.s;
            stack.back() = tmp1.u;
            break;
        case DW_OP::and_:
            UBINOP(&);
            break;
        case DW_OP::div:
            CHECKN(2);
            tmp1.u = stack.back();      // divisor (former top)
            stack.pop_back();
            tmp2.u = stack.back();      // dividend (former second)
            if (tmp1.s == 0)
                throw expr_error("division by zero in DW_OP_div");
            // DW_OP_div divides the former second entry by the
            // former top, signed.  Dividing by -1 is done as an
            // unsigned negation so that INT64_MIN / -1 cannot
            // overflow.
            if (tmp1.s == -1)
                tmp3.u = 0 - tmp2.u;
            else
                tmp3.s = tmp2.s / tmp1.s;
            stack.back() = tmp3.u;
            break;
        case DW_OP::minus:
            UBINOP(-);
            break;
        case DW_OP::mod:
            CHECKN(2);
            if (stack.back() == 0)
                throw expr_error("division by zero in DW_OP_mod");
            UBINOP(%);
            break;
        case DW_OP::mul:
            UBINOP(*);
            break;
        case DW_OP::neg:
            CHECK();
            tmp1.u = stack.back();
            tmp1.s = -tmp1.s;
            stack.back() = tmp1.u;
            break;
        case DW_OP::not_:
            CHECK();
            stack.back() = ~stack.back();
            break;
        case DW_OP::or_:
            UBINOP(|);
            break;
        case DW_OP::plus:
            UBINOP(+);
            break;
        case DW_OP::plus_uconst:
            tmp1.u = cur.uleb128();
            CHECK();
            stack.back() += tmp1.u;
            break;
        case DW_OP::shl:
            CHECKN(2);
            tmp1.u = stack.back();
            stack.pop_back();
            tmp2.u = stack.back();
            // C++ does not define what happens if you
            // shift by more bits than the width of the
            // type, so we handle this case specially
            if (tmp1.u < sizeof(tmp2.u)*8)
                stack.back() = tmp2.u << tmp1.u;
            else
                stack.back() = 0;
            break;
        case DW_OP::shr:
            CHECKN(2);
            tmp1.u = stack.back();
            stack.pop_back();
            tmp2.u = stack.back();
            // Same as above
            if (tmp1.u < sizeof(tmp2.u)*8)
                stack.back() = tmp2.u >> tmp1.u;
            else
                stack.back() = 0;
            break;
        case DW_OP::shra:
            CHECKN(2);
            tmp1.u = stack.back();
            stack.pop_back();
            tmp2.u = stack.back();
            // Shifting a negative number is
            // implementation-defined in C++.
            tmp3.u = (tmp2.s < 0);
            if (tmp3.u)
                tmp2.s = -tmp2.s;
            if (tmp1.u < sizeof(tmp2.u)*8)
                tmp2.u >>= tmp1.u;
            else
                tmp2.u = 0;
            // DWARF implies that over-shifting a negative
            // number should result in 0, not ~0.
            if (tmp3.u)
                tmp2.s = -tmp2.s;
            stack.back() = tmp2.u;
            break;
        case DW_OP::xor_:
            UBINOP(^);
            break;
#undef UBINOP

            // 2.5.1.5 Control flow operations
            // SRELOP compares (former second) relop (former top)
            // as signed values and leaves 1 or 0 on the stack.
#define SRELOP(relop)                                           \
            do {                                                \
                CHECKN(2);                                      \
                tmp1.u = stack.back();                          \
                stack.pop_back();                               \
                tmp2.u = stack.back();                          \
                stack.back() = (tmp2.s relop tmp1.s) ? 1 : 0;   \
            } while (0)
        case DW_OP::le:
            SRELOP(<=);
            break;
        case DW_OP::ge:
            SRELOP(>=);
            break;
        case DW_OP::eq:
            SRELOP(==);
            break;
        case DW_OP::lt:
            SRELOP(<);
            break;
        case DW_OP::gt:
            SRELOP(>);
            break;
        case DW_OP::ne:
            SRELOP(!=);
            break;
        case DW_OP::skip:
            tmp1.s = cur.fixed<int16_t>();
            goto skip_common;
        case DW_OP::bra:
            tmp1.s = cur.fixed<int16_t>();
            CHECK();
            tmp2.u = stack.back();
            stack.pop_back();
            if (tmp2.u == 0)
                break;
        skip_common:
            // The branch is relative to the position just after
            // the operand.  Refuse targets outside the expression
            // rather than building a wild cursor.
            tmp2.s = (int64_t)cur.get_section_offset() + tmp1.s;
            if (tmp2.s < 0 || tmp2.u > subsec->size())
                throw expr_error("branch target outside of DWARF expression");
            cur = cursor(subsec, tmp2.u);
            break;
        case DW_OP::call2:
        case DW_OP::call4:
        case DW_OP::call_ref:
            // XXX
            throw runtime_error(to_string(op) + " not implemented");
#undef SRELOP

            // 2.5.1.6 Special operations
        case DW_OP::nop:
            break;

            // 2.6.1.1.2 Register location descriptions
        case DW_OP::reg0...DW_OP::reg31:
            result.location_type = expr_result::type::reg;
            result.value = (unsigned)op - (unsigned)DW_OP::reg0;
            break;
        case DW_OP::regx:
            result.location_type = expr_result::type::reg;
            result.value = cur.uleb128();
            break;

            // 2.6.1.1.3 Implicit location descriptions
        case DW_OP::implicit_value:
            result.location_type = expr_result::type::implicit;
            result.implicit_len = cur.uleb128();
            cur.ensure(result.implicit_len);
            result.implicit = cur.pos;
            // Step over the data block so its bytes are not
            // decoded as further operations.
            cur += result.implicit_len;
            break;
        case DW_OP::stack_value:
            CHECK();
            result.location_type = expr_result::type::literal;
            result.value = stack.back();
            break;

            // 2.6.1.2 Composite location descriptions
        case DW_OP::piece:
        case DW_OP::bit_piece:
            // XXX
            throw runtime_error(to_string(op) + " not implemented");

        case DW_OP::lo_user...DW_OP::hi_user:
            // XXX We could let the context evaluate this,
            // but it would need access to the cursor.
            throw expr_error("unknown user op " + to_string(op));

        default:
            throw expr_error("bad operation " + to_string(op));
        }
#pragma GCC diagnostic pop
#undef CHECK
#undef CHECKN
    }

    if (result.location_type == expr_result::type::address) {
        // The result type is still an address, so we should
        // fetch it from the top of stack.
        if (stack.empty())
            throw expr_error("final stack is empty; no result given");
        result.value = stack.back();
    }

    return result;

underflow:
    throw expr_error("stack underflow evaluating DWARF expression");
}
DWARFPP_END_NAMESPACE

View File

@@ -0,0 +1,297 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#ifndef _DWARFPP_INTERNAL_HH_
#define _DWARFPP_INTERNAL_HH_
#include "dwarf++.hh"
#include "../elf/to_hex.hh"
#include <stdexcept>
#include <type_traits>
#include <unordered_map>
#include <vector>
DWARFPP_BEGIN_NAMESPACE
// DWARF format of a section.  `unknown` is the default for a section
// whose format has not been set.
enum class format
{
unknown,
dwarf32,
dwarf64
};
// Byte order used when decoding fixed-size integers: lsb reads the
// least significant byte first, msb the most significant byte first.
enum class byte_order
{
lsb,
msb
};
/**
* Return this system's native byte order.
*/
static inline byte_order
native_order()
{
    // Store the integer 1 and look at its first byte in memory: on
    // a little-endian host that byte holds the 1.
    static const union
    {
        int i;
        char c[sizeof(int)];
    } probe = {1};

    if (probe.c[0] == 1)
        return byte_order::lsb;
    return byte_order::msb;
}
/**
* A single DWARF section or a slice of a section. This also tracks
* dynamic information necessary to decode values in this section.
*/
struct section
{
section_type type;
const char *begin, *end;
const format fmt;
const byte_order ord;
unsigned addr_size;
section(section_type type, const void *begin,
section_length length,
byte_order ord, format fmt = format::unknown,
unsigned addr_size = 0)
: type(type), begin((char*)begin), end((char*)begin + length),
fmt(fmt), ord(ord), addr_size(addr_size) { }
section(const section &o) = default;
std::shared_ptr<section> slice(section_offset start, section_length len,
format fmt = format::unknown,
unsigned addr_size = 0)
{
if (fmt == format::unknown)
fmt = this->fmt;
if (addr_size == 0)
addr_size = this->addr_size;
return std::make_shared<section>(
type, begin+start,
std::min(len, (section_length)(end-begin)),
ord, fmt, addr_size);
}
size_t size() const
{
return end - begin;
}
};
/**
* A cursor pointing into a DWARF section. Provides deserialization
* operations and bounds checking.
*/
struct cursor
{
    // XXX There's probably a lot of overhead to maintaining the
    // shared pointer to the section from this.  Perhaps the rule
    // should be that all objects keep the dwarf::impl alive
    // (directly or indirectly) and that keeps the loader alive,
    // so a cursor just needs a regular section*.
    std::shared_ptr<section> sec;
    // Current read position within sec.
    const char *pos;

    // An invalid cursor (see valid()).
    cursor()
        : pos(nullptr) { }
    // A cursor positioned offset bytes into sec.
    cursor(const std::shared_ptr<section> sec, section_offset offset = 0)
        : sec(sec), pos(sec->begin + offset) { }

    /**
     * Read a subsection.  The cursor must be at an initial
     * length.  After, the cursor will point just past the end of
     * the subsection.  The returned section has the appropriate
     * DWARF format and begins at the current location of the
     * cursor (so this is usually followed by a
     * skip_initial_length).
     */
    std::shared_ptr<section> subsection();
    std::int64_t sleb128();
    section_offset offset();
    void string(std::string &out);
    const char *cstr(size_t *size_out = nullptr);

    /**
     * Check that bytes more bytes can be read from the current
     * position; calls underflow() otherwise.  A cursor that is
     * already at or past the end fails the check regardless of
     * bytes.
     */
    void
    ensure(section_offset bytes)
    {
        if ((section_offset)(sec->end - pos) < bytes || pos >= sec->end)
            underflow();
    }

    /**
     * Read a fixed-size integer of type T in the section's byte
     * order and advance past it.
     */
    template<typename T>
    T fixed()
    {
        ensure(sizeof(T));
        static_assert(sizeof(T) <= 8, "T too big");
        uint64_t val = 0;
        const unsigned char *p = (const unsigned char*)pos;
        if (sec->ord == byte_order::lsb) {
            for (unsigned i = 0; i < sizeof(T); i++)
                val |= ((uint64_t)p[i]) << (i * 8);
        } else {
            for (unsigned i = 0; i < sizeof(T); i++)
                val = (val << 8) | (uint64_t)p[i];
        }
        pos += sizeof(T);
        return (T)val;
    }

    /**
     * Read an unsigned LEB128 value and advance past it.  Calls
     * underflow() if the section ends before the final byte.
     * Bits beyond the 64th are discarded.
     */
    std::uint64_t uleb128()
    {
        // Appendix C
        // XXX Pre-compute all two byte ULEB's
        std::uint64_t result = 0;
        int shift = 0;
        while (pos < sec->end) {
            uint8_t byte = *(uint8_t*)(pos++);
            // Shifting a 64-bit value by 64 or more is
            // undefined, so an over-long encoding must
            // not feed its extra groups into the shift.
            if (shift < 64)
                result |= (uint64_t)(byte & 0x7f) << shift;
            if ((byte & 0x80) == 0)
                return result;
            shift += 7;
        }
        underflow();
        return 0;
    }

    /**
     * Read a target address whose width is the section's
     * addr_size.  Throws runtime_error for an unsupported size.
     */
    taddr address()
    {
        switch (sec->addr_size) {
        case 1:
            return fixed<uint8_t>();
        case 2:
            return fixed<uint16_t>();
        case 4:
            return fixed<uint32_t>();
        case 8:
            return fixed<uint64_t>();
        default:
            throw std::runtime_error("address size " + std::to_string(sec->addr_size) + " not supported");
        }
    }

    void skip_initial_length();
    void skip_unit_type();
    void skip_form(DW_FORM form);

    // Advance by offset bytes.  No bounds check is done here.
    cursor &operator+=(section_offset offset)
    {
        pos += offset;
        return *this;
    }

    cursor operator+(section_offset offset) const
    {
        return cursor(sec, pos + offset);
    }

    bool operator<(const cursor &o) const
    {
        return pos < o.pos;
    }

    // True once the position is at or past the end of the section.
    bool end() const
    {
        return pos >= sec->end;
    }

    // False only for a cursor whose position is null.
    bool valid() const
    {
        return !!pos;
    }

    // Current position as an offset from the start of the section.
    section_offset get_section_offset() const
    {
        return pos - sec->begin;
    }

private:
    cursor(const std::shared_ptr<section> sec, const char *pos)
        : sec(sec), pos(pos) { }

    void underflow();
};
/**
* An attribute specification in an abbrev.
*/
struct attribute_spec
{
// Attribute name and encoding form.
DW_AT name;
DW_FORM form;
// Computed information
value::type type;
attribute_spec(DW_AT name, DW_FORM form);
};
typedef std::uint64_t abbrev_code;
/**
* An entry in .debug_abbrev.
*/
struct abbrev_entry
{
// Abbrev code; 0 marks an entry that has not been filled in.
abbrev_code code;
DW_TAG tag;
bool children;
std::vector<attribute_spec> attributes;
abbrev_entry() : code(0) { }
// Read one entry from *cur.  The return value is used as a loop
// condition by unit::impl::force_abbrevs; presumably false marks the
// end of the table (TODO confirm against the definition).
bool read(cursor *cur);
};
/**
* A section header in .debug_pubnames or .debug_pubtypes.
*/
struct name_unit
{
uhalf version;
section_offset debug_info_offset;
section_length debug_info_length;
// Cursor to the first name_entry in this unit. This cursor's
// section is limited to this unit.
cursor entries;
void read(cursor *cur)
{
// Section 7.19
std::shared_ptr<section> subsec = cur->subsection();
cursor sub(subsec);
sub.skip_initial_length();
version = sub.fixed<uhalf>();
if (version != 2)
throw format_error("unknown name unit version " + std::to_string(version));
debug_info_offset = sub.offset();
debug_info_length = sub.offset();
entries = sub;
}
};
/**
* An entry in a .debug_pubnames or .debug_pubtypes unit.
*/
struct name_entry
{
section_offset offset;
std::string name;
// Read one entry from *cur: an offset followed by a string.
void read(cursor *cur)
{
offset = cur->offset();
cur->string(name);
}
};
DWARFPP_END_NAMESPACE
#endif

View File

@@ -0,0 +1,438 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#include "internal.hh"
#include <cassert>
using namespace std;
DWARFPP_BEGIN_NAMESPACE
// The expected number of arguments for standard opcodes. This is
// used to check the opcode_lengths header field for compatibility.
// The table is indexed by opcode number.  Entry 0 is a placeholder
// (the header loop starts checking at index 1).  Each comment below
// names the opcode whose entry begins the row that follows it.
static const int opcode_lengths[] = {
0,
// DW_LNS::copy
0, 1, 1, 1, 1,
// DW_LNS::negate_stmt
0, 0, 0, 1, 0,
// DW_LNS::set_epilogue_begin
0, 1
};
struct line_table::impl
{
// Sub-section covering just this line table.
shared_ptr<section> sec;
// Header information
// Offset within sec at which the line number program starts.
section_offset program_offset;
ubyte minimum_instruction_length;
ubyte maximum_operations_per_instruction;
bool default_is_stmt;
sbyte line_base;
ubyte line_range;
ubyte opcode_base;
vector<ubyte> standard_opcode_lengths;
// Entry 0 is the compilation directory; see the constructor.
vector<string> include_directories;
// Entry 0 is the compilation unit's own file; see the constructor.
vector<file> file_names;
// The offset in sec following the last read file name entry.
// File name entries can appear both in the line table header
// and in the line number program itself. Since we can
// iterate over the line number program repeatedly, this keeps
// track of how far we've gotten so we don't add the same
// entry twice.
section_offset last_file_name_end;
// If an iterator has traversed the entire program, then we
// know we've gathered all file names.
bool file_names_complete;
// Only the two bookkeeping fields are initialized here; the header
// fields are assigned by the line_table constructor.
impl() : last_file_name_end(0), file_names_complete(false) {};
// Read one file name entry at *cur and record it unless it was
// already seen.  Returns false only for the empty name that ends the
// header's file list.
bool read_file_entry(cursor *cur, bool in_header);
};
// Parse the line table header found at `offset` in `sec`.
//
// cu_addr_size  address size of the owning compilation unit.
// cu_comp_dir   compilation directory; becomes include directory 0.
// cu_name       compilation unit file name; becomes file 0.
//
// Throws format_error for an unsupported table version, a zero
// maximum_operations_per_instruction or line_range, or a standard
// opcode whose declared argument count differs from the expected one.
line_table::line_table(const shared_ptr<section> &sec, section_offset offset,
                       unsigned cu_addr_size, const string &cu_comp_dir,
                       const string &cu_name)
    : m(make_shared<impl>())
{
    // XXX DWARF2 and 3 give a weird specification for DW_AT_comp_dir
    // Normalize the compilation directory to end in '/'.
    string comp_dir;
    if (cu_comp_dir.empty() || cu_comp_dir.back() == '/')
        comp_dir = cu_comp_dir;
    else
        comp_dir = cu_comp_dir + '/';

    // Read the line table header (DWARF2 section 6.2.4, DWARF3
    // section 6.2.4, DWARF4 section 6.2.3)
    cursor cur(sec, offset);
    m->sec = cur.subsection();
    cur = cursor(m->sec);
    cur.skip_initial_length();
    m->sec->addr_size = cu_addr_size;

    // Basic header information
    uhalf version = cur.fixed<uhalf>();
    if (version < 2 || version > 4)
        throw format_error("unknown line number table version " +
                           std::to_string(version));
    section_length header_length = cur.offset();
    m->program_offset = cur.get_section_offset() + header_length;
    m->minimum_instruction_length = cur.fixed<ubyte>();
    m->maximum_operations_per_instruction = 1;
    if (version >= 4)
        m->maximum_operations_per_instruction = cur.fixed<ubyte>();
    if (m->maximum_operations_per_instruction == 0)
        throw format_error("maximum_operations_per_instruction cannot"
                           " be 0 in line number table");
    m->default_is_stmt = cur.fixed<ubyte>();
    m->line_base = cur.fixed<sbyte>();
    m->line_range = cur.fixed<ubyte>();
    if (m->line_range == 0)
        throw format_error("line_range cannot be 0 in line number table");
    m->opcode_base = cur.fixed<ubyte>();

    static_assert(sizeof(opcode_lengths) / sizeof(opcode_lengths[0]) == 13,
                  "opcode_lengths table has wrong length");
    const unsigned known_opcodes =
        sizeof(opcode_lengths) / sizeof(opcode_lengths[0]);

    // Opcode length table
    m->standard_opcode_lengths.resize(m->opcode_base);
    // opcode_base comes from the file and may be 0, in which case
    // there is no slot 0 to initialize.
    if (!m->standard_opcode_lengths.empty())
        m->standard_opcode_lengths[0] = 0;
    for (unsigned i = 1; i < m->opcode_base; i++) {
        ubyte length = cur.fixed<ubyte>();
        // Only opcodes covered by our table can be checked; a
        // header may declare more standard opcodes than we know,
        // and indexing the table with those would read out of
        // bounds.
        if (i < known_opcodes && length != opcode_lengths[i])
            // The spec never says what to do if the
            // opcode length of a standard opcode doesn't
            // match the header.  Do the safe thing.
            throw format_error(
                "expected " +
                std::to_string(opcode_lengths[i]) +
                " arguments for line number opcode " +
                std::to_string(i) + ", got " +
                std::to_string(length));
        m->standard_opcode_lengths[i] = length;
    }

    // Include directories list
    string incdir;
    // Include directory 0 is implicitly the compilation unit
    // current directory
    m->include_directories.push_back(comp_dir);
    while (true) {
        cur.string(incdir);
        if (incdir.empty())
            break;
        if (incdir.back() != '/')
            incdir += '/';
        // Relative directories are resolved against comp_dir.
        if (incdir[0] == '/')
            m->include_directories.push_back(move(incdir));
        else
            m->include_directories.push_back(comp_dir + incdir);
    }

    // File name list
    // File name 0 is implicitly the compilation unit file name.
    // cu_name can be relative to comp_dir or absolute.
    if (!cu_name.empty() && cu_name[0] == '/')
        m->file_names.emplace_back(cu_name);
    else
        m->file_names.emplace_back(comp_dir + cu_name);
    while (m->read_file_entry(&cur, true));
}
// Iterator at the start of the line number program.  An invalid table
// yields an iterator with no table.
line_table::iterator
line_table::begin() const
{
    return valid() ? iterator(this, m->program_offset)
                   : iterator(nullptr, 0);
}
// Iterator positioned at the size of the table's section.  An invalid
// table yields an iterator with no table.
line_table::iterator
line_table::end() const
{
    return valid() ? iterator(this, m->sec->size())
                   : iterator(nullptr, 0);
}
// Find the row covering `addr`: the first row whose address is <= addr
// while the next row's address is > addr, and which is not an
// end_sequence row.  Returns end() if there is no such row.
line_table::iterator
line_table::find_address(taddr addr) const
{
    iterator prev = begin(), e = end();
    if (prev == e)
        return prev;

    // Walk adjacent pairs (prev, it).
    iterator it = prev;
    ++it;
    while (it != e) {
        if (prev->address <= addr && it->address > addr &&
            !prev->end_sequence)
            return prev;
        prev = it;
        ++it;
    }
    return e;
}
// Return the file entry with the given index.  Throws out_of_range if
// the index is still unknown after the whole program has been scanned.
const line_table::file *
line_table::get_file(unsigned index) const
{
    // It could be declared in the line table program.
    // This is unlikely, so we don't have to be
    // super-efficient about this.  Just force our way
    // through the whole line table program.
    if (index >= m->file_names.size() && !m->file_names_complete) {
        for (auto &ent : *this)
            (void)ent;
    }

    if (index >= m->file_names.size())
        throw out_of_range
            ("file name index " + std::to_string(index) +
             " exceeds file table size of " +
             std::to_string(m->file_names.size()));

    return &m->file_names[index];
}
// Read one file entry (name, directory index, mtime, length) at *cur
// and append it to file_names unless an entry ending at or before
// the same section offset was already recorded.
//
// Returns false only for the empty name that terminates the header's
// file list (in_header == true); returns true otherwise.  Throws
// format_error if a relative name refers to a directory index
// outside include_directories.
bool
line_table::impl::read_file_entry(cursor *cur, bool in_header)
{
    assert(cur->sec == sec);

    string name;
    cur->string(name);
    if (name.empty() && in_header)
        return false;

    const uint64_t dir = cur->uleb128();
    const uint64_t mtime = cur->uleb128();
    const uint64_t length = cur->uleb128();

    // Have we already processed this file entry?
    const auto entry_end = cur->get_section_offset();
    if (entry_end <= last_file_name_end)
        return true;
    last_file_name_end = entry_end;

    if (name[0] != '/') {
        // Relative name: resolve against its include directory.
        if (dir >= include_directories.size())
            throw format_error("file name directory index out of range: " +
                               std::to_string(dir));
        file_names.emplace_back(include_directories[dir] + name,
                                mtime, length);
    } else {
        file_names.emplace_back(move(name), mtime, length);
    }
    return true;
}
// Construct a file table entry from its path, modification time and
// length.  `path` is taken by value, so it is moved into the member
// rather than copied a second time.
line_table::file::file(string path, uint64_t mtime, uint64_t length)
    : path(std::move(path)), mtime(mtime), length(length)
{
}
// Put every register of this row back to its initial value: address
// and op_index 0, file index and line 1, column 0, all boolean flags
// false, isa and discriminator 0, and no resolved file.  is_stmt is
// set from the argument.
void
line_table::entry::reset(bool is_stmt)
{
    this->is_stmt = is_stmt;

    op_index = 0;
    address = 0;

    file = nullptr;
    line = 1;
    file_index = 1;
    column = 0;

    epilogue_begin = false;
    prologue_end = false;
    end_sequence = false;
    basic_block = false;

    discriminator = 0;
    isa = 0;
}
// Describe this row as "path", "path:line" or "path:line:column".
// The line is omitted when it is 0 (and the column with it); the
// column is omitted when it is 0.  `file` is dereferenced
// unconditionally, so it must point at a file entry.
string
line_table::entry::get_description() const
{
    string desc(file->path);
    if (line == 0)
        return desc;

    desc += ":";
    desc += std::to_string(line);
    if (column != 0) {
        desc += ":";
        desc += std::to_string(column);
    }
    return desc;
}
// Create an iterator over `table` positioned at section offset
// `pos`.  With a non-null table the registers are reset using the
// table's default_is_stmt and the iterator is advanced once so that
// it holds the first row at or after `pos`.  A null table leaves the
// iterator untouched.
line_table::iterator::iterator(const line_table *table, section_offset pos)
    : table(table), pos(pos)
{
    if (!table)
        return;

    regs.reset(table->m->default_is_stmt);
    operator++();
}
// Advance to the next row: run opcodes from `pos` until one of them
// emits a row or the section ends, then resolve the row's file
// pointer and remember where the following opcode starts.
//
// Throws format_error if opcodes were executed but the section ended
// before any of them emitted a row, or if the emitted row names a
// file index outside the file table.
line_table::iterator &
line_table::iterator::operator++()
{
    cursor cur(table->m->sec, pos);

    // Execute opcodes until we reach the end of the stream or an
    // opcode emits a line table row
    bool ran_any = false;
    bool emitted = false;
    while (!emitted && !cur.end()) {
        emitted = step(&cur);
        ran_any = true;
    }

    if (ran_any) {
        if (!emitted)
            throw format_error("unexpected end of line table");
        // Record that all file names must be known now
        if (cur.end())
            table->m->file_names_complete = true;
    }

    if (emitted) {
        // Resolve file name of entry
        auto &names = table->m->file_names;
        if (entry.file_index >= names.size())
            throw format_error("bad file index " +
                               std::to_string(entry.file_index) +
                               " in line table");
        entry.file = &names[entry.file_index];
    }

    pos = cur.get_section_offset();
    return *this;
}
// Execute the single opcode at *cur, updating the state machine
// registers in `regs` and advancing *cur past the opcode and its
// operands.
//
// Returns true when the opcode emitted a row (a special opcode,
// DW_LNS::copy or DW_LNE::end_sequence); the row is stored in
// `entry`.  Returns false for every other opcode.
//
// Throws format_error for unknown opcodes and for extended opcodes
// whose declared length is inconsistent with their operands or with
// the section, and runtime_error for vendor extended opcodes.
bool
line_table::iterator::step(cursor *cur)
{
    struct line_table::impl *m = table->m.get();

    // Read the opcode (DWARF4 section 6.2.3)
    ubyte opcode = cur->fixed<ubyte>();
    if (opcode >= m->opcode_base) {
        // Special opcode (DWARF4 section 6.2.5.1)
        ubyte adjusted_opcode = opcode - m->opcode_base;
        unsigned op_advance = adjusted_opcode / m->line_range;
        signed line_inc = m->line_base + (signed)adjusted_opcode % m->line_range;

        regs.line += line_inc;
        regs.address += m->minimum_instruction_length *
            ((regs.op_index + op_advance)
             / m->maximum_operations_per_instruction);
        regs.op_index = (regs.op_index + op_advance)
            % m->maximum_operations_per_instruction;
        entry = regs;

        // Emitting a row clears the per-row flags.
        regs.basic_block = regs.prologue_end =
            regs.epilogue_begin = false;
        regs.discriminator = 0;

        return true;
    } else if (opcode != 0) {
        // Standard opcode (DWARF4 sections 6.2.3 and 6.2.5.2)
        //
        // According to the standard, any opcode between the
        // highest defined opcode for a given DWARF version
        // and opcode_base should be treated as a
        // vendor-specific opcode.  However, the de facto
        // standard seems to be to process these as standard
        // opcodes even if they're from a later version of the
        // standard than the line table header claims.
        uint64_t uarg;
#pragma GCC diagnostic push
#pragma GCC diagnostic warning "-Wswitch-enum"
        switch ((DW_LNS)opcode) {
        case DW_LNS::copy:
            entry = regs;
            regs.basic_block = regs.prologue_end =
                regs.epilogue_begin = false;
            regs.discriminator = 0;
            break;
        case DW_LNS::advance_pc:
            // Opcode advance (as for special opcodes)
            uarg = cur->uleb128();
        advance_pc:
            regs.address += m->minimum_instruction_length *
                ((regs.op_index + uarg)
                 / m->maximum_operations_per_instruction);
            regs.op_index = (regs.op_index + uarg)
                % m->maximum_operations_per_instruction;
            break;
        case DW_LNS::advance_line:
            regs.line = (signed)regs.line + cur->sleb128();
            break;
        case DW_LNS::set_file:
            regs.file_index = cur->uleb128();
            break;
        case DW_LNS::set_column:
            regs.column = cur->uleb128();
            break;
        case DW_LNS::negate_stmt:
            regs.is_stmt = !regs.is_stmt;
            break;
        case DW_LNS::set_basic_block:
            regs.basic_block = true;
            break;
        case DW_LNS::const_add_pc:
            // Same advance as special opcode 255 would perform.
            uarg = (255 - m->opcode_base) / m->line_range;
            goto advance_pc;
        case DW_LNS::fixed_advance_pc:
            regs.address += cur->fixed<uhalf>();
            regs.op_index = 0;
            break;
        case DW_LNS::set_prologue_end:
            regs.prologue_end = true;
            break;
        case DW_LNS::set_epilogue_begin:
            regs.epilogue_begin = true;
            break;
        case DW_LNS::set_isa:
            regs.isa = cur->uleb128();
            break;
        default:
            // XXX Vendor extensions
            throw format_error("unknown line number opcode " +
                               to_string((DW_LNS)opcode));
        }
        return ((DW_LNS)opcode == DW_LNS::copy);
    } else { // opcode == 0
        // Extended opcode (DWARF4 sections 6.2.3 and 6.2.5.3)
        assert(opcode == 0);
        uint64_t length = cur->uleb128();
        section_offset end = cur->get_section_offset() + length;
        // The declared extent must lie inside the section;
        // otherwise skipping to `end` below would move the cursor
        // out of bounds.
        if (end > m->sec->size())
            throw format_error("extended line number opcode extends past end of line table");
        opcode = cur->fixed<ubyte>();
        switch ((DW_LNE)opcode) {
        case DW_LNE::end_sequence:
            regs.end_sequence = true;
            entry = regs;
            regs.reset(m->default_is_stmt);
            break;
        case DW_LNE::set_address:
            regs.address = cur->address();
            regs.op_index = 0;
            break;
        case DW_LNE::define_file:
            m->read_file_entry(cur, false);
            break;
        case DW_LNE::set_discriminator:
            // XXX Only DWARF4
            regs.discriminator = cur->uleb128();
            break;
        case DW_LNE::lo_user...DW_LNE::hi_user:
            // XXX Vendor extensions
            throw runtime_error("vendor line number opcode " +
                                to_string((DW_LNE)opcode) +
                                " not implemented");
        default:
            // XXX Prior to DWARF4, any opcode number
            // could be a vendor extension
            throw format_error("unknown line number opcode " +
                               to_string((DW_LNE)opcode));
        }
#pragma GCC diagnostic pop
        if (cur->get_section_offset() > end)
            throw format_error("extended line number opcode exceeded its size");
        // Skip whatever remains of the opcode's declared length.
        // The cursor itself must be advanced; `cur` is only a
        // local pointer to it.
        *cur += end - cur->get_section_offset();
        return ((DW_LNE)opcode == DW_LNE::end_sequence);
    }
}
DWARFPP_END_NAMESPACE

View File

@@ -0,0 +1,103 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#include "internal.hh"
using namespace std;
DWARFPP_BEGIN_NAMESPACE
// Construct a range list that reads from `sec` starting at offset
// `off`.  The slice is created with an unknown format and the
// compilation unit's address size; cu_low_pc becomes the initial
// base address.
rangelist::rangelist(const std::shared_ptr<section> &sec, section_offset off,
                     unsigned cu_addr_size, taddr cu_low_pc)
    : base_addr(cu_low_pc)
{
    this->sec = sec->slice(off, ~0, format::unknown, cu_addr_size);
}
// Construct a synthetic range list from explicit (low, high) pairs.
// The pairs are flattened into `synthetic`, followed by a (0, 0)
// end-of-list entry, and a native-order section is created over
// that storage.  The base address is 0.
rangelist::rangelist(const initializer_list<pair<taddr, taddr> > &ranges)
{
    // Two words per range plus the two-word terminator.
    synthetic.reserve(ranges.size() * 2 + 2);
    for (auto it = ranges.begin(); it != ranges.end(); ++it) {
        synthetic.push_back(it->first);
        synthetic.push_back(it->second);
    }
    synthetic.push_back(0);
    synthetic.push_back(0);

    const char *bytes = (const char*)synthetic.data();
    const size_t nbytes = synthetic.size() * sizeof(taddr);
    sec = make_shared<section>(
        section_type::ranges, bytes, nbytes,
        native_order(), format::unknown, sizeof(taddr));

    base_addr = 0;
}
// Return an iterator at the first entry of the list, or end() when
// this range list has no section.
rangelist::iterator
rangelist::begin() const
{
    if (!sec)
        return end();
    return iterator(sec, base_addr);
}
// Return the past-the-end iterator, which is a value-initialized
// iterator.
rangelist::iterator
rangelist::end() const
{
    return iterator{};
}
// Return true if any entry of this range list contains addr.  The
// entries are scanned in order and the scan stops at the first hit.
bool
rangelist::contains(taddr addr) const
{
    for (auto it = begin(), last = end(); it != last; ++it) {
        auto ent = *it;
        if (ent.contains(addr))
            return true;
    }
    return false;
}
// Create an iterator at the start of `sec` with the given initial
// base address and immediately read in the first entry.
rangelist::iterator::iterator(const std::shared_ptr<section> &sec, taddr base_addr)
    : sec(sec), base_addr(base_addr), pos(0)
{
    operator++();
}
// Advance to the next regular entry of the range list, applying any
// base address selection entries encountered on the way.  On
// reaching the end-of-list entry the iterator drops its section and
// resets pos to 0.
rangelist::iterator &
rangelist::iterator::operator++()
{
    // DWARF4 section 2.17.3: a base address selection entry is
    // marked by the largest representable address offset, i.e. all
    // ones in addr_size bytes.  The shift must be done in taddr:
    // shifting the int literal 1 by 32 bits (addr_size == 4) would
    // be undefined behavior.
    taddr largest_offset = ~(taddr)0;
    if (sec->addr_size < sizeof(taddr))
        largest_offset += (taddr)1 << (8 * sec->addr_size);

    // Read in entries until we reach a regular entry or an
    // end-of-list.  Note that pos points to the beginning of the
    // entry *following* the current entry, so that's where we
    // start.
    cursor cur(sec, pos);
    while (true) {
        entry.low = cur.address();
        entry.high = cur.address();

        if (entry.low == 0 && entry.high == 0) {
            // End of list
            sec.reset();
            pos = 0;
            break;
        } else if (entry.low == largest_offset) {
            // Base address change
            base_addr = entry.high;
        } else {
            // Regular entry.  Adjust by base address.
            entry.low += base_addr;
            entry.high += base_addr;
            pos = cur.get_section_offset();
            break;
        }
    }

    return *this;
}
DWARFPP_END_NAMESPACE

View File

@@ -0,0 +1,197 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#ifndef _DWARFPP_SMALL_VECTOR_HH_
#define _DWARFPP_SMALL_VECTOR_HH_
DWARFPP_BEGIN_NAMESPACE
/**
 * A vector-like class that only heap allocates above a specified
 * size.
 *
 * Up to Min elements are stored in a buffer embedded in the object
 * itself; once more capacity is needed the elements are moved to a
 * heap allocation whose capacity grows by doubling.  Element access
 * (operator[], at, revat, front, back) and pop_back perform no
 * bounds checking.
 */
template<class T, unsigned Min>
class small_vector
{
public:
    typedef T value_type;
    typedef value_type& reference;
    typedef const value_type& const_reference;
    typedef size_t size_type;

    /** Construct an empty vector using the inline buffer. */
    small_vector()
        : base((T*)buf), end(base), cap(base + Min)
    {
    }

    /** Construct a copy of o. */
    small_vector(const small_vector<T, Min> &o)
        : base((T*)buf), end(base), cap(base + Min)
    {
        *this = o;
    }

    /**
     * Move-construct from o, leaving o empty and using its inline
     * buffer.
     */
    small_vector(small_vector<T, Min> &&o)
        : base((T*)buf), end(base), cap(base + Min)
    {
        if (o.is_inline()) {
            // Elements live inside o itself, so they cannot be
            // stolen; move them one by one.  They always fit in
            // our own inline buffer.
            for (T *src = o.base; src != o.end; ++src, ++end)
                new (end) T(std::move(*src));
            o.clear();
        } else {
            // Elements are external; take over the allocation
            base = o.base;
            end = o.end;
            cap = o.cap;
            o.base = (T*)o.buf;
            o.end = o.base;
            o.cap = o.base + Min;
        }
    }

    ~small_vector()
    {
        clear();
        if (!is_inline())
            delete[] (char*)base;
    }

    /** Replace the contents of this vector with a copy of o's. */
    small_vector &operator=(const small_vector<T, Min> &o)
    {
        // Self-assignment must be a no-op: clearing first would
        // destroy the very elements we are about to copy.
        if (this == &o)
            return *this;

        size_type osize = o.size();
        clear();
        reserve(osize);
        for (size_type i = 0; i < osize; i++, end++)
            new (end) T(o[i]);
        return *this;
    }

    size_type size() const
    {
        return end - base;
    }

    bool empty() const
    {
        return base == end;
    }

    /**
     * Ensure there is capacity for at least n elements.  Existing
     * elements are moved into the new storage if it has to grow.
     */
    void reserve(size_type n)
    {
        if (n <= (size_type)(cap - base))
            return;

        size_type target = cap - base;
        if (target == 0)
            target = 1;
        while (target < n)
            target <<= 1;

        char *newbuf = new char[sizeof(T) * target];
        T *src = base, *dest = (T*)newbuf;
        for (; src < end; src++, dest++) {
            new(dest) T(std::move(*src));
            // Destroy the element we moved *from*; the new one
            // must stay alive.
            src->~T();
        }
        if (!is_inline())
            delete[] (char*)base;
        base = (T*)newbuf;
        end = dest;
        cap = base + target;
    }

    reference operator[](size_type n)
    {
        return base[n];
    }

    const_reference operator[](size_type n) const
    {
        return base[n];
    }

    reference at(size_type n)
    {
        return base[n];
    }

    const_reference at(size_type n) const
    {
        return base[n];
    }

    /**
     * "Reverse at".  revat(0) is equivalent to back().  revat(1)
     * is the element before back.  Etc.
     */
    reference revat(size_type n)
    {
        return *(end - 1 - n);
    }

    const_reference revat(size_type n) const
    {
        return *(end - 1 - n);
    }

    reference front()
    {
        return base[0];
    }

    const_reference front() const
    {
        return base[0];
    }

    reference back()
    {
        return *(end-1);
    }

    const_reference back() const
    {
        return *(end-1);
    }

    void push_back(const T& x)
    {
        reserve(size() + 1);
        new (end) T(x);
        end++;
    }

    void push_back(T&& x)
    {
        reserve(size() + 1);
        new (end) T(std::move(x));
        end++;
    }

    void pop_back()
    {
        end--;
        end->~T();
    }

    /** Destroy all elements; the current storage is kept. */
    void clear()
    {
        for (T* p = base; p < end; ++p)
            p->~T();
        end = base;
    }

private:
    /** True while the elements live in the embedded buffer. */
    bool is_inline() const
    {
        return (const char*)base == buf;
    }

    // Inline storage for up to Min elements, aligned for T.
    alignas(T) char buf[sizeof(T) * Min];
    // [base, end) are the live elements; [base, cap) is the storage.
    T *base, *end, *cap;
};
DWARFPP_END_NAMESPACE
#endif

View File

@@ -0,0 +1,336 @@
// Copyright (c) 2013 Austin T. Clements. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
#include "internal.hh"
#include <cstring>
using namespace std;
DWARFPP_BEGIN_NAMESPACE
// Construct a value for attribute `name` of unit `cu`, encoded with
// `form` at `offset` and interpreted as `typ`.  A value encoded as
// DW_FORM::indirect is resolved right away.
value::value(const unit *cu,
             DW_AT name, DW_FORM form, type typ, section_offset offset)
    : cu(cu), form(form), typ(typ), offset(offset)
{
    const bool is_indirect = (form == DW_FORM::indirect);
    if (is_indirect)
        resolve_indirect(name);
}
// Return the unit's section offset plus this value's offset.
section_offset
value::get_section_offset() const
{
    const auto unit_start = cu->get_section_offset();
    return unit_start + offset;
}
// Read this value as an address.  Only DW_FORM::addr is accepted;
// any other form throws value_type_mismatch.
taddr
value::as_address() const
{
    if (form == DW_FORM::addr) {
        cursor cur(cu->data(), offset);
        return cur.address();
    }
    throw value_type_mismatch("cannot read " + to_string(typ) + " as address");
}
// Return a pointer to the bytes of this block and store its length
// in *size_out.  The length prefix is decoded according to the form
// (1, 2 or 4 byte fixed size, or ULEB128 for block and exprloc) and
// the cursor is checked to have that many bytes available.  Any
// other form throws value_type_mismatch and leaves *size_out
// untouched.
const void *
value::as_block(size_t *size_out) const
{
    // XXX Blocks can contain all sorts of things, including
    // references, which couldn't be resolved by callers in the
    // current minimal API.
    cursor cur(cu->data(), offset);
    size_t length;
    switch (form) {
    case DW_FORM::block:
    case DW_FORM::exprloc:
        length = cur.uleb128();
        break;
    case DW_FORM::block4:
        length = cur.fixed<uint32_t>();
        break;
    case DW_FORM::block2:
        length = cur.fixed<uint16_t>();
        break;
    case DW_FORM::block1:
        length = cur.fixed<uint8_t>();
        break;
    default:
        throw value_type_mismatch("cannot read " + to_string(typ) + " as block");
    }
    // Publish the length before verifying it, so the caller sees
    // it even if the check below throws.
    *size_out = length;
    cur.ensure(length);
    return cur.pos;
}
// Read this value as an unsigned constant.  Accepts the fixed-size
// data1/2/4/8 forms and ULEB128-encoded udata; any other form throws
// value_type_mismatch.
uint64_t
value::as_uconstant() const
{
    cursor cur(cu->data(), offset);
    uint64_t result;
    switch (form) {
    case DW_FORM::udata:
        result = cur.uleb128();
        break;
    case DW_FORM::data8:
        result = cur.fixed<uint64_t>();
        break;
    case DW_FORM::data4:
        result = cur.fixed<uint32_t>();
        break;
    case DW_FORM::data2:
        result = cur.fixed<uint16_t>();
        break;
    case DW_FORM::data1:
        result = cur.fixed<uint8_t>();
        break;
    default:
        throw value_type_mismatch("cannot read " + to_string(typ) + " as uconstant");
    }
    return result;
}
// Read this value as a signed constant.  Accepts the fixed-size
// data1/2/4/8 forms (read as signed integers of that width) and
// SLEB128-encoded sdata; any other form throws value_type_mismatch.
int64_t
value::as_sconstant() const
{
    cursor cur(cu->data(), offset);
    int64_t result;
    switch (form) {
    case DW_FORM::sdata:
        result = cur.sleb128();
        break;
    case DW_FORM::data8:
        result = cur.fixed<int64_t>();
        break;
    case DW_FORM::data4:
        result = cur.fixed<int32_t>();
        break;
    case DW_FORM::data2:
        result = cur.fixed<int16_t>();
        break;
    case DW_FORM::data1:
        result = cur.fixed<int8_t>();
        break;
    default:
        throw value_type_mismatch("cannot read " + to_string(typ) + " as sconstant");
    }
    return result;
}
// Read this value as a location expression.  The length prefix is
// decoded according to the form and the returned expr covers that
// many bytes starting right after the prefix.  Forms other than
// exprloc and the block forms throw value_type_mismatch.
expr
value::as_exprloc() const
{
    cursor cur(cu->data(), offset);
    size_t size;
    // Prior to DWARF 4, exprlocs were encoded as blocks.
    if (form == DW_FORM::exprloc || form == DW_FORM::block)
        size = cur.uleb128();
    else if (form == DW_FORM::block1)
        size = cur.fixed<uint8_t>();
    else if (form == DW_FORM::block2)
        size = cur.fixed<uint16_t>();
    else if (form == DW_FORM::block4)
        size = cur.fixed<uint32_t>();
    else
        throw value_type_mismatch("cannot read " + to_string(typ) + " as exprloc");

    return expr(cu, cur.get_section_offset(), size);
}
// Read this value as a flag.  DW_FORM::flag_present is always true
// and carries no data; DW_FORM::flag is true when its byte is
// non-zero.  Any other form throws value_type_mismatch.
bool
value::as_flag() const
{
    if (form == DW_FORM::flag_present)
        return true;

    if (form == DW_FORM::flag) {
        cursor cur(cu->data(), offset);
        return cur.fixed<ubyte>() != 0;
    }

    throw value_type_mismatch("cannot read " + to_string(typ) + " as flag");
}
// Read this value as a range list: the value is a section offset
// into the ranges section, and the list is decoded with the unit's
// address size.
rangelist
value::as_rangelist() const
{
    const section_offset list_off = as_sec_offset();

    // The compilation unit may not have a base address.  In this
    // case, the first entry in the range list must be a base
    // address entry, but we'll just assume 0 for the initial base
    // address.
    die root = cu->root();
    taddr initial_base = 0;
    if (root.has(DW_AT::low_pc))
        initial_base = at_low_pc(root);

    auto ranges_sec = cu->get_dwarf().get_section(section_type::ranges);
    auto unit_sec = cu->data();
    return rangelist(ranges_sec, list_off, unit_sec->addr_size, initial_base);
}
// Read this value as a reference and return the DIE it refers to.
//
// ref1/ref2/ref4/ref8/ref_udata are offsets within this value's own
// unit.  ref_addr is an offset that is matched against the section
// offsets of the compilation units to find the containing unit.
// ref_sig8 is a type signature that is looked up among the type
// units.
//
// Throws value_type_mismatch for any other form, and format_error
// if a ref_addr offset lies before every compilation unit or a
// ref_sig8 signature is unknown.
die
value::as_reference() const
{
    section_offset off;
    // XXX Would be nice if we could avoid this.  The cursor is
    // all overhead here.
    cursor cur(cu->data(), offset);
    switch (form) {
    case DW_FORM::ref1:
        off = cur.fixed<ubyte>();
        break;
    case DW_FORM::ref2:
        off = cur.fixed<uhalf>();
        break;
    case DW_FORM::ref4:
        off = cur.fixed<uword>();
        break;
    case DW_FORM::ref8:
        off = cur.fixed<uint64_t>();
        break;
    case DW_FORM::ref_udata:
        off = cur.uleb128();
        break;

    case DW_FORM::ref_addr: {
        off = cur.offset();
        // These seem to be extremely rare in practice (I
        // haven't been able to get gcc to produce a
        // ref_addr), so it's not worth caching this lookup.
        // Pick the last unit that starts at or before off.
        const compilation_unit *base_cu = nullptr;
        for (auto &file_cu : cu->get_dwarf().compilation_units()) {
            if (file_cu.get_section_offset() > off)
                break;
            base_cu = &file_cu;
        }
        // No unit starts at or before the offset (or there are
        // no units at all): the reference is malformed.
        if (!base_cu)
            throw format_error("ref_addr offset 0x" + to_hex(off) +
                               " precedes all compilation units");
        die d(base_cu);
        d.read(off - base_cu->get_section_offset());
        return d;
    }

    case DW_FORM::ref_sig8: {
        uint64_t sig = cur.fixed<uint64_t>();
        try {
            return cu->get_dwarf().get_type_unit(sig).type();
        } catch (const std::out_of_range &) {
            throw format_error("unknown type signature 0x" + to_hex(sig));
        }
    }

    default:
        throw value_type_mismatch("cannot read " + to_string(typ) + " as reference");
    }

    die d(cu);
    d.read(off);
    return d;
}
// Read this value as a string into buf, replacing its previous
// contents.  Throws whatever as_cstr() throws for non-string forms.
void
value::as_string(string &buf) const
{
    size_t size;
    const char *p = as_cstr(&size);
    // assign() is well-defined for size == 0, unlike taking the
    // address of front() on an empty string.
    buf.assign(p, size);
}
// Read this value as a string and return it by value.  Throws
// whatever as_cstr() throws for non-string forms.
string
value::as_string() const
{
    size_t length;
    const char *text = as_cstr(&length);
    string result(text, length);
    return result;
}
const char *
value::as_cstr(size_t *size_out) const
{
cursor cur(cu->data(), offset);
switch (form) {
case DW_FORM::string:
return cur.cstr(size_out);
case DW_FORM::strp: {
section_offset off = cur.offset();
cursor scur(cu->get_dwarf().get_section(section_type::str), off);
return scur.cstr(size_out);
}
default:
throw value_type_mismatch("cannot read " + to_string(typ) + " as string");
}
}
// Read this value as a section offset.  Accepts DW_FORM::sec_offset
// as well as data4 and data8; any other form throws
// value_type_mismatch.
section_offset
value::as_sec_offset() const
{
    // Prior to DWARF 4, sec_offsets were encoded as data4 or
    // data8.
    cursor cur(cu->data(), offset);
    section_offset result;
    switch (form) {
    case DW_FORM::sec_offset:
        result = cur.offset();
        break;
    case DW_FORM::data8:
        result = cur.fixed<uint64_t>();
        break;
    case DW_FORM::data4:
        result = cur.fixed<uint32_t>();
        break;
    default:
        throw value_type_mismatch("cannot read " + to_string(typ) + " as sec_offset");
    }
    return result;
}
// Resolve a DW_FORM::indirect value: the data begins with one or
// more ULEB128-encoded form codes, the last of which is the actual
// form.  Afterwards this value's form, type and offset describe the
// real encoding, with the offset pointing just past the form codes.
// Does nothing if the value is not indirect.
void
value::resolve_indirect(DW_AT name)
{
    if (form != DW_FORM::indirect)
        return;

    cursor c(cu->data(), offset);
    DW_FORM resolved;
    do {
        resolved = (DW_FORM)c.uleb128();
    } while (resolved == DW_FORM::indirect);

    // Compute everything before touching any member, so a throw
    // from attribute_spec leaves the value unchanged.
    const auto resolved_type = attribute_spec(name, resolved).type;

    // The member must be updated, not a shadowing local: every
    // as_*() accessor dispatches on this->form.
    form = resolved;
    typ = resolved_type;
    offset = c.get_section_offset();
}
// Render a value as human-readable text, chosen by its type:
// addresses, constants and offsets in hex, uconstant/sconstant in
// decimal, blocks as a byte count followed by hex bytes, flags as
// "true"/"false", and strings verbatim.
string
to_string(const value &v)
{
    // Section-offset based types all print as "<label 0xOFFSET>".
    const auto offset_tag = [&v](const char *label) {
        return "<" + string(label) + " 0x" + to_hex(v.as_sec_offset()) + ">";
    };

    switch (v.get_type()) {
    case value::type::invalid:
        return "<invalid value type>";
    case value::type::address:
        return "0x" + to_hex(v.as_address());
    case value::type::block: {
        size_t size;
        const char *bytes = (const char*)v.as_block(&size);
        string res = ::to_string(size) + " byte block:";
        for (const char *p = bytes; p != bytes + size; ++p) {
            res += ' ';
            res += to_hex(*p);
        }
        return res;
    }
    case value::type::constant:
        return "0x" + to_hex(v.as_uconstant());
    case value::type::uconstant:
        return ::to_string(v.as_uconstant());
    case value::type::sconstant:
        return ::to_string(v.as_sconstant());
    case value::type::exprloc:
        // XXX
        return "<exprloc>";
    case value::type::flag:
        if (v.as_flag())
            return "true";
        return "false";
    case value::type::line:
        return offset_tag("line");
    case value::type::loclist:
        return offset_tag("loclist");
    case value::type::mac:
        return offset_tag("mac");
    case value::type::rangelist:
        return offset_tag("rangelist");
    case value::type::reference: {
        die d = v.as_reference();
        // References into a type unit are labelled with the
        // .debug_types section name.
        const bool in_type_unit =
            dynamic_cast<const type_unit*>(&d.get_unit()) != nullptr;
        if (in_type_unit)
            return "<.debug_types+0x" + to_hex(d.get_section_offset()) + ">";
        return "<0x" + to_hex(d.get_section_offset()) + ">";
    }
    case value::type::string:
        return v.as_string();
    }
    return "<unexpected value type " + to_string(v.get_type()) + ">";
}
DWARFPP_END_NAMESPACE