Home | History | Annotate | Download | only in elff
      1 /* Copyright (C) 2007-2010 The Android Open Source Project
      2 **
      3 ** This software is licensed under the terms of the GNU General Public
      4 ** License version 2, as published by the Free Software Foundation, and
      5 ** may be copied, distributed, and modified under those terms.
      6 **
      7 ** This program is distributed in the hope that it will be useful,
      8 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
      9 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     10 ** GNU General Public License for more details.
     11 */
     12 
     13 /*
     14  * Contains declarations of types, constants and structures
     15  * describing DWARF format.
     16  */
     17 
     18 #ifndef ELFF_DWARF_DEFS_H_
     19 #define ELFF_DWARF_DEFS_H_
     20 
     21 #include "dwarf.h"
     22 #include "elf_defs.h"
     23 
/* DWARF structures are packed to 1 byte.
 * ELFF_PACKED expands to the GNU-style "packed" attribute and is applied to
 * every structure in this header that is overlaid on mapped section data.
 */
#define ELFF_PACKED __attribute__ ((packed))
     26 
     27 /*
     28  * Helper types for misc. DWARF variables.
     29  */
     30 
/* Type for DWARF abbreviation number (1-based; zero marks a separator). */
typedef uint32_t  Dwarf_AbbrNum;

/* Type for DWARF tag ID. */
typedef uint16_t  Dwarf_Tag;

/* Type for DWARF attribute ID (DW_AT_xxx). */
typedef uint16_t  Dwarf_At;

/* Type for DWARF form ID (DW_FORM_xxx). */
typedef uint16_t  Dwarf_Form;

/* Type for offset in 32-bit DWARF. */
typedef uint32_t  Dwarf32_Off;

/* Type for offset in 64-bit DWARF. */
typedef uint64_t  Dwarf64_Off;
     48 
     49 /* Enumerates types of values, obtained during DWARF attribute decoding. */
     50 typedef enum DwarfValueType {
     51   /* Undefined */
     52   DWARF_VALUE_UNKNOWN = 1,
     53 
     54   /* uint8_t */
     55   DWARF_VALUE_U8,
     56 
     57   /* int8_t */
     58   DWARF_VALUE_S8,
     59 
     60   /* uint16_t */
     61   DWARF_VALUE_U16,
     62 
     63   /* int16_t */
     64   DWARF_VALUE_S16,
     65 
     66   /* uint32_t */
     67   DWARF_VALUE_U32,
     68 
     69   /* int32_t */
     70   DWARF_VALUE_S32,
     71 
     72   /* uint64_t */
     73   DWARF_VALUE_U64,
     74 
     75   /* int64_t */
     76   DWARF_VALUE_S64,
     77 
     78   /* const char* */
     79   DWARF_VALUE_STR,
     80 
     81   /* 32-bit address */
     82   DWARF_VALUE_PTR32,
     83 
     84   /* 64-bit address */
     85   DWARF_VALUE_PTR64,
     86 
     87   /* Dwarf_Block */
     88   DWARF_VALUE_BLOCK,
     89 } DwarfValueType;
     90 
/* Describes a block of data stored directly in the mapped .debug_info
 * section. This type is used to represent an attribute encoded with one of
 * the DW_FORM_block# forms. The structure only references the data; it does
 * not hold a copy of it.
 */
typedef struct Dwarf_Block {
  /* Pointer to the block data inside mapped .debug_info section. */
  const void*   block_ptr;

  /* Byte size of the block data. */
  Elf_Word      block_size;
} Dwarf_Block;
    102 
/* Describes a value obtained from the mapped .debug_info section
 * during DWARF attribute decoding.
 */
typedef struct Dwarf_Value {
  /* Unites all possible data types for the value.
   * See DwarfValueType for the list of types. The union is anonymous, so
   * its members are accessed directly on the Dwarf_Value (e.g. value.u32).
   */
  union {
    Elf_Byte      u8;
    Elf_Sbyte     s8;
    Elf_Half      u16;
    Elf_Shalf     s16;
    Elf_Word      u32;
    Elf_Sword     s32;
    Elf_Xword     u64;
    Elf_Sxword    s64;
    Elf_Word      ptr32;
    Elf_Xword     ptr64;
    const char*   str;
    Dwarf_Block   block;
  };

  /* Value type (defines which variable in the union above
   * contains the value).
   */
  DwarfValueType  type;

  /* Number of bytes that encode this value in .debug_info section
   * of ELF file.
   */
  Elf_Word        encoded_size;
} Dwarf_Value;
    135 
    136 /* DWARF's LEB128 data type. LEB128 is defined as:
    137  * Variable Length Data. "Little Endian Base 128" (LEB128) numbers. LEB128 is
    138  * a scheme for encoding integers densely that exploits the assumption that
    139  * most integers are small in magnitude. (This encoding is equally suitable
    140  * whether the target machine architecture represents data in big-endian or
 * little-endian order. It is "little endian" only in the sense that it avoids
    142  * using space to represent the "big" end of an unsigned integer, when the big
    143  * end is all zeroes or sign extension bits).
    144  *
    145  * Unsigned LEB128 numbers are encoded as follows: start at the low order end
    146  * of an unsigned integer and chop it into 7-bit chunks. Place each chunk into
    147  * the low order 7 bits of a byte. Typically, several of the high order bytes
    148  * will be zero; discard them. Emit the remaining bytes in a stream, starting
    149  * with the low order byte; set the high order bit on each byte except the last
    150  * emitted byte. The high bit of zero on the last byte indicates to the decoder
    151  * that it has encountered the last byte. The integer zero is a special case,
    152  * consisting of a single zero byte.
    153  *
    154  * The encoding for signed LEB128 numbers is similar, except that the criterion
    155  * for discarding high order bytes is not whether they are zero, but whether
    156  * they consist entirely of sign extension bits. Consider the 32-bit integer
    157  * -2. The three high level bytes of the number are sign extension, thus LEB128
    158  * would represent it as a single byte containing the low order 7 bits, with
    159  * the high order bit cleared to indicate the end of the byte stream. Note that
    160  * there is nothing within the LEB128 representation that indicates whether an
    161  * encoded number is signed or unsigned. The decoder must know what type of
    162  * number to expect.
    163  *
    164  * NOTE: It's assumed that LEB128 will not contain encodings for integers,
    165  * larger than 64 bit.
    166 */
    167 typedef struct ELFF_PACKED Dwarf_Leb128 {
    168   /* Beginning of the LEB128 block. */
    169   Elf_Byte  val;
    170 
    171   /* Pulls actual value, encoded with this LEB128 block.
    172    * Param:
    173    *  value - Upon return will contain value, encoded with this LEB128 block.
    174    *  sign - If true, the caller expects the LEB128 to contain a signed
    175    *    integer, otherwise, caller expects an unsigned integer value to be
    176    *    encoded with this LEB128 block.
    177    */
    178   void get_common(Dwarf_Value* value, bool sign) const {
    179     value->u64 = 0;
    180     /* Integer zero is a special case. */
    181     if (val == 0) {
    182       value->type = sign ? DWARF_VALUE_S32 : DWARF_VALUE_U32;
    183       value->encoded_size = 1;
    184       return;
    185     }
    186 
    187     /* We've got to reconstruct the integer. */
    188     value->type = DWARF_VALUE_UNKNOWN;
    189     value->encoded_size = 0;
    190 
    191     /* Byte by byte loop though the LEB128, reconstructing the integer from
    192      * 7-bits chunks. Byte with 8-th bit set to zero indicates the end
    193      * of the LEB128 block. For signed integers, 7-th bit of the last LEB128
    194      * byte controls the sign. If 7-th bit of the last LEB128 byte is set,
    195      * the integer is negative. If 7-th bit of the last LEB128 byte is not
    196      * set, the integer is positive.
    197      */
    198     const Elf_Byte* cur = &val;
    199     Elf_Word shift = 0;
    200     while ((*cur & 0x80) != 0) {
    201       value->u64 |= (static_cast<Elf_Xword>(*cur) & 0x7F) << shift;
    202       shift += 7;
    203       value->encoded_size++;
    204       cur++;
    205     }
    206     value->u64 |= (static_cast<Elf_Xword>(*cur) & 0x7F) << shift;
    207     value->encoded_size++;
    208 
    209     /* LEB128 format doesn't carry any info of the sizeof of the integer it
    210      * represents. We well guess it, judging by the highest bit set in the
    211      * reconstucted integer.
    212      */
    213     if ((value->u64 & 0xFFFFFFFF00000000LL) == 0) {
    214       /* 32-bit integer. */
    215       if (sign) {
    216         value->type = DWARF_VALUE_S32;
    217         if (((*cur) & 0x40) != 0) {
    218           // Value is negative.
    219           value->u64 |= - (1 << (shift + 7));
    220         } else if ((value->u32 & 0x80000000) != 0) {
    221           // Make sure we don't report negative value in this case.
    222           value->type = DWARF_VALUE_S64;
    223         }
    224       } else {
    225         value->type = DWARF_VALUE_U32;
    226       }
    227     } else {
    228       /* 64-bit integer. */
    229       if (sign) {
    230         value->type = DWARF_VALUE_S64;
    231         if (((*cur) & 0x40) != 0) {
    232           // Value is negative.
    233           value->u64 |= - (1 << (shift + 7));
    234         }
    235       } else {
    236         value->type = DWARF_VALUE_U64;
    237       }
    238     }
    239   }
    240 
    241   /* Pulls actual unsigned value, encoded with this LEB128 block.
    242    * See get_common() for more info.
    243    * Param:
    244    *  value - Upon return will contain unsigned value, encoded with
    245    *  this LEB128 block.
    246    */
    247   void get_unsigned(Dwarf_Value* value) const {
    248     get_common(value, false);
    249   }
    250 
    251   /* Pulls actual signed value, encoded with this LEB128 block.
    252    * See get_common() for more info.
    253    * Param:
    254    *  value - Upon return will contain signed value, encoded with
    255    *  this LEB128 block.
    256    */
    257   void get_signed(Dwarf_Value* value) const {
    258     get_common(value, true);
    259   }
    260 
    261   /* Pulls LEB128 value, advancing past this LEB128 block.
    262    * See get_common() for more info.
    263    * Return:
    264    *  Pointer to the byte past this LEB128 block.
    265    */
    266   const void* process(Dwarf_Value* value, bool sign) const {
    267     get_common(value, sign);
    268     return INC_CPTR(&val, value->encoded_size);
    269   }
    270 
    271   /* Pulls LEB128 unsigned value, advancing past this LEB128 block.
    272    * See process() for more info.
    273    */
    274   const void* process_unsigned(Dwarf_Value* value) const {
    275     return process(value, false);
    276   }
    277 
    278   /* Pulls LEB128 signed value, advancing past this LEB128 block.
    279    * See process() for more info.
    280    */
    281   const void* process_signed(Dwarf_Value* value) const {
    282     return process(value, true);
    283   }
    284 } Dwarf_Leb128;
    285 
    286 /* DIE attribute descriptor in the .debug_abbrev section.
    287  * Attribute descriptor contains two LEB128 values. First one provides
    288  * attribute ID (one of DW_AT_XXX values), and the second one provides
    289  * format (one of DW_FORMAT_XXX values), in which attribute value is
    290  * encoded in the .debug_info section of the ELF file.
    291  */
    292 typedef struct ELFF_PACKED Dwarf_Abbr_AT {
    293   /* Attribute ID (DW_AT_XXX).
    294    * Attribute format (DW_FORMAT_XXX) follows immediately.
    295    */
    296   Dwarf_Leb128  at;
    297 
    298   /* Checks if this is a separator descriptor.
    299    * Zero is an invalid attribute ID, indicating the end of attribute
    300    * list for the current DIE.
    301    */
    302   bool is_separator() const {
    303     return at.val == 0;
    304   }
    305 
    306   /* Pulls attribute data, advancing past this descriptor.
    307    * Param:
    308    *  at_value - Upon return contains attribute value of this descriptor.
    309    *  form - Upon return contains form value of this descriptor.
    310    * Return:
    311    *  Pointer to the byte past this descriptor block (usually, next
    312    *  attribute decriptor).
    313    */
    314   const Dwarf_Abbr_AT* process(Dwarf_At* at_value, Dwarf_Form* form) const {
    315     if (is_separator()) {
    316       /* Size of separator descriptor is always 2 bytes. */
    317       *at_value = 0;
    318       *form = 0;
    319       return INC_CPTR_T(Dwarf_Abbr_AT, &at.val, 2);
    320     }
    321 
    322     Dwarf_Value val;
    323 
    324     /* Process attribute ID. */
    325     const Dwarf_Leb128* next =
    326         reinterpret_cast<const Dwarf_Leb128*>(at.process_unsigned(&val));
    327     *at_value = val.u16;
    328 
    329     /* Follow with processing the form. */
    330     next = reinterpret_cast<const Dwarf_Leb128*>(next->process_unsigned(&val));
    331     *form = val.u16;
    332     return reinterpret_cast<const Dwarf_Abbr_AT*>(next);
    333   }
    334 } Dwarf_Abbr_AT;
    335 
    336 /* DIE abbreviation descriptor in the .debug_abbrev section.
    337  * DIE abbreviation descriptor contains three parameters. The first one is a
    338  * LEB128 value, that encodes 1 - based abbreviation descriptor number.
    339  * Abbreviation descriptor numbers seems to be always in sequential order, and
    340  * are counted on per-compilation unit basis. I.e. abbreviation number for the
    341  * first DIE abbreviation descriptor of each compilation unit is always 1.
    342  *
    343  * Besides abbreviation number, DIE abbreviation descriptor contains two more
    344  * values. The first one (after abbr_num) is a LEB128 value containing DIE's
    345  * tag value, and the second one is one byte flag specifying whether or not
    346  * the DIE contains any cildren.
    347  *
    348  * This descriptor is immediately followed by a list of attribute descriptors
    349  * (see Dwarf_Abbr_AT) for the DIE represented by this abbreviation descriptor.
    350  */
    351 typedef struct ELFF_PACKED Dwarf_Abbr_DIE {
    352   /* 1 - based abbreviation number for the DIE. */
    353   Dwarf_Leb128  abbr_num;
    354 
    355   /* Gets abbreviation number for this descriptor. */
    356   Dwarf_AbbrNum get_abbr_num() const {
    357     Dwarf_Value val;
    358     abbr_num.get_unsigned(&val);
    359     return val.u16;
    360   }
    361 
    362   /* Gets DIE tag for this descriptor. */
    363   Dwarf_Tag get_tag() const {
    364     Dwarf_Tag tag;
    365     process(NULL, &tag);
    366     return tag;
    367   }
    368 
    369   /* Pulls DIE abbreviation descriptor data, advancing past this descriptor.
    370    * Param:
    371    *  abbr_index - Upon return contains abbreviation number for this
    372    *    descriptor. This parameter can be NULL, if the caller is not interested
    373    *    in this value.
    374    *  tag - Upon return contains tag of the DIE for this descriptor. This
    375    *    parameter can be NULL, if the caller is not interested in this value.
    376    *  form - Upon return contains form of the DIE for this descriptor.
    377    * Return:
    378    *  Pointer to the list of attribute descriptors for the DIE.
    379    */
    380   const Dwarf_Abbr_AT* process(Dwarf_AbbrNum* abbr_index,
    381                                Dwarf_Tag* tag) const {
    382     Dwarf_Value val;
    383     const Dwarf_Leb128* next =
    384         reinterpret_cast<const Dwarf_Leb128*>(abbr_num.process_unsigned(&val));
    385     if (abbr_index != NULL) {
    386       *abbr_index = val.u32;
    387     }
    388 
    389     /* Next one is a "tag". */
    390     next = reinterpret_cast<const Dwarf_Leb128*>(next->process_unsigned(&val));
    391     if (tag != NULL) {
    392       *tag = val.u16;
    393     }
    394 
    395     /* Next one is a "has children" one byte flag. We're not interested in it,
    396      * so jump to the list of attribute descriptors that immediately follows
    397      * this DIE descriptor. */
    398     return INC_CPTR_T(Dwarf_Abbr_AT, next, 1);
    399   }
    400 } Dwarf_Abbr_DIE;
    401 
    402 /* DIE descriptor in the .debug_info section.
    403  * DIE descriptor contains one LEB128-encoded value, containing DIE's
    404  * abbreviation descriptor number in the .debug_abbrev section.
    405  *
    406  * DIE descriptor is immediately followed by the list of DIE attribute values,
    407  * format of wich is defined by the list of attribute descriptors in the
    408  * .debug_abbrev section, that immediately follow the DIE attribute descriptor,
    409  * addressed by this descriptor's abbr_num LEB128.
    410  */
    411 typedef struct ELFF_PACKED Dwarf_DIE {
    412   /* 1 - based index of DIE abbreviation descriptor (Dwarf_Abbr_DIE) for this
    413    * DIE in the .debug_abbrev section.
    414    *
    415    * NOTE: DIE abbreviation descriptor indexes are tied to the compilation
    416    * unit. In other words, each compilation unit restarts counting DIE
    417    * abbreviation descriptors from 1.
    418    *
    419    * NOTE: Zero is invalid value for this field, indicating that this DIE is a
    420    * separator (usually it ends a list of "child" DIEs)
    421    */
    422   Dwarf_Leb128  abbr_num;
    423 
    424   /* Checks if this is a separator DIE. */
    425   bool is_separator() const {
    426     return abbr_num.val == 0;
    427   }
    428 
    429   /* Gets (1 - based) abbreviation number for this DIE. */
    430   Dwarf_AbbrNum get_abbr_num() const {
    431     Dwarf_Value val;
    432     abbr_num.get_unsigned(&val);
    433     return val.u16;
    434   }
    435 
    436   /* Pulls DIE information, advancing past this descriptor to DIE attributes.
    437    * Param:
    438    *  abbr_num - Upon return contains abbreviation number for this DIE. This
    439    *    parameter can be NULL, if the caller is not interested in this value.
    440    * Return:
    441    *  Pointer to the byte past this descriptor (the list of DIE attributes).
    442    */
    443   const Elf_Byte* process(Dwarf_AbbrNum* abbr_number) const {
    444     if (is_separator()) {
    445       if (abbr_number != NULL) {
    446         *abbr_number = 0;
    447       }
    448       // Size of a separator DIE is 1 byte.
    449       return INC_CPTR_T(Elf_Byte, &abbr_num.val, 1);
    450     }
    451     Dwarf_Value val;
    452     const void* ret = abbr_num.process_unsigned(&val);
    453     if (abbr_number != NULL) {
    454       *abbr_number = val.u32;
    455     }
    456     return reinterpret_cast<const Elf_Byte*>(ret);
    457   }
    458 } Dwarf_DIE;
    459 
    460 /*
    461  * Variable size headers.
    462  * When encoding size value in DWARF, the first 32 bits of a "size" header
    463  * define header type. If first 32 bits of the header contain 0xFFFFFFFF
    464  * value, this is 64-bit size header with the following 64 bits encoding
    465  * the size. Otherwise, if first 32 bits are not 0xFFFFFFFF, they contain
    466  * 32-bit size value.
    467  */
    468 
/* Size header for 32-bit DWARF: a single 32-bit size value. */
typedef struct ELFF_PACKED Dwarf32_SizeHdr {
  /* Size value. */
  Elf_Word  size;
} Dwarf32_SizeHdr;
    474 
/* Size header for 64-bit DWARF: a 32-bit selector followed by the 64-bit
 * size value.
 */
typedef struct ELFF_PACKED Dwarf64_SizeHdr {
  /* Size selector. For 64-bit DWARF this field is set to 0xFFFFFFFF */
  Elf_Word  size_selector;

  /* Actual size value. */
  Elf_Xword   size;
} Dwarf64_SizeHdr;
    483 
/* Compilation unit header in the .debug_info section.
 * Template param:
 *  Dwarf_SizeHdr - Type for the header's size field. Must be Dwarf32_SizeHdr
 *    for 32-bit DWARF, or Dwarf64_SizeHdr for 64-bit DWARF.
 *  Elf_Off - Type for abbrev_offset field. Must be Elf_Word for 32-bit
 *    DWARF, or Elf_Xword for 64-bit DWARF.
 */
template <typename Dwarf_SizeHdr, typename Elf_Off>
struct ELFF_PACKED Dwarf_CUHdr {
  /* Size of the compilation unit data in .debug_info section. */
  Dwarf_SizeHdr   size_hdr;

  /* Compilation unit's DWARF version stamp. */
  Elf_Half        version;

  /* Relative (to the beginning of .debug_abbrev section data) offset of the
   * beginning of abbreviation sequence for this compilation unit.
   */
  Elf_Off         abbrev_offset;

  /* Pointer size for this compilation unit (should be 4, or 8). */
  Elf_Byte        address_size;
};
/* Compilation unit header in the .debug_info section for 32-bit DWARF. */
typedef Dwarf_CUHdr<Dwarf32_SizeHdr, Elf_Word> Dwarf32_CUHdr;
/* Compilation unit header in the .debug_info section for 64-bit DWARF. */
typedef Dwarf_CUHdr<Dwarf64_SizeHdr, Elf_Xword> Dwarf64_CUHdr;
    511 
/* CU STMTL header in the .debug_line section.
 * Template param:
 *  Dwarf_SizeHdr - Type for the header's size field. Must be Dwarf32_SizeHdr
 *    for 32-bit DWARF, or Dwarf64_SizeHdr for 64-bit DWARF.
 *  Elf_Size - Type for header_length field. Must be Elf_Word for 32-bit
 *    DWARF, or Elf_Xword for 64-bit DWARF.
 */
template <typename Dwarf_SizeHdr, typename Elf_Size>
struct ELFF_PACKED Dwarf_STMTLHdr {
  /* The size in bytes of the line number information for this compilation
   * unit, not including the unit_length field itself. */
  Dwarf_SizeHdr unit_length;

  /* A version number. This number is specific to the line number information
   * and is independent of the DWARF version number. */
  Elf_Half      version;

  /* The number of bytes following the header_length field to the beginning of
   * the first byte of the line number program itself. In the 32-bit DWARF
   * format, this is a 4-byte unsigned length; in the 64-bit DWARF format,
   * this field is an 8-byte unsigned length. */
  Elf_Size      header_length;

  /* The size in bytes of the smallest target machine instruction. Line number
   * program opcodes that alter the address register first multiply their
   * operands by this value. */
  Elf_Byte      min_instruction_len;

  /* The initial value of the is_stmt register. */
  Elf_Byte      default_is_stmt;

  /* This parameter affects the meaning of the special opcodes. */
  Elf_Sbyte     line_base;

  /* This parameter affects the meaning of the special opcodes. */
  Elf_Byte      line_range;

  /* The number assigned to the first special opcode. */
  Elf_Byte      opcode_base;

  /* This is the first element of an array specifying the number of LEB128
   * operands for each of the standard opcodes. The first element of the array
   * corresponds to the opcode whose value is 1, and the last element
   * corresponds to the opcode whose value is opcode_base - 1. By increasing
   * opcode_base, and adding elements to this array, new standard opcodes can
   * be added, while allowing consumers who do not know about these new opcodes
   * to be able to skip them. NOTE: only the first element is declared here;
   * the array itself lives in the mapped .debug_line section. */
  Elf_Byte      standard_opcode_lengths;
};
/* CU STMTL header in the .debug_line section for 32-bit DWARF. */
typedef Dwarf_STMTLHdr<Dwarf32_SizeHdr, Elf_Word> Dwarf32_STMTLHdr;
/* CU STMTL header in the .debug_line section for 64-bit DWARF. */
typedef Dwarf_STMTLHdr<Dwarf64_SizeHdr, Elf_Xword> Dwarf64_STMTLHdr;
    566 
    567 /* Source file descriptor in the .debug_line section.
    568  * Descriptor begins with zero-terminated file name, followed by an ULEB128,
    569  * encoding directory index in the list of included directories, followed by
    570  * an ULEB12, encoding file modification time, followed by an ULEB12, encoding
    571  * file size.
    572  */
    573 typedef struct ELFF_PACKED Dwarf_STMTL_FileDesc {
    574   /* Zero-terminated file name. */
    575   char  file_name[1];
    576 
    577   /* Checks of this descriptor ends the list. */
    578   bool is_last_entry() const {
    579     return file_name[0] == '\0';
    580   }
    581 
    582   /* Gets file name. */
    583   const char* get_file_name() const {
    584     return file_name;
    585   }
    586 
    587   /* Processes this descriptor, advancing to the next one.
    588    * Param:
    589    *  dir_index - Upon return contains index of the parent directory in the
    590    *    list of included directories. Can be NULL if caller is not interested
    591    *    in this value.
    592    * Return:
    593    *  Pointer to the next source file descriptor in the list.
    594    */
    595   const Dwarf_STMTL_FileDesc* process(Elf_Word* dir_index) const {
    596     if (is_last_entry()) {
    597       return this;
    598     }
    599 
    600     /* First parameter: include directory index. */
    601     Dwarf_Value tmp;
    602     const Dwarf_Leb128* leb =
    603         INC_CPTR_T(Dwarf_Leb128, file_name, strlen(file_name) + 1);
    604     leb = reinterpret_cast<const Dwarf_Leb128*>(leb->process_unsigned(&tmp));
    605     if (dir_index != NULL) {
    606       *dir_index = tmp.u32;
    607     }
    608     /* Process file time. */
    609     leb = reinterpret_cast<const Dwarf_Leb128*>(leb->process_unsigned(&tmp));
    610     /* Process file size. */
    611     return reinterpret_cast<const Dwarf_STMTL_FileDesc*>(leb->process_unsigned(&tmp));
    612   }
    613 
    614   /* Gets directory index for this descriptor. */
    615   Elf_Word get_dir_index() const {
    616     assert(!is_last_entry());
    617     if (is_last_entry()) {
    618       return 0;
    619     }
    620     /* Get directory index. */
    621     Dwarf_Value ret;
    622     const Dwarf_Leb128* leb =
    623       INC_CPTR_T(Dwarf_Leb128, file_name, strlen(file_name) + 1);
    624     leb->process_unsigned(&ret);
    625     return ret.u32;
    626   }
    627 } Dwarf_STMTL_FileDesc;
    628 
    629 /* Encapsulates a DIE attribute, collected during ELF file parsing.
    630  */
    631 class DIEAttrib {
    632  public:
    633   /* Constructs DIEAttrib intance. */
    634   DIEAttrib()
    635       : at_(0),
    636         form_(0) {
    637     value_.type = DWARF_VALUE_UNKNOWN;
    638   }
    639 
    640   /* Destructs DIEAttrib intance. */
    641   ~DIEAttrib() {
    642   }
    643 
    644   /* Gets DWARF attribute ID (DW_AT_Xxx) for this property. */
    645   Dwarf_At at() const {
    646     return at_;
    647   }
    648 
    649   /* Gets DWARF form ID (DW_FORM_Xxx) for this property. */
    650   Dwarf_Form form() const {
    651     return form_;
    652   }
    653 
    654   /* Gets value of this property. */
    655   const Dwarf_Value* value() const {
    656     return &value_;
    657   }
    658 
    659   /* Value of this property. */
    660   Dwarf_Value   value_;
    661 
    662   /* DWARF attribute ID (DW_AT_Xxx) for this property. */
    663   Dwarf_At      at_;
    664 
    665   /* DWARF form ID (DW_FORM_Xxx) for this property. */
    666   Dwarf_Form    form_;
    667 };
    668 
/* Parse tag context.
 * This structure is used as an ELF file parsing parameter, limiting collected
 * DIEs by the list of tags (see collect_die()).
 */
typedef struct DwarfParseContext {
  /* Zero-terminated list of tags to collect DIEs for. If this field is NULL,
   * DIEs for all tags will be collected during the parsing. */
  const Dwarf_Tag*  tags;
} DwarfParseContext;
    678 
    679 /* Checks if a DIE with the given tag should be collected during the parsing.
    680  * Param:
    681  *  parse_context - Parse context to check the tag against. This parameter can
    682  *  be NULL, indicating that all tags should be collected.
    683  *  tag - Tag to check.
    684  * Return:
    685  *  true if a DIE with the given tag should be collected during the parsing,
    686  *  or false, if the DIE should not be collected.
    687  */
    688 static inline bool
    689 collect_die(const DwarfParseContext* parse_context, Dwarf_Tag tag) {
    690   if (parse_context == NULL || parse_context->tags == NULL) {
    691     return true;
    692   }
    693   for (const Dwarf_Tag* tags = parse_context->tags; *tags != 0; tags++) {
    694     if (*tags == tag) {
    695       return true;
    696     }
    697   }
    698   return false;
    699 }
    700 
    701 /* Encapsulates an array of Dwarf_Abbr_DIE pointers, cached for a compilation
    702  * unit. Although Dwarf_Abbr_DIE descriptors in the .debug_abbrev section of
    703  * the ELF file seems to be always in sequential order, DIE descriptors may
    704  * reference them randomly. So, to provide better performance, we will cache
    705  * all Dwarf_Abbr_DIE pointers, that were found for each DIE. Since all of the
    706  * Dwarf_Abbr_DIE are sequential, an array is the best way to cache them.
    707  *
 * NOTE: Objects of this class are instantiated one per each CU, as DIE
 * abbreviation numbering is restarted from 1 for each new CU.
    710  */
    711 class DwarfAbbrDieArray {
    712  public:
  /* Constructs DwarfAbbrDieArray instance.
   * Most of the CUs don't have too many unique Dwarf_Abbr_DIEs, so, in order
   * to decrease the number of memory allocation calls, we preallocate
   * a relatively small array for them along with the instance of this class,
   * hoping that all Dwarf_Abbr_DIEs for the CU will fit into it.
   */
  DwarfAbbrDieArray()
      /* Start out using the storage embedded in this object. */
      : array_(&small_array_[0]),
        array_size_(ELFF_ARRAY_SIZE(small_array_)),
        /* No entries cached yet. */
        count_(0) {
  }
    724 
    725   /* Destructs DwarfAbbrDieArray instance. */
  ~DwarfAbbrDieArray() {
    /* Only a heap-allocated replacement array (see add()) is released; the
     * embedded small_array_ must never be passed to delete[]. */
    if (array_ != &small_array_[0]) {
      delete[] array_;
    }
  }
    731 
    732   /* Adds new entry to the array
    733    * Param:
    734    *  abbr - New entry to add.
    735    *  num - Abbreviation number for the adding entry.
    736    *    NOTE: before adding, this method will verify that descriptor for the
    737    *    given abbreviation number has not been cached yet.
    738    *    NOTE: due to the nature of this array, entries MUST be added strictly
    739    *    in sequential order.
    740    * Return:
    741    *  true on success, false on failure.
    742    */
    743   bool add(const Dwarf_Abbr_DIE* abbr, Dwarf_AbbrNum num) {
    744     assert(num != 0);
    745     if (num == 0) {
    746       // Zero is illegal DIE abbreviation number.
    747       _set_errno(EINVAL);
    748       return false;
    749     }
    750 
    751     if (num <= count_) {
    752       // Already cached.
    753       return true;
    754     }
    755 
    756     // Enforce strict sequential order.
    757     assert(num == (count_ + 1));
    758     if (num != (count_ + 1)) {
    759       _set_errno(EINVAL);
    760       return false;
    761     }
    762 
    763     if (num >= array_size_) {
    764       /* Expand the array. Make it 64 entries bigger than adding entry number.
    765        * NOTE: that we don't check for an overflow here, since we secured
    766        * ourselves from that by enforcing strict sequential order. So, an
    767        * overflow may happen iff number of entries cached in this array is
    768        * close to 4G, which is a) totally unreasonable, and b) we would die
    769        * long before this amount of entries is cached.
    770        */
    771       Dwarf_AbbrNum new_size = num + 64;
    772 
    773       // Reallocate.
    774       const Dwarf_Abbr_DIE** new_array = new const Dwarf_Abbr_DIE*[new_size];
    775       assert(new_array != NULL);
    776       if (new_array == NULL) {
    777         _set_errno(ENOMEM);
    778         return false;
    779       }
    780       memcpy(new_array, array_, count_ * sizeof(const Dwarf_Abbr_DIE*));
    781       if (array_ != &small_array_[0]) {
    782         delete[] array_;
    783       }
    784       array_ = new_array;
    785       array_size_ = new_size;
    786     }
    787 
    788     // Abbreviation numbers are 1-based.
    789     array_[num - 1] = abbr;
    790     count_++;
    791     return true;
    792   }
    793 
    794   /* Adds new entry to the array
    795    * Param:
    796    *  abbr - New entry to add.
    797    * Return:
    798    *  true on success, false on failure.
    799    */
    800   bool add(const Dwarf_Abbr_DIE* abbr) {
    801     return add(abbr, abbr->get_abbr_num());
    802   }
    803 
    804   /* Gets an entry from the array
    805    * Param:
    806    *  num - 1-based index of an entry to get.
    807    * Return:
    808    *  Entry on success, or NULL if num exceeds the number of entries
    809    *  contained in the array.
    810    */
    811   const Dwarf_Abbr_DIE* get(Dwarf_AbbrNum num) const {
    812     assert(num != 0 && num <= count_);
    813     if (num != 0 && num <= count_) {
    814       return array_[num - 1];
    815     } else {
    816       _set_errno(EINVAL);
    817       return NULL;
    818     }
    819   }
    820 
    821   /* Caches Dwarf_Abbr_DIEs into this array up to the requested number.
    822    * NOTE: This method cannot be called on an empty array. Usually, first
    823    * entry is inserted into this array when CU object is initialized.
    824    * Param:
    825    *  num - Entry number to cache entries up to.
    826    * Return:
    827    *  Last cached entry (actually, an entry for the 'num' index).
    828    */
    829   const Dwarf_Abbr_DIE* cache_to(Dwarf_AbbrNum num) {
    830     /* Last cached DIE abbreviation. We always should have cached at least one
    831      * abbreviation for the CU DIE itself, added via "add" method when CU
    832      * object was initialized. */
    833     const Dwarf_Abbr_DIE* cur_abbr = get(count_);
    834     assert(cur_abbr != NULL);
    835     if (cur_abbr == NULL) {
    836       return NULL;
    837     }
    838 
    839     /* Starting with the last cached DIE abbreviation, loop through the
    840      * remaining DIE abbreviations in the .debug_abbrev section of the
    841      * mapped ELF file, caching them until we reach the requested
    842      * abbreviation descriptor number. Normally, the very next DIE
    843      * abbreviation will stop the loop. */
    844     while (num > count_) {
    845       Dwarf_AbbrNum abbr_num;
    846       Dwarf_Tag tmp2;
    847       Dwarf_Form tmp3;
    848       Dwarf_At tmp4;
    849 
    850       /* Process all AT abbreviations for the current DIE entry, reaching next
    851        * DIE abbreviation. */
    852       const Dwarf_Abbr_AT* abbr_at = cur_abbr->process(&abbr_num, &tmp2);
    853       while (!abbr_at->is_separator()) {
    854         abbr_at = abbr_at->process(&tmp4, &tmp3);
    855       }
    856 
    857       // Next DIE abbreviation is right after the separator AT abbreviation.
    858       cur_abbr = reinterpret_cast<const Dwarf_Abbr_DIE*>
    859                                               (abbr_at->process(&tmp4, &tmp3));
    860       if (!add(cur_abbr)) {
    861         return NULL;
    862       }
    863     }
    864 
    865     return array_[num - 1];
    866   }
    867 
    868   /* Empties array and frees allocations. */
    869   void empty() {
    870     if (array_ != &small_array_[0]) {
    871       delete[] array_;
    872       array_ = &small_array_[0];
    873       array_size_ = sizeof(small_array_) / sizeof(small_array_[0]);
    874     }
    875     count_ = 0;
    876   }
    877 
    878  protected:
    879   /* Array, preallocated in anticipation of relatively small number of
    880    * DIE abbreviations in compilation unit. */
    881   const Dwarf_Abbr_DIE*   small_array_[64];
    882 
    883   /* Array of Dwarf_Abbr_DIE pointers, cached for a compilation unit. */
    884   const Dwarf_Abbr_DIE**  array_;
    885 
    886   /* Current size of the array. */
    887   Dwarf_AbbrNum           array_size_;
    888 
    889   /* Number of entries, cached in the array. */
    890   Dwarf_AbbrNum           count_;
    891 };
    892 
    893 /* Encapsulates a state machine for the "Line Number Program", that is run
    894  * on data contained in the mapped .debug_line section.
    895  */
    896 class DwarfStateMachine {
    897  public:
    898   /* Constructs DwarfStateMachine instance.
    899    * Param:
    900    *  set_is_stmt - Matches value of default_is_stmt field in the STMTL header.
    901    *    see Dwarf_STMTL_HdrXX.
    902    */
    903   explicit DwarfStateMachine(bool set_is_stmt)
    904     : address_(0),
    905       file_(1),
    906       line_(1),
    907       column_(0),
    908       discriminator_(0),
    909       is_stmt_(set_is_stmt),
    910       basic_block_(false),
    911       end_sequence_(false),
    912       prologue_end_(false),
    913       epilogue_begin_(false),
    914       isa_(0),
    915       set_file_info_(NULL) {
    916   }
    917 
    918   /* Destructs DwarfStateMachine instance. */
    919   ~DwarfStateMachine() {
    920   }
    921 
    922   /* Resets the state to default.
    923    * Param:
    924    *  set_is_stmt - Matches value of default_is_stmt field in the STMTL header.
    925    *    see Dwarf_STMTL_HdrXX.
    926   */
    927   void reset(bool set_is_stmt) {
    928     address_ = 0;
    929     file_ = 1;
    930     line_ = 1;
    931     column_ = 0;
    932     discriminator_ = 0;
    933     is_stmt_ = set_is_stmt;
    934     basic_block_ = false;
    935     end_sequence_ = false;
    936     prologue_end_ = false;
    937     epilogue_begin_ = false;
    938     isa_ = 0;
    939     set_file_info_ = NULL;
    940   }
    941 
    942   /*
    943    * Machine state.
    944    */
    945 
    946   /* Current address (current PC value). */
    947   Elf_Xword                   address_;
    948 
    949   /* Current index of source file descriptor. */
    950   Elf_Word                    file_;
    951 
    952   /* Current line in the current source file. */
    953   Elf_Word                    line_;
    954 
    955   /* Current column. */
    956   Elf_Word                    column_;
    957 
    958   /* Current discriminator value. */
    959   Elf_Word                    discriminator_;
    960 
    961   /* Current STMT flag. */
    962   bool                        is_stmt_;
    963 
    964   /* Current basic block flag. */
    965   bool                        basic_block_;
    966 
    967   /* Current end of sequence flag. */
    968   bool                        end_sequence_;
    969 
    970   /* Current end of prologue flag. */
    971   bool                        prologue_end_;
    972 
    973   /* Current epilogue begin flag. */
    974   bool                        epilogue_begin_;
    975 
    976   /* Current ISA value. */
    977   Elf_Word                    isa_;
    978 
    979   /* Current value for explicitly set current source file descriptor.
    980    * If not NULL, this descriptor has priority over the descriptor, addressed
    981    * by the file_ member of this class. */
    982   const Dwarf_STMTL_FileDesc* set_file_info_;
    983 };
    984 
    985 /* Checks if given tag belongs to a routine. */
    986 static inline bool
    987 dwarf_tag_is_routine(Dwarf_Tag tag) {
    988   return tag == DW_TAG_inlined_subroutine ||
    989          tag == DW_TAG_subprogram ||
    990          tag == DW_AT_main_subprogram;
    991 }
    992 
    993 /* Checks if given tag belongs to a compilation unit. */
    994 static inline bool
    995 dwarf_tag_is_cu(Dwarf_Tag tag) {
    996   return tag == DW_TAG_compile_unit ||
    997          tag == DW_TAG_partial_unit;
    998 }
    999 
   1000 #endif  // ELFF_DWARF_DEFS_H_
   1001