Home | History | Annotate | Download | only in dwarf
      1 // Copyright (c) 2010 Google Inc. All Rights Reserved.
      2 //
      3 // Redistribution and use in source and binary forms, with or without
      4 // modification, are permitted provided that the following conditions are
      5 // met:
      6 //
      7 //     * Redistributions of source code must retain the above copyright
      8 // notice, this list of conditions and the following disclaimer.
      9 //     * Redistributions in binary form must reproduce the above
     10 // copyright notice, this list of conditions and the following disclaimer
     11 // in the documentation and/or other materials provided with the
     12 // distribution.
     13 //     * Neither the name of Google Inc. nor the names of its
     14 // contributors may be used to endorse or promote products derived from
     15 // this software without specific prior written permission.
     16 //
     17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28 
     29 // CFI reader author: Jim Blandy <jimb (at) mozilla.com> <jimb (at) red-bean.com>
     30 
     31 // Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit,
     32 // and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details.
     33 
     34 #include "common/dwarf/dwarf2reader.h"
     35 
     36 #include <assert.h>
     37 #include <stdint.h>
     38 #include <stdio.h>
     39 #include <string.h>
     40 
     41 #include <map>
     42 #include <memory>
     43 #include <stack>
     44 #include <string>
     45 #include <utility>
     46 
     47 #include "common/dwarf/bytereader-inl.h"
     48 #include "common/dwarf/bytereader.h"
     49 #include "common/dwarf/line_state_machine.h"
     50 #include "common/using_std_string.h"
     51 
     52 namespace dwarf2reader {
     53 
     54 CompilationUnit::CompilationUnit(const SectionMap& sections, uint64 offset,
     55                                  ByteReader* reader, Dwarf2Handler* handler)
     56     : offset_from_section_start_(offset), reader_(reader),
     57       sections_(sections), handler_(handler), abbrevs_(NULL),
     58       string_buffer_(NULL), string_buffer_length_(0) {}
     59 
     60 // Read a DWARF2/3 abbreviation section.
     61 // Each abbrev consists of a abbreviation number, a tag, a byte
     62 // specifying whether the tag has children, and a list of
     63 // attribute/form pairs.
     64 // The list of forms is terminated by a 0 for the attribute, and a
     65 // zero for the form.  The entire abbreviation section is terminated
     66 // by a zero for the code.
     67 
     68 void CompilationUnit::ReadAbbrevs() {
     69   if (abbrevs_)
     70     return;
     71 
     72   // First get the debug_abbrev section.  ".debug_abbrev" is the name
     73   // recommended in the DWARF spec, and used on Linux;
     74   // "__debug_abbrev" is the name used in Mac OS X Mach-O files.
     75   SectionMap::const_iterator iter = sections_.find(".debug_abbrev");
     76   if (iter == sections_.end())
     77     iter = sections_.find("__debug_abbrev");
     78   assert(iter != sections_.end());
     79 
     80   abbrevs_ = new std::vector<Abbrev>;
     81   abbrevs_->resize(1);
     82 
     83   // The only way to check whether we are reading over the end of the
     84   // buffer would be to first compute the size of the leb128 data by
     85   // reading it, then go back and read it again.
     86   const char* abbrev_start = iter->second.first +
     87                                       header_.abbrev_offset;
     88   const char* abbrevptr = abbrev_start;
     89 #ifndef NDEBUG
     90   const uint64 abbrev_length = iter->second.second - header_.abbrev_offset;
     91 #endif
     92 
     93   while (1) {
     94     CompilationUnit::Abbrev abbrev;
     95     size_t len;
     96     const uint64 number = reader_->ReadUnsignedLEB128(abbrevptr, &len);
     97 
     98     if (number == 0)
     99       break;
    100     abbrev.number = number;
    101     abbrevptr += len;
    102 
    103     assert(abbrevptr < abbrev_start + abbrev_length);
    104     const uint64 tag = reader_->ReadUnsignedLEB128(abbrevptr, &len);
    105     abbrevptr += len;
    106     abbrev.tag = static_cast<enum DwarfTag>(tag);
    107 
    108     assert(abbrevptr < abbrev_start + abbrev_length);
    109     abbrev.has_children = reader_->ReadOneByte(abbrevptr);
    110     abbrevptr += 1;
    111 
    112     assert(abbrevptr < abbrev_start + abbrev_length);
    113 
    114     while (1) {
    115       const uint64 nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
    116       abbrevptr += len;
    117 
    118       assert(abbrevptr < abbrev_start + abbrev_length);
    119       const uint64 formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
    120       abbrevptr += len;
    121       if (nametemp == 0 && formtemp == 0)
    122         break;
    123 
    124       const enum DwarfAttribute name =
    125         static_cast<enum DwarfAttribute>(nametemp);
    126       const enum DwarfForm form = static_cast<enum DwarfForm>(formtemp);
    127       abbrev.attributes.push_back(std::make_pair(name, form));
    128     }
    129     assert(abbrev.number == abbrevs_->size());
    130     abbrevs_->push_back(abbrev);
    131   }
    132 }
    133 
    134 // Skips a single DIE's attributes.
    135 const char* CompilationUnit::SkipDIE(const char* start,
    136                                               const Abbrev& abbrev) {
    137   for (AttributeList::const_iterator i = abbrev.attributes.begin();
    138        i != abbrev.attributes.end();
    139        i++)  {
    140     start = SkipAttribute(start, i->second);
    141   }
    142   return start;
    143 }
    144 
    145 // Skips a single attribute form's data.
    146 const char* CompilationUnit::SkipAttribute(const char* start,
    147                                                     enum DwarfForm form) {
    148   size_t len;
    149 
    150   switch (form) {
    151     case DW_FORM_indirect:
    152       form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
    153                                                                      &len));
    154       start += len;
    155       return SkipAttribute(start, form);
    156 
    157     case DW_FORM_flag_present:
    158       return start;
    159     case DW_FORM_data1:
    160     case DW_FORM_flag:
    161     case DW_FORM_ref1:
    162       return start + 1;
    163     case DW_FORM_ref2:
    164     case DW_FORM_data2:
    165       return start + 2;
    166     case DW_FORM_ref4:
    167     case DW_FORM_data4:
    168       return start + 4;
    169     case DW_FORM_ref8:
    170     case DW_FORM_data8:
    171     case DW_FORM_ref_sig8:
    172       return start + 8;
    173     case DW_FORM_string:
    174       return start + strlen(start) + 1;
    175     case DW_FORM_udata:
    176     case DW_FORM_ref_udata:
    177       reader_->ReadUnsignedLEB128(start, &len);
    178       return start + len;
    179 
    180     case DW_FORM_sdata:
    181       reader_->ReadSignedLEB128(start, &len);
    182       return start + len;
    183     case DW_FORM_addr:
    184       return start + reader_->AddressSize();
    185     case DW_FORM_ref_addr:
    186       // DWARF2 and 3/4 differ on whether ref_addr is address size or
    187       // offset size.
    188       assert(header_.version >= 2);
    189       if (header_.version == 2) {
    190         return start + reader_->AddressSize();
    191       } else if (header_.version >= 3) {
    192         return start + reader_->OffsetSize();
    193       }
    194       break;
    195 
    196     case DW_FORM_block1:
    197       return start + 1 + reader_->ReadOneByte(start);
    198     case DW_FORM_block2:
    199       return start + 2 + reader_->ReadTwoBytes(start);
    200     case DW_FORM_block4:
    201       return start + 4 + reader_->ReadFourBytes(start);
    202     case DW_FORM_block:
    203     case DW_FORM_exprloc: {
    204       uint64 size = reader_->ReadUnsignedLEB128(start, &len);
    205       return start + size + len;
    206     }
    207     case DW_FORM_strp:
    208     case DW_FORM_sec_offset:
    209       return start + reader_->OffsetSize();
    210   }
    211   fprintf(stderr,"Unhandled form type");
    212   return NULL;
    213 }
    214 
    215 // Read a DWARF2/3 header.
    216 // The header is variable length in DWARF3 (and DWARF2 as extended by
    217 // most compilers), and consists of an length field, a version number,
    218 // the offset in the .debug_abbrev section for our abbrevs, and an
    219 // address size.
    220 void CompilationUnit::ReadHeader() {
    221   const char* headerptr = buffer_;
    222   size_t initial_length_size;
    223 
    224   assert(headerptr + 4 < buffer_ + buffer_length_);
    225   const uint64 initial_length
    226     = reader_->ReadInitialLength(headerptr, &initial_length_size);
    227   headerptr += initial_length_size;
    228   header_.length = initial_length;
    229 
    230   assert(headerptr + 2 < buffer_ + buffer_length_);
    231   header_.version = reader_->ReadTwoBytes(headerptr);
    232   headerptr += 2;
    233 
    234   assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
    235   header_.abbrev_offset = reader_->ReadOffset(headerptr);
    236   headerptr += reader_->OffsetSize();
    237 
    238   assert(headerptr + 1 < buffer_ + buffer_length_);
    239   header_.address_size = reader_->ReadOneByte(headerptr);
    240   reader_->SetAddressSize(header_.address_size);
    241   headerptr += 1;
    242 
    243   after_header_ = headerptr;
    244 
    245   // This check ensures that we don't have to do checking during the
    246   // reading of DIEs. header_.length does not include the size of the
    247   // initial length.
    248   assert(buffer_ + initial_length_size + header_.length <=
    249         buffer_ + buffer_length_);
    250 }
    251 
    252 uint64 CompilationUnit::Start() {
    253   // First get the debug_info section.  ".debug_info" is the name
    254   // recommended in the DWARF spec, and used on Linux; "__debug_info"
    255   // is the name used in Mac OS X Mach-O files.
    256   SectionMap::const_iterator iter = sections_.find(".debug_info");
    257   if (iter == sections_.end())
    258     iter = sections_.find("__debug_info");
    259   assert(iter != sections_.end());
    260 
    261   // Set up our buffer
    262   buffer_ = iter->second.first + offset_from_section_start_;
    263   buffer_length_ = iter->second.second - offset_from_section_start_;
    264 
    265   // Read the header
    266   ReadHeader();
    267 
    268   // Figure out the real length from the end of the initial length to
    269   // the end of the compilation unit, since that is the value we
    270   // return.
    271   uint64 ourlength = header_.length;
    272   if (reader_->OffsetSize() == 8)
    273     ourlength += 12;
    274   else
    275     ourlength += 4;
    276 
    277   // See if the user wants this compilation unit, and if not, just return.
    278   if (!handler_->StartCompilationUnit(offset_from_section_start_,
    279                                       reader_->AddressSize(),
    280                                       reader_->OffsetSize(),
    281                                       header_.length,
    282                                       header_.version))
    283     return ourlength;
    284 
    285   // Otherwise, continue by reading our abbreviation entries.
    286   ReadAbbrevs();
    287 
    288   // Set the string section if we have one.  ".debug_str" is the name
    289   // recommended in the DWARF spec, and used on Linux; "__debug_str"
    290   // is the name used in Mac OS X Mach-O files.
    291   iter = sections_.find(".debug_str");
    292   if (iter == sections_.end())
    293     iter = sections_.find("__debug_str");
    294   if (iter != sections_.end()) {
    295     string_buffer_ = iter->second.first;
    296     string_buffer_length_ = iter->second.second;
    297   }
    298 
    299   // Now that we have our abbreviations, start processing DIE's.
    300   ProcessDIEs();
    301 
    302   return ourlength;
    303 }
    304 
    305 // If one really wanted, you could merge SkipAttribute and
    306 // ProcessAttribute
    307 // This is all boring data manipulation and calling of the handler.
    308 const char* CompilationUnit::ProcessAttribute(
    309     uint64 dieoffset, const char* start, enum DwarfAttribute attr,
    310     enum DwarfForm form) {
    311   size_t len;
    312 
    313   switch (form) {
    314     // DW_FORM_indirect is never used because it is such a space
    315     // waster.
    316     case DW_FORM_indirect:
    317       form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
    318                                                                      &len));
    319       start += len;
    320       return ProcessAttribute(dieoffset, start, attr, form);
    321 
    322     case DW_FORM_flag_present:
    323       handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 1);
    324       return start;
    325     case DW_FORM_data1:
    326     case DW_FORM_flag:
    327       handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
    328                                          reader_->ReadOneByte(start));
    329       return start + 1;
    330     case DW_FORM_data2:
    331       handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
    332                                          reader_->ReadTwoBytes(start));
    333       return start + 2;
    334     case DW_FORM_data4:
    335       handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
    336                                          reader_->ReadFourBytes(start));
    337       return start + 4;
    338     case DW_FORM_data8:
    339       handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
    340                                          reader_->ReadEightBytes(start));
    341       return start + 8;
    342     case DW_FORM_string: {
    343       const char* str = start;
    344       handler_->ProcessAttributeString(dieoffset, attr, form,
    345                                        str);
    346       return start + strlen(str) + 1;
    347     }
    348     case DW_FORM_udata:
    349       handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
    350                                          reader_->ReadUnsignedLEB128(start,
    351                                                                      &len));
    352       return start + len;
    353 
    354     case DW_FORM_sdata:
    355       handler_->ProcessAttributeSigned(dieoffset, attr, form,
    356                                       reader_->ReadSignedLEB128(start, &len));
    357       return start + len;
    358     case DW_FORM_addr:
    359       handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
    360                                          reader_->ReadAddress(start));
    361       return start + reader_->AddressSize();
    362     case DW_FORM_sec_offset:
    363       handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
    364                                          reader_->ReadOffset(start));
    365       return start + reader_->OffsetSize();
    366 
    367     case DW_FORM_ref1:
    368       handler_->ProcessAttributeReference(dieoffset, attr, form,
    369                                           reader_->ReadOneByte(start)
    370                                           + offset_from_section_start_);
    371       return start + 1;
    372     case DW_FORM_ref2:
    373       handler_->ProcessAttributeReference(dieoffset, attr, form,
    374                                           reader_->ReadTwoBytes(start)
    375                                           + offset_from_section_start_);
    376       return start + 2;
    377     case DW_FORM_ref4:
    378       handler_->ProcessAttributeReference(dieoffset, attr, form,
    379                                           reader_->ReadFourBytes(start)
    380                                           + offset_from_section_start_);
    381       return start + 4;
    382     case DW_FORM_ref8:
    383       handler_->ProcessAttributeReference(dieoffset, attr, form,
    384                                           reader_->ReadEightBytes(start)
    385                                           + offset_from_section_start_);
    386       return start + 8;
    387     case DW_FORM_ref_udata:
    388       handler_->ProcessAttributeReference(dieoffset, attr, form,
    389                                           reader_->ReadUnsignedLEB128(start,
    390                                                                       &len)
    391                                           + offset_from_section_start_);
    392       return start + len;
    393     case DW_FORM_ref_addr:
    394       // DWARF2 and 3/4 differ on whether ref_addr is address size or
    395       // offset size.
    396       assert(header_.version >= 2);
    397       if (header_.version == 2) {
    398         handler_->ProcessAttributeReference(dieoffset, attr, form,
    399                                             reader_->ReadAddress(start));
    400         return start + reader_->AddressSize();
    401       } else if (header_.version >= 3) {
    402         handler_->ProcessAttributeReference(dieoffset, attr, form,
    403                                             reader_->ReadOffset(start));
    404         return start + reader_->OffsetSize();
    405       }
    406       break;
    407     case DW_FORM_ref_sig8:
    408       handler_->ProcessAttributeSignature(dieoffset, attr, form,
    409                                           reader_->ReadEightBytes(start));
    410       return start + 8;
    411 
    412     case DW_FORM_block1: {
    413       uint64 datalen = reader_->ReadOneByte(start);
    414       handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1,
    415                                        datalen);
    416       return start + 1 + datalen;
    417     }
    418     case DW_FORM_block2: {
    419       uint64 datalen = reader_->ReadTwoBytes(start);
    420       handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2,
    421                                        datalen);
    422       return start + 2 + datalen;
    423     }
    424     case DW_FORM_block4: {
    425       uint64 datalen = reader_->ReadFourBytes(start);
    426       handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4,
    427                                        datalen);
    428       return start + 4 + datalen;
    429     }
    430     case DW_FORM_block:
    431     case DW_FORM_exprloc: {
    432       uint64 datalen = reader_->ReadUnsignedLEB128(start, &len);
    433       handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len,
    434                                        datalen);
    435       return start + datalen + len;
    436     }
    437     case DW_FORM_strp: {
    438       assert(string_buffer_ != NULL);
    439 
    440       const uint64 offset = reader_->ReadOffset(start);
    441       assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_);
    442 
    443       const char* str = string_buffer_ + offset;
    444       handler_->ProcessAttributeString(dieoffset, attr, form,
    445                                        str);
    446       return start + reader_->OffsetSize();
    447     }
    448   }
    449   fprintf(stderr, "Unhandled form type\n");
    450   return NULL;
    451 }
    452 
    453 const char* CompilationUnit::ProcessDIE(uint64 dieoffset,
    454                                                  const char* start,
    455                                                  const Abbrev& abbrev) {
    456   for (AttributeList::const_iterator i = abbrev.attributes.begin();
    457        i != abbrev.attributes.end();
    458        i++)  {
    459     start = ProcessAttribute(dieoffset, start, i->first, i->second);
    460   }
    461   return start;
    462 }
    463 
    464 void CompilationUnit::ProcessDIEs() {
    465   const char* dieptr = after_header_;
    466   size_t len;
    467 
    468   // lengthstart is the place the length field is based on.
    469   // It is the point in the header after the initial length field
    470   const char* lengthstart = buffer_;
    471 
    472   // In 64 bit dwarf, the initial length is 12 bytes, because of the
    473   // 0xffffffff at the start.
    474   if (reader_->OffsetSize() == 8)
    475     lengthstart += 12;
    476   else
    477     lengthstart += 4;
    478 
    479   std::stack<uint64> die_stack;
    480 
    481   while (dieptr < (lengthstart + header_.length)) {
    482     // We give the user the absolute offset from the beginning of
    483     // debug_info, since they need it to deal with ref_addr forms.
    484     uint64 absolute_offset = (dieptr - buffer_) + offset_from_section_start_;
    485 
    486     uint64 abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len);
    487 
    488     dieptr += len;
    489 
    490     // Abbrev == 0 represents the end of a list of children, or padding
    491     // at the end of the compilation unit.
    492     if (abbrev_num == 0) {
    493       if (die_stack.size() == 0)
    494         // If it is padding, then we are done with the compilation unit's DIEs.
    495         return;
    496       const uint64 offset = die_stack.top();
    497       die_stack.pop();
    498       handler_->EndDIE(offset);
    499       continue;
    500     }
    501 
    502     const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num));
    503     const enum DwarfTag tag = abbrev.tag;
    504     if (!handler_->StartDIE(absolute_offset, tag)) {
    505       dieptr = SkipDIE(dieptr, abbrev);
    506     } else {
    507       dieptr = ProcessDIE(absolute_offset, dieptr, abbrev);
    508     }
    509 
    510     if (abbrev.has_children) {
    511       die_stack.push(absolute_offset);
    512     } else {
    513       handler_->EndDIE(absolute_offset);
    514     }
    515   }
    516 }
    517 
    518 LineInfo::LineInfo(const char* buffer, uint64 buffer_length,
    519                    ByteReader* reader, LineInfoHandler* handler):
    520     handler_(handler), reader_(reader), buffer_(buffer),
    521     buffer_length_(buffer_length) {
    522   header_.std_opcode_lengths = NULL;
    523 }
    524 
    525 uint64 LineInfo::Start() {
    526   ReadHeader();
    527   ReadLines();
    528   return after_header_ - buffer_;
    529 }
    530 
    531 // The header for a debug_line section is mildly complicated, because
    532 // the line info is very tightly encoded.
    533 void LineInfo::ReadHeader() {
    534   const char* lineptr = buffer_;
    535   size_t initial_length_size;
    536 
    537   const uint64 initial_length
    538     = reader_->ReadInitialLength(lineptr, &initial_length_size);
    539 
    540   lineptr += initial_length_size;
    541   header_.total_length = initial_length;
    542   assert(buffer_ + initial_length_size + header_.total_length <=
    543         buffer_ + buffer_length_);
    544 
    545   // Address size *must* be set by CU ahead of time.
    546   assert(reader_->AddressSize() != 0);
    547 
    548   header_.version = reader_->ReadTwoBytes(lineptr);
    549   lineptr += 2;
    550 
    551   header_.prologue_length = reader_->ReadOffset(lineptr);
    552   lineptr += reader_->OffsetSize();
    553 
    554   header_.min_insn_length = reader_->ReadOneByte(lineptr);
    555   lineptr += 1;
    556 
    557   header_.default_is_stmt = reader_->ReadOneByte(lineptr);
    558   lineptr += 1;
    559 
    560   header_.line_base = *reinterpret_cast<const int8*>(lineptr);
    561   lineptr += 1;
    562 
    563   header_.line_range = reader_->ReadOneByte(lineptr);
    564   lineptr += 1;
    565 
    566   header_.opcode_base = reader_->ReadOneByte(lineptr);
    567   lineptr += 1;
    568 
    569   header_.std_opcode_lengths = new std::vector<unsigned char>;
    570   header_.std_opcode_lengths->resize(header_.opcode_base + 1);
    571   (*header_.std_opcode_lengths)[0] = 0;
    572   for (int i = 1; i < header_.opcode_base; i++) {
    573     (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr);
    574     lineptr += 1;
    575   }
    576 
    577   // It is legal for the directory entry table to be empty.
    578   if (*lineptr) {
    579     uint32 dirindex = 1;
    580     while (*lineptr) {
    581       const char* dirname = lineptr;
    582       handler_->DefineDir(dirname, dirindex);
    583       lineptr += strlen(dirname) + 1;
    584       dirindex++;
    585     }
    586   }
    587   lineptr++;
    588 
    589   // It is also legal for the file entry table to be empty.
    590   if (*lineptr) {
    591     uint32 fileindex = 1;
    592     size_t len;
    593     while (*lineptr) {
    594       const char* filename = lineptr;
    595       lineptr += strlen(filename) + 1;
    596 
    597       uint64 dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
    598       lineptr += len;
    599 
    600       uint64 mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
    601       lineptr += len;
    602 
    603       uint64 filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
    604       lineptr += len;
    605       handler_->DefineFile(filename, fileindex, static_cast<uint32>(dirindex),
    606                            mod_time, filelength);
    607       fileindex++;
    608     }
    609   }
    610   lineptr++;
    611 
    612   after_header_ = lineptr;
    613 }
    614 
    615 /* static */
    616 bool LineInfo::ProcessOneOpcode(ByteReader* reader,
    617                                 LineInfoHandler* handler,
    618                                 const struct LineInfoHeader &header,
    619                                 const char* start,
    620                                 struct LineStateMachine* lsm,
    621                                 size_t* len,
    622                                 uintptr pc,
    623                                 bool *lsm_passes_pc) {
    624   size_t oplen = 0;
    625   size_t templen;
    626   uint8 opcode = reader->ReadOneByte(start);
    627   oplen++;
    628   start++;
    629 
    630   // If the opcode is great than the opcode_base, it is a special
    631   // opcode. Most line programs consist mainly of special opcodes.
    632   if (opcode >= header.opcode_base) {
    633     opcode -= header.opcode_base;
    634     const int64 advance_address = (opcode / header.line_range)
    635                                   * header.min_insn_length;
    636     const int32 advance_line = (opcode % header.line_range)
    637                                + header.line_base;
    638 
    639     // Check if the lsm passes "pc". If so, mark it as passed.
    640     if (lsm_passes_pc &&
    641         lsm->address <= pc && pc < lsm->address + advance_address) {
    642       *lsm_passes_pc = true;
    643     }
    644 
    645     lsm->address += advance_address;
    646     lsm->line_num += advance_line;
    647     lsm->basic_block = true;
    648     *len = oplen;
    649     return true;
    650   }
    651 
    652   // Otherwise, we have the regular opcodes
    653   switch (opcode) {
    654     case DW_LNS_copy: {
    655       lsm->basic_block = false;
    656       *len = oplen;
    657       return true;
    658     }
    659 
    660     case DW_LNS_advance_pc: {
    661       uint64 advance_address = reader->ReadUnsignedLEB128(start, &templen);
    662       oplen += templen;
    663 
    664       // Check if the lsm passes "pc". If so, mark it as passed.
    665       if (lsm_passes_pc && lsm->address <= pc &&
    666           pc < lsm->address + header.min_insn_length * advance_address) {
    667         *lsm_passes_pc = true;
    668       }
    669 
    670       lsm->address += header.min_insn_length * advance_address;
    671     }
    672       break;
    673     case DW_LNS_advance_line: {
    674       const int64 advance_line = reader->ReadSignedLEB128(start, &templen);
    675       oplen += templen;
    676       lsm->line_num += static_cast<int32>(advance_line);
    677 
    678       // With gcc 4.2.1, we can get the line_no here for the first time
    679       // since DW_LNS_advance_line is called after DW_LNE_set_address is
    680       // called. So we check if the lsm passes "pc" here, not in
    681       // DW_LNE_set_address.
    682       if (lsm_passes_pc && lsm->address == pc) {
    683         *lsm_passes_pc = true;
    684       }
    685     }
    686       break;
    687     case DW_LNS_set_file: {
    688       const uint64 fileno = reader->ReadUnsignedLEB128(start, &templen);
    689       oplen += templen;
    690       lsm->file_num = static_cast<uint32>(fileno);
    691     }
    692       break;
    693     case DW_LNS_set_column: {
    694       const uint64 colno = reader->ReadUnsignedLEB128(start, &templen);
    695       oplen += templen;
    696       lsm->column_num = static_cast<uint32>(colno);
    697     }
    698       break;
    699     case DW_LNS_negate_stmt: {
    700       lsm->is_stmt = !lsm->is_stmt;
    701     }
    702       break;
    703     case DW_LNS_set_basic_block: {
    704       lsm->basic_block = true;
    705     }
    706       break;
    707     case DW_LNS_fixed_advance_pc: {
    708       const uint16 advance_address = reader->ReadTwoBytes(start);
    709       oplen += 2;
    710 
    711       // Check if the lsm passes "pc". If so, mark it as passed.
    712       if (lsm_passes_pc &&
    713           lsm->address <= pc && pc < lsm->address + advance_address) {
    714         *lsm_passes_pc = true;
    715       }
    716 
    717       lsm->address += advance_address;
    718     }
    719       break;
    720     case DW_LNS_const_add_pc: {
    721       const int64 advance_address = header.min_insn_length
    722                                     * ((255 - header.opcode_base)
    723                                        / header.line_range);
    724 
    725       // Check if the lsm passes "pc". If so, mark it as passed.
    726       if (lsm_passes_pc &&
    727           lsm->address <= pc && pc < lsm->address + advance_address) {
    728         *lsm_passes_pc = true;
    729       }
    730 
    731       lsm->address += advance_address;
    732     }
    733       break;
    734     case DW_LNS_extended_op: {
    735       const uint64 extended_op_len = reader->ReadUnsignedLEB128(start,
    736                                                                 &templen);
    737       start += templen;
    738       oplen += templen + extended_op_len;
    739 
    740       const uint64 extended_op = reader->ReadOneByte(start);
    741       start++;
    742 
    743       switch (extended_op) {
    744         case DW_LNE_end_sequence: {
    745           lsm->end_sequence = true;
    746           *len = oplen;
    747           return true;
    748         }
    749           break;
    750         case DW_LNE_set_address: {
    751           // With gcc 4.2.1, we cannot tell the line_no here since
    752           // DW_LNE_set_address is called before DW_LNS_advance_line is
    753           // called.  So we do not check if the lsm passes "pc" here.  See
    754           // also the comment in DW_LNS_advance_line.
    755           uint64 address = reader->ReadAddress(start);
    756           lsm->address = address;
    757         }
    758           break;
    759         case DW_LNE_define_file: {
    760           const char* filename  = start;
    761 
    762           templen = strlen(filename) + 1;
    763           start += templen;
    764 
    765           uint64 dirindex = reader->ReadUnsignedLEB128(start, &templen);
    766           oplen += templen;
    767 
    768           const uint64 mod_time = reader->ReadUnsignedLEB128(start,
    769                                                              &templen);
    770           oplen += templen;
    771 
    772           const uint64 filelength = reader->ReadUnsignedLEB128(start,
    773                                                                &templen);
    774           oplen += templen;
    775 
    776           if (handler) {
    777             handler->DefineFile(filename, -1, static_cast<uint32>(dirindex),
    778                                 mod_time, filelength);
    779           }
    780         }
    781           break;
    782       }
    783     }
    784       break;
    785 
    786     default: {
    787       // Ignore unknown opcode  silently
    788       if (header.std_opcode_lengths) {
    789         for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) {
    790           reader->ReadUnsignedLEB128(start, &templen);
    791           start += templen;
    792           oplen += templen;
    793         }
    794       }
    795     }
    796       break;
    797   }
    798   *len = oplen;
    799   return false;
    800 }
    801 
    802 void LineInfo::ReadLines() {
    803   struct LineStateMachine lsm;
    804 
    805   // lengthstart is the place the length field is based on.
    806   // It is the point in the header after the initial length field
    807   const char* lengthstart = buffer_;
    808 
    809   // In 64 bit dwarf, the initial length is 12 bytes, because of the
    810   // 0xffffffff at the start.
    811   if (reader_->OffsetSize() == 8)
    812     lengthstart += 12;
    813   else
    814     lengthstart += 4;
    815 
    816   const char* lineptr = after_header_;
    817   lsm.Reset(header_.default_is_stmt);
    818 
    819   // The LineInfoHandler interface expects each line's length along
    820   // with its address, but DWARF only provides addresses (sans
    821   // length), and an end-of-sequence address; one infers the length
    822   // from the next address. So we report a line only when we get the
    823   // next line's address, or the end-of-sequence address.
    824   bool have_pending_line = false;
    825   uint64 pending_address = 0;
    826   uint32 pending_file_num = 0, pending_line_num = 0, pending_column_num = 0;
    827 
    828   while (lineptr < lengthstart + header_.total_length) {
    829     size_t oplength;
    830     bool add_row = ProcessOneOpcode(reader_, handler_, header_,
    831                                     lineptr, &lsm, &oplength, (uintptr)-1,
    832                                     NULL);
    833     if (add_row) {
    834       if (have_pending_line)
    835         handler_->AddLine(pending_address, lsm.address - pending_address,
    836                           pending_file_num, pending_line_num,
    837                           pending_column_num);
    838       if (lsm.end_sequence) {
    839         lsm.Reset(header_.default_is_stmt);
    840         have_pending_line = false;
    841       } else {
    842         pending_address = lsm.address;
    843         pending_file_num = lsm.file_num;
    844         pending_line_num = lsm.line_num;
    845         pending_column_num = lsm.column_num;
    846         have_pending_line = true;
    847       }
    848     }
    849     lineptr += oplength;
    850   }
    851 
    852   after_header_ = lengthstart + header_.total_length;
    853 }
    854 
    855 // A DWARF rule for recovering the address or value of a register, or
    856 // computing the canonical frame address. There is one subclass of this for
    857 // each '*Rule' member function in CallFrameInfo::Handler.
    858 //
    859 // It's annoying that we have to handle Rules using pointers (because
    860 // the concrete instances can have an arbitrary size). They're small,
    861 // so it would be much nicer if we could just handle them by value
    862 // instead of fretting about ownership and destruction.
    863 //
    864 // It seems like all these could simply be instances of std::tr1::bind,
    865 // except that we need instances to be EqualityComparable, too.
    866 //
    867 // This could logically be nested within State, but then the qualified names
    868 // get horrendous.
    869 class CallFrameInfo::Rule {
    870  public:
    871   virtual ~Rule() { }
    872 
    873   // Tell HANDLER that, at ADDRESS in the program, REGISTER can be
    874   // recovered using this rule. If REGISTER is kCFARegister, then this rule
    875   // describes how to compute the canonical frame address. Return what the
    876   // HANDLER member function returned.
    877   virtual bool Handle(Handler *handler,
    878                       uint64 address, int register) const = 0;
    879 
    880   // Equality on rules. We use these to decide which rules we need
    881   // to report after a DW_CFA_restore_state instruction.
    882   virtual bool operator==(const Rule &rhs) const = 0;
    883 
    884   bool operator!=(const Rule &rhs) const { return ! (*this == rhs); }
    885 
    886   // Return a pointer to a copy of this rule.
    887   virtual Rule *Copy() const = 0;
    888 
    889   // If this is a base+offset rule, change its base register to REG.
    890   // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
    891   virtual void SetBaseRegister(unsigned reg) { }
    892 
    893   // If this is a base+offset rule, change its offset to OFFSET. Otherwise,
    894   // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
    895   virtual void SetOffset(long long offset) { }
    896 };
    897 
    898 // Rule: the value the register had in the caller cannot be recovered.
    899 class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule {
    900  public:
    901   UndefinedRule() { }
    902   ~UndefinedRule() { }
    903   bool Handle(Handler *handler, uint64 address, int reg) const {
    904     return handler->UndefinedRule(address, reg);
    905   }
    906   bool operator==(const Rule &rhs) const {
    907     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
    908     // been carefully considered; cheap RTTI-like workarounds are forbidden.
    909     const UndefinedRule *our_rhs = dynamic_cast<const UndefinedRule *>(&rhs);
    910     return (our_rhs != NULL);
    911   }
    912   Rule *Copy() const { return new UndefinedRule(*this); }
    913 };
    914 
    915 // Rule: the register's value is the same as that it had in the caller.
    916 class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule {
    917  public:
    918   SameValueRule() { }
    919   ~SameValueRule() { }
    920   bool Handle(Handler *handler, uint64 address, int reg) const {
    921     return handler->SameValueRule(address, reg);
    922   }
    923   bool operator==(const Rule &rhs) const {
    924     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
    925     // been carefully considered; cheap RTTI-like workarounds are forbidden.
    926     const SameValueRule *our_rhs = dynamic_cast<const SameValueRule *>(&rhs);
    927     return (our_rhs != NULL);
    928   }
    929   Rule *Copy() const { return new SameValueRule(*this); }
    930 };
    931 
    932 // Rule: the register is saved at OFFSET from BASE_REGISTER.  BASE_REGISTER
    933 // may be CallFrameInfo::Handler::kCFARegister.
    934 class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule {
    935  public:
    936   OffsetRule(int base_register, long offset)
    937       : base_register_(base_register), offset_(offset) { }
    938   ~OffsetRule() { }
    939   bool Handle(Handler *handler, uint64 address, int reg) const {
    940     return handler->OffsetRule(address, reg, base_register_, offset_);
    941   }
    942   bool operator==(const Rule &rhs) const {
    943     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
    944     // been carefully considered; cheap RTTI-like workarounds are forbidden.
    945     const OffsetRule *our_rhs = dynamic_cast<const OffsetRule *>(&rhs);
    946     return (our_rhs &&
    947             base_register_ == our_rhs->base_register_ &&
    948             offset_ == our_rhs->offset_);
    949   }
    950   Rule *Copy() const { return new OffsetRule(*this); }
    951   // We don't actually need SetBaseRegister or SetOffset here, since they
    952   // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it
    953   // doesn't make sense to use OffsetRule for computing the CFA: it
    954   // computes the address at which a register is saved, not a value.
    955  private:
    956   int base_register_;
    957   long offset_;
    958 };
    959 
    960 // Rule: the value the register had in the caller is the value of
    961 // BASE_REGISTER plus offset. BASE_REGISTER may be
    962 // CallFrameInfo::Handler::kCFARegister.
    963 class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule {
    964  public:
    965   ValOffsetRule(int base_register, long offset)
    966       : base_register_(base_register), offset_(offset) { }
    967   ~ValOffsetRule() { }
    968   bool Handle(Handler *handler, uint64 address, int reg) const {
    969     return handler->ValOffsetRule(address, reg, base_register_, offset_);
    970   }
    971   bool operator==(const Rule &rhs) const {
    972     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
    973     // been carefully considered; cheap RTTI-like workarounds are forbidden.
    974     const ValOffsetRule *our_rhs = dynamic_cast<const ValOffsetRule *>(&rhs);
    975     return (our_rhs &&
    976             base_register_ == our_rhs->base_register_ &&
    977             offset_ == our_rhs->offset_);
    978   }
    979   Rule *Copy() const { return new ValOffsetRule(*this); }
    980   void SetBaseRegister(unsigned reg) { base_register_ = reg; }
    981   void SetOffset(long long offset) { offset_ = offset; }
    982  private:
    983   int base_register_;
    984   long offset_;
    985 };
    986 
    987 // Rule: the register has been saved in another register REGISTER_NUMBER_.
    988 class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule {
    989  public:
    990   explicit RegisterRule(int register_number)
    991       : register_number_(register_number) { }
    992   ~RegisterRule() { }
    993   bool Handle(Handler *handler, uint64 address, int reg) const {
    994     return handler->RegisterRule(address, reg, register_number_);
    995   }
    996   bool operator==(const Rule &rhs) const {
    997     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
    998     // been carefully considered; cheap RTTI-like workarounds are forbidden.
    999     const RegisterRule *our_rhs = dynamic_cast<const RegisterRule *>(&rhs);
   1000     return (our_rhs && register_number_ == our_rhs->register_number_);
   1001   }
   1002   Rule *Copy() const { return new RegisterRule(*this); }
   1003  private:
   1004   int register_number_;
   1005 };
   1006 
   1007 // Rule: EXPRESSION evaluates to the address at which the register is saved.
   1008 class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule {
   1009  public:
   1010   explicit ExpressionRule(const string &expression)
   1011       : expression_(expression) { }
   1012   ~ExpressionRule() { }
   1013   bool Handle(Handler *handler, uint64 address, int reg) const {
   1014     return handler->ExpressionRule(address, reg, expression_);
   1015   }
   1016   bool operator==(const Rule &rhs) const {
   1017     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
   1018     // been carefully considered; cheap RTTI-like workarounds are forbidden.
   1019     const ExpressionRule *our_rhs = dynamic_cast<const ExpressionRule *>(&rhs);
   1020     return (our_rhs && expression_ == our_rhs->expression_);
   1021   }
   1022   Rule *Copy() const { return new ExpressionRule(*this); }
   1023  private:
   1024   string expression_;
   1025 };
   1026 
// Rule: EXPRESSION evaluates to the value the register had in the caller.
   1028 class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule {
   1029  public:
   1030   explicit ValExpressionRule(const string &expression)
   1031       : expression_(expression) { }
   1032   ~ValExpressionRule() { }
   1033   bool Handle(Handler *handler, uint64 address, int reg) const {
   1034     return handler->ValExpressionRule(address, reg, expression_);
   1035   }
   1036   bool operator==(const Rule &rhs) const {
   1037     // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
   1038     // been carefully considered; cheap RTTI-like workarounds are forbidden.
   1039     const ValExpressionRule *our_rhs =
   1040         dynamic_cast<const ValExpressionRule *>(&rhs);
   1041     return (our_rhs && expression_ == our_rhs->expression_);
   1042   }
   1043   Rule *Copy() const { return new ValExpressionRule(*this); }
   1044  private:
   1045   string expression_;
   1046 };
   1047 
   1048 // A map from register numbers to rules.
   1049 class CallFrameInfo::RuleMap {
   1050  public:
   1051   RuleMap() : cfa_rule_(NULL) { }
   1052   RuleMap(const RuleMap &rhs) : cfa_rule_(NULL) { *this = rhs; }
   1053   ~RuleMap() { Clear(); }
   1054 
   1055   RuleMap &operator=(const RuleMap &rhs);
   1056 
   1057   // Set the rule for computing the CFA to RULE. Take ownership of RULE.
   1058   void SetCFARule(Rule *rule) { delete cfa_rule_; cfa_rule_ = rule; }
   1059 
   1060   // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains
   1061   // ownership of the rule. We use this for DW_CFA_def_cfa_offset and
   1062   // DW_CFA_def_cfa_register, and for detecting references to the CFA before
   1063   // a rule for it has been established.
   1064   Rule *CFARule() const { return cfa_rule_; }
   1065 
   1066   // Return the rule for REG, or NULL if there is none. The caller takes
   1067   // ownership of the result.
   1068   Rule *RegisterRule(int reg) const;
   1069 
   1070   // Set the rule for computing REG to RULE. Take ownership of RULE.
   1071   void SetRegisterRule(int reg, Rule *rule);
   1072 
   1073   // Make all the appropriate calls to HANDLER as if we were changing from
   1074   // this RuleMap to NEW_RULES at ADDRESS. We use this to implement
   1075   // DW_CFA_restore_state, where lots of rules can change simultaneously.
   1076   // Return true if all handlers returned true; otherwise, return false.
   1077   bool HandleTransitionTo(Handler *handler, uint64 address,
   1078                           const RuleMap &new_rules) const;
   1079 
   1080  private:
   1081   // A map from register numbers to Rules.
   1082   typedef std::map<int, Rule *> RuleByNumber;
   1083 
   1084   // Remove all register rules and clear cfa_rule_.
   1085   void Clear();
   1086 
   1087   // The rule for computing the canonical frame address. This RuleMap owns
   1088   // this rule.
   1089   Rule *cfa_rule_;
   1090 
   1091   // A map from register numbers to postfix expressions to recover
   1092   // their values. This RuleMap owns the Rules the map refers to.
   1093   RuleByNumber registers_;
   1094 };
   1095 
   1096 CallFrameInfo::RuleMap &CallFrameInfo::RuleMap::operator=(const RuleMap &rhs) {
   1097   Clear();
   1098   // Since each map owns the rules it refers to, assignment must copy them.
   1099   if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
   1100   for (RuleByNumber::const_iterator it = rhs.registers_.begin();
   1101        it != rhs.registers_.end(); it++)
   1102     registers_[it->first] = it->second->Copy();
   1103   return *this;
   1104 }
   1105 
   1106 CallFrameInfo::Rule *CallFrameInfo::RuleMap::RegisterRule(int reg) const {
   1107   assert(reg != Handler::kCFARegister);
   1108   RuleByNumber::const_iterator it = registers_.find(reg);
   1109   if (it != registers_.end())
   1110     return it->second->Copy();
   1111   else
   1112     return NULL;
   1113 }
   1114 
   1115 void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule *rule) {
   1116   assert(reg != Handler::kCFARegister);
   1117   assert(rule);
   1118   Rule **slot = &registers_[reg];
   1119   delete *slot;
   1120   *slot = rule;
   1121 }
   1122 
   1123 bool CallFrameInfo::RuleMap::HandleTransitionTo(
   1124     Handler *handler,
   1125     uint64 address,
   1126     const RuleMap &new_rules) const {
   1127   // Transition from cfa_rule_ to new_rules.cfa_rule_.
   1128   if (cfa_rule_ && new_rules.cfa_rule_) {
   1129     if (*cfa_rule_ != *new_rules.cfa_rule_ &&
   1130         !new_rules.cfa_rule_->Handle(handler, address,
   1131                                      Handler::kCFARegister))
   1132       return false;
   1133   } else if (cfa_rule_) {
   1134     // this RuleMap has a CFA rule but new_rules doesn't.
   1135     // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
   1136     // it's garbage input. The instruction interpreter should have
   1137     // detected this and warned, so take no action here.
   1138   } else if (new_rules.cfa_rule_) {
   1139     // This shouldn't be possible: NEW_RULES is some prior state, and
   1140     // there's no way to remove entries.
   1141     assert(0);
   1142   } else {
   1143     // Both CFA rules are empty.  No action needed.
   1144   }
   1145 
   1146   // Traverse the two maps in order by register number, and report
   1147   // whatever differences we find.
   1148   RuleByNumber::const_iterator old_it = registers_.begin();
   1149   RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
   1150   while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
   1151     if (old_it->first < new_it->first) {
   1152       // This RuleMap has an entry for old_it->first, but NEW_RULES
   1153       // doesn't.
   1154       //
   1155       // This isn't really the right thing to do, but since CFI generally
   1156       // only mentions callee-saves registers, and GCC's convention for
   1157       // callee-saves registers is that they are unchanged, it's a good
   1158       // approximation.
   1159       if (!handler->SameValueRule(address, old_it->first))
   1160         return false;
   1161       old_it++;
   1162     } else if (old_it->first > new_it->first) {
   1163       // NEW_RULES has entry for new_it->first, but this RuleMap
   1164       // doesn't. This shouldn't be possible: NEW_RULES is some prior
   1165       // state, and there's no way to remove entries.
   1166       assert(0);
   1167     } else {
   1168       // Both maps have an entry for this register. Report the new
   1169       // rule if it is different.
   1170       if (*old_it->second != *new_it->second &&
   1171           !new_it->second->Handle(handler, address, new_it->first))
   1172         return false;
   1173       new_it++, old_it++;
   1174     }
   1175   }
   1176   // Finish off entries from this RuleMap with no counterparts in new_rules.
   1177   while (old_it != registers_.end()) {
   1178     if (!handler->SameValueRule(address, old_it->first))
   1179       return false;
   1180     old_it++;
   1181   }
   1182   // Since we only make transitions from a rule set to some previously
   1183   // saved rule set, and we can only add rules to the map, NEW_RULES
   1184   // must have fewer rules than *this.
   1185   assert(new_it == new_rules.registers_.end());
   1186 
   1187   return true;
   1188 }
   1189 
   1190 // Remove all register rules and clear cfa_rule_.
   1191 void CallFrameInfo::RuleMap::Clear() {
   1192   delete cfa_rule_;
   1193   cfa_rule_ = NULL;
   1194   for (RuleByNumber::iterator it = registers_.begin();
   1195        it != registers_.end(); it++)
   1196     delete it->second;
   1197   registers_.clear();
   1198 }
   1199 
   1200 // The state of the call frame information interpreter as it processes
   1201 // instructions from a CIE and FDE.
   1202 class CallFrameInfo::State {
   1203  public:
   1204   // Create a call frame information interpreter state with the given
   1205   // reporter, reader, handler, and initial call frame info address.
   1206   State(ByteReader *reader, Handler *handler, Reporter *reporter,
   1207         uint64 address)
   1208       : reader_(reader), handler_(handler), reporter_(reporter),
   1209         address_(address), entry_(NULL), cursor_(NULL) { }
   1210 
   1211   // Interpret instructions from CIE, save the resulting rule set for
   1212   // DW_CFA_restore instructions, and return true. On error, report
   1213   // the problem to reporter_ and return false.
   1214   bool InterpretCIE(const CIE &cie);
   1215 
   1216   // Interpret instructions from FDE, and return true. On error,
   1217   // report the problem to reporter_ and return false.
   1218   bool InterpretFDE(const FDE &fde);
   1219 
   1220  private:
   1221   // The operands of a CFI instruction, for ParseOperands.
   1222   struct Operands {
   1223     unsigned register_number;  // A register number.
   1224     uint64 offset;             // An offset or address.
   1225     long signed_offset;        // A signed offset.
   1226     string expression;         // A DWARF expression.
   1227   };
   1228 
   1229   // Parse CFI instruction operands from STATE's instruction stream as
   1230   // described by FORMAT. On success, populate OPERANDS with the
   1231   // results, and return true. On failure, report the problem and
   1232   // return false.
   1233   //
   1234   // Each character of FORMAT should be one of the following:
   1235   //
   1236   //   'r'  unsigned LEB128 register number (OPERANDS->register_number)
   1237   //   'o'  unsigned LEB128 offset          (OPERANDS->offset)
   1238   //   's'  signed LEB128 offset            (OPERANDS->signed_offset)
   1239   //   'a'  machine-size address            (OPERANDS->offset)
   1240   //        (If the CIE has a 'z' augmentation string, 'a' uses the
   1241   //        encoding specified by the 'R' argument.)
   1242   //   '1'  a one-byte offset               (OPERANDS->offset)
   1243   //   '2'  a two-byte offset               (OPERANDS->offset)
   1244   //   '4'  a four-byte offset              (OPERANDS->offset)
   1245   //   '8'  an eight-byte offset            (OPERANDS->offset)
   1246   //   'e'  a DW_FORM_block holding a       (OPERANDS->expression)
   1247   //        DWARF expression
   1248   bool ParseOperands(const char *format, Operands *operands);
   1249 
   1250   // Interpret one CFI instruction from STATE's instruction stream, update
   1251   // STATE, report any rule changes to handler_, and return true. On
   1252   // failure, report the problem and return false.
   1253   bool DoInstruction();
   1254 
   1255   // The following Do* member functions are subroutines of DoInstruction,
   1256   // factoring out the actual work of operations that have several
   1257   // different encodings.
   1258 
   1259   // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and
   1260   // return true. On failure, report and return false. (Used for
   1261   // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.)
   1262   bool DoDefCFA(unsigned base_register, long offset);
   1263 
   1264   // Change the offset of the CFA rule to OFFSET, and return true. On
   1265   // failure, report and return false. (Subroutine for
   1266   // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
   1267   bool DoDefCFAOffset(long offset);
   1268 
   1269   // Specify that REG can be recovered using RULE, and return true. On
   1270   // failure, report and return false.
   1271   bool DoRule(unsigned reg, Rule *rule);
   1272 
   1273   // Specify that REG can be found at OFFSET from the CFA, and return true.
   1274   // On failure, report and return false. (Subroutine for DW_CFA_offset,
   1275   // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
   1276   bool DoOffset(unsigned reg, long offset);
   1277 
   1278   // Specify that the caller's value for REG is the CFA plus OFFSET,
   1279   // and return true. On failure, report and return false. (Subroutine
   1280   // for DW_CFA_val_offset and DW_CFA_val_offset_sf.)
   1281   bool DoValOffset(unsigned reg, long offset);
   1282 
   1283   // Restore REG to the rule established in the CIE, and return true. On
   1284   // failure, report and return false. (Subroutine for DW_CFA_restore and
   1285   // DW_CFA_restore_extended.)
   1286   bool DoRestore(unsigned reg);
   1287 
   1288   // Return the section offset of the instruction at cursor. For use
   1289   // in error messages.
   1290   uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }
   1291 
   1292   // Report that entry_ is incomplete, and return false. For brevity.
   1293   bool ReportIncomplete() {
   1294     reporter_->Incomplete(entry_->offset, entry_->kind);
   1295     return false;
   1296   }
   1297 
   1298   // For reading multi-byte values with the appropriate endianness.
   1299   ByteReader *reader_;
   1300 
   1301   // The handler to which we should report the data we find.
   1302   Handler *handler_;
   1303 
   1304   // For reporting problems in the info we're parsing.
   1305   Reporter *reporter_;
   1306 
   1307   // The code address to which the next instruction in the stream applies.
   1308   uint64 address_;
   1309 
   1310   // The entry whose instructions we are currently processing. This is
   1311   // first a CIE, and then an FDE.
   1312   const Entry *entry_;
   1313 
   1314   // The next instruction to process.
   1315   const char *cursor_;
   1316 
   1317   // The current set of rules.
   1318   RuleMap rules_;
   1319 
   1320   // The set of rules established by the CIE, used by DW_CFA_restore
   1321   // and DW_CFA_restore_extended. We set this after interpreting the
   1322   // CIE's instructions.
   1323   RuleMap cie_rules_;
   1324 
   1325   // A stack of saved states, for DW_CFA_remember_state and
   1326   // DW_CFA_restore_state.
   1327   std::stack<RuleMap> saved_rules_;
   1328 };
   1329 
   1330 bool CallFrameInfo::State::InterpretCIE(const CIE &cie) {
   1331   entry_ = &cie;
   1332   cursor_ = entry_->instructions;
   1333   while (cursor_ < entry_->end)
   1334     if (!DoInstruction())
   1335       return false;
   1336   // Note the rules established by the CIE, for use by DW_CFA_restore
   1337   // and DW_CFA_restore_extended.
   1338   cie_rules_ = rules_;
   1339   return true;
   1340 }
   1341 
   1342 bool CallFrameInfo::State::InterpretFDE(const FDE &fde) {
   1343   entry_ = &fde;
   1344   cursor_ = entry_->instructions;
   1345   while (cursor_ < entry_->end)
   1346     if (!DoInstruction())
   1347       return false;
   1348   return true;
   1349 }
   1350 
   1351 bool CallFrameInfo::State::ParseOperands(const char *format,
   1352                                          Operands *operands) {
   1353   size_t len;
   1354   const char *operand;
   1355 
   1356   for (operand = format; *operand; operand++) {
   1357     size_t bytes_left = entry_->end - cursor_;
   1358     switch (*operand) {
   1359       case 'r':
   1360         operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len);
   1361         if (len > bytes_left) return ReportIncomplete();
   1362         cursor_ += len;
   1363         break;
   1364 
   1365       case 'o':
   1366         operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len);
   1367         if (len > bytes_left) return ReportIncomplete();
   1368         cursor_ += len;
   1369         break;
   1370 
   1371       case 's':
   1372         operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len);
   1373         if (len > bytes_left) return ReportIncomplete();
   1374         cursor_ += len;
   1375         break;
   1376 
   1377       case 'a':
   1378         operands->offset =
   1379           reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding,
   1380                                       &len);
   1381         if (len > bytes_left) return ReportIncomplete();
   1382         cursor_ += len;
   1383         break;
   1384 
   1385       case '1':
   1386         if (1 > bytes_left) return ReportIncomplete();
   1387         operands->offset = static_cast<unsigned char>(*cursor_++);
   1388         break;
   1389 
   1390       case '2':
   1391         if (2 > bytes_left) return ReportIncomplete();
   1392         operands->offset = reader_->ReadTwoBytes(cursor_);
   1393         cursor_ += 2;
   1394         break;
   1395 
   1396       case '4':
   1397         if (4 > bytes_left) return ReportIncomplete();
   1398         operands->offset = reader_->ReadFourBytes(cursor_);
   1399         cursor_ += 4;
   1400         break;
   1401 
   1402       case '8':
   1403         if (8 > bytes_left) return ReportIncomplete();
   1404         operands->offset = reader_->ReadEightBytes(cursor_);
   1405         cursor_ += 8;
   1406         break;
   1407 
   1408       case 'e': {
   1409         size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len);
   1410         if (len > bytes_left || expression_length > bytes_left - len)
   1411           return ReportIncomplete();
   1412         cursor_ += len;
   1413         operands->expression = string(cursor_, expression_length);
   1414         cursor_ += expression_length;
   1415         break;
   1416       }
   1417 
   1418       default:
   1419           assert(0);
   1420     }
   1421   }
   1422 
   1423   return true;
   1424 }
   1425 
   1426 bool CallFrameInfo::State::DoInstruction() {
   1427   CIE *cie = entry_->cie;
   1428   Operands ops;
   1429 
   1430   // Our entry's kind should have been set by now.
   1431   assert(entry_->kind != kUnknown);
   1432 
   1433   // We shouldn't have been invoked unless there were more
   1434   // instructions to parse.
   1435   assert(cursor_ < entry_->end);
   1436 
   1437   unsigned opcode = *cursor_++;
   1438   if ((opcode & 0xc0) != 0) {
   1439     switch (opcode & 0xc0) {
   1440       // Advance the address.
   1441       case DW_CFA_advance_loc: {
   1442         size_t code_offset = opcode & 0x3f;
   1443         address_ += code_offset * cie->code_alignment_factor;
   1444         break;
   1445       }
   1446 
   1447       // Find a register at an offset from the CFA.
   1448       case DW_CFA_offset:
   1449         if (!ParseOperands("o", &ops) ||
   1450             !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
   1451           return false;
   1452         break;
   1453 
   1454       // Restore the rule established for a register by the CIE.
   1455       case DW_CFA_restore:
   1456         if (!DoRestore(opcode & 0x3f)) return false;
   1457         break;
   1458 
   1459       // The 'if' above should have excluded this possibility.
   1460       default:
   1461         assert(0);
   1462     }
   1463 
   1464     // Return here, so the big switch below won't be indented.
   1465     return true;
   1466   }
   1467 
   1468   switch (opcode) {
   1469     // Set the address.
   1470     case DW_CFA_set_loc:
   1471       if (!ParseOperands("a", &ops)) return false;
   1472       address_ = ops.offset;
   1473       break;
   1474 
   1475     // Advance the address.
   1476     case DW_CFA_advance_loc1:
   1477       if (!ParseOperands("1", &ops)) return false;
   1478       address_ += ops.offset * cie->code_alignment_factor;
   1479       break;
   1480 
   1481     // Advance the address.
   1482     case DW_CFA_advance_loc2:
   1483       if (!ParseOperands("2", &ops)) return false;
   1484       address_ += ops.offset * cie->code_alignment_factor;
   1485       break;
   1486 
   1487     // Advance the address.
   1488     case DW_CFA_advance_loc4:
   1489       if (!ParseOperands("4", &ops)) return false;
   1490       address_ += ops.offset * cie->code_alignment_factor;
   1491       break;
   1492 
   1493     // Advance the address.
   1494     case DW_CFA_MIPS_advance_loc8:
   1495       if (!ParseOperands("8", &ops)) return false;
   1496       address_ += ops.offset * cie->code_alignment_factor;
   1497       break;
   1498 
   1499     // Compute the CFA by adding an offset to a register.
   1500     case DW_CFA_def_cfa:
   1501       if (!ParseOperands("ro", &ops) ||
   1502           !DoDefCFA(ops.register_number, ops.offset))
   1503         return false;
   1504       break;
   1505 
   1506     // Compute the CFA by adding an offset to a register.
   1507     case DW_CFA_def_cfa_sf:
   1508       if (!ParseOperands("rs", &ops) ||
   1509           !DoDefCFA(ops.register_number,
   1510                     ops.signed_offset * cie->data_alignment_factor))
   1511         return false;
   1512       break;
   1513 
   1514     // Change the base register used to compute the CFA.
   1515     case DW_CFA_def_cfa_register: {
   1516       if (!ParseOperands("r", &ops)) return false;
   1517       Rule *cfa_rule = rules_.CFARule();
   1518       if (!cfa_rule) {
   1519         if (!DoDefCFA(ops.register_number, ops.offset)) {
   1520           reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
   1521           return false;
   1522         }
   1523       } else {
   1524         cfa_rule->SetBaseRegister(ops.register_number);
   1525         if (!cfa_rule->Handle(handler_, address_,
   1526                               Handler::kCFARegister))
   1527         return false;
   1528       }
   1529       break;
   1530     }
   1531 
   1532     // Change the offset used to compute the CFA.
   1533     case DW_CFA_def_cfa_offset:
   1534       if (!ParseOperands("o", &ops) ||
   1535           !DoDefCFAOffset(ops.offset))
   1536         return false;
   1537       break;
   1538 
   1539     // Change the offset used to compute the CFA.
   1540     case DW_CFA_def_cfa_offset_sf:
   1541       if (!ParseOperands("s", &ops) ||
   1542           !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
   1543         return false;
   1544       break;
   1545 
   1546     // Specify an expression whose value is the CFA.
   1547     case DW_CFA_def_cfa_expression: {
   1548       if (!ParseOperands("e", &ops))
   1549         return false;
   1550       Rule *rule = new ValExpressionRule(ops.expression);
   1551       rules_.SetCFARule(rule);
   1552       if (!rule->Handle(handler_, address_,
   1553                         Handler::kCFARegister))
   1554         return false;
   1555       break;
   1556     }
   1557 
   1558     // The register's value cannot be recovered.
   1559     case DW_CFA_undefined: {
   1560       if (!ParseOperands("r", &ops) ||
   1561           !DoRule(ops.register_number, new UndefinedRule()))
   1562         return false;
   1563       break;
   1564     }
   1565 
   1566     // The register's value is unchanged from its value in the caller.
   1567     case DW_CFA_same_value: {
   1568       if (!ParseOperands("r", &ops) ||
   1569           !DoRule(ops.register_number, new SameValueRule()))
   1570         return false;
   1571       break;
   1572     }
   1573 
   1574     // Find a register at an offset from the CFA.
   1575     case DW_CFA_offset_extended:
   1576       if (!ParseOperands("ro", &ops) ||
   1577           !DoOffset(ops.register_number,
   1578                     ops.offset * cie->data_alignment_factor))
   1579         return false;
   1580       break;
   1581 
   1582     // The register is saved at an offset from the CFA.
   1583     case DW_CFA_offset_extended_sf:
   1584       if (!ParseOperands("rs", &ops) ||
   1585           !DoOffset(ops.register_number,
   1586                     ops.signed_offset * cie->data_alignment_factor))
   1587         return false;
   1588       break;
   1589 
   1590     // The register is saved at an offset from the CFA.
   1591     case DW_CFA_GNU_negative_offset_extended:
   1592       if (!ParseOperands("ro", &ops) ||
   1593           !DoOffset(ops.register_number,
   1594                     -ops.offset * cie->data_alignment_factor))
   1595         return false;
   1596       break;
   1597 
   1598     // The register's value is the sum of the CFA plus an offset.
   1599     case DW_CFA_val_offset:
   1600       if (!ParseOperands("ro", &ops) ||
   1601           !DoValOffset(ops.register_number,
   1602                        ops.offset * cie->data_alignment_factor))
   1603         return false;
   1604       break;
   1605 
   1606     // The register's value is the sum of the CFA plus an offset.
   1607     case DW_CFA_val_offset_sf:
   1608       if (!ParseOperands("rs", &ops) ||
   1609           !DoValOffset(ops.register_number,
   1610                        ops.signed_offset * cie->data_alignment_factor))
   1611         return false;
   1612       break;
   1613 
   1614     // The register has been saved in another register.
   1615     case DW_CFA_register: {
   1616       if (!ParseOperands("ro", &ops) ||
   1617           !DoRule(ops.register_number, new RegisterRule(ops.offset)))
   1618         return false;
   1619       break;
   1620     }
   1621 
   1622     // An expression yields the address at which the register is saved.
   1623     case DW_CFA_expression: {
   1624       if (!ParseOperands("re", &ops) ||
   1625           !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
   1626         return false;
   1627       break;
   1628     }
   1629 
   1630     // An expression yields the caller's value for the register.
   1631     case DW_CFA_val_expression: {
   1632       if (!ParseOperands("re", &ops) ||
   1633           !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
   1634         return false;
   1635       break;
   1636     }
   1637 
   1638     // Restore the rule established for a register by the CIE.
   1639     case DW_CFA_restore_extended:
   1640       if (!ParseOperands("r", &ops) ||
   1641           !DoRestore( ops.register_number))
   1642         return false;
   1643       break;
   1644 
   1645     // Save the current set of rules on a stack.
   1646     case DW_CFA_remember_state:
   1647       saved_rules_.push(rules_);
   1648       break;
   1649 
   1650     // Pop the current set of rules off the stack.
   1651     case DW_CFA_restore_state: {
   1652       if (saved_rules_.empty()) {
   1653         reporter_->EmptyStateStack(entry_->offset, entry_->kind,
   1654                                    CursorOffset());
   1655         return false;
   1656       }
   1657       const RuleMap &new_rules = saved_rules_.top();
   1658       if (rules_.CFARule() && !new_rules.CFARule()) {
   1659         reporter_->ClearingCFARule(entry_->offset, entry_->kind,
   1660                                    CursorOffset());
   1661         return false;
   1662       }
   1663       rules_.HandleTransitionTo(handler_, address_, new_rules);
   1664       rules_ = new_rules;
   1665       saved_rules_.pop();
   1666       break;
   1667     }
   1668 
   1669     // No operation.  (Padding instruction.)
   1670     case DW_CFA_nop:
   1671       break;
   1672 
   1673     // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
   1674     // are saved in registers 24 through 31 (%i0-%i7), and registers
   1675     // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
   1676     // (0-15 * the register size). The register numbers must be
   1677     // hard-coded. A GNU extension, and not a pretty one.
   1678     case DW_CFA_GNU_window_save: {
   1679       // Save %o0-%o7 in %i0-%i7.
   1680       for (int i = 8; i < 16; i++)
   1681         if (!DoRule(i, new RegisterRule(i + 16)))
   1682           return false;
   1683       // Save %l0-%l7 and %i0-%i7 at the CFA.
   1684       for (int i = 16; i < 32; i++)
   1685         // Assume that the byte reader's address size is the same as
   1686         // the architecture's register size. !@#%*^ hilarious.
   1687         if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
   1688                                       (i - 16) * reader_->AddressSize())))
   1689           return false;
   1690       break;
   1691     }
   1692 
   1693     // I'm not sure what this is. GDB doesn't use it for unwinding.
   1694     case DW_CFA_GNU_args_size:
   1695       if (!ParseOperands("o", &ops)) return false;
   1696       break;
   1697 
   1698     // An opcode we don't recognize.
   1699     default: {
   1700       reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
   1701       return false;
   1702     }
   1703   }
   1704 
   1705   return true;
   1706 }
   1707 
   1708 bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
   1709   Rule *rule = new ValOffsetRule(base_register, offset);
   1710   rules_.SetCFARule(rule);
   1711   return rule->Handle(handler_, address_,
   1712                       Handler::kCFARegister);
   1713 }
   1714 
   1715 bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
   1716   Rule *cfa_rule = rules_.CFARule();
   1717   if (!cfa_rule) {
   1718     reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
   1719     return false;
   1720   }
   1721   cfa_rule->SetOffset(offset);
   1722   return cfa_rule->Handle(handler_, address_,
   1723                           Handler::kCFARegister);
   1724 }
   1725 
   1726 bool CallFrameInfo::State::DoRule(unsigned reg, Rule *rule) {
   1727   rules_.SetRegisterRule(reg, rule);
   1728   return rule->Handle(handler_, address_, reg);
   1729 }
   1730 
   1731 bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
   1732   if (!rules_.CFARule()) {
   1733     reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
   1734     return false;
   1735   }
   1736   return DoRule(reg,
   1737                 new OffsetRule(Handler::kCFARegister, offset));
   1738 }
   1739 
   1740 bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
   1741   if (!rules_.CFARule()) {
   1742     reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
   1743     return false;
   1744   }
   1745   return DoRule(reg,
   1746                 new ValOffsetRule(Handler::kCFARegister, offset));
   1747 }
   1748 
   1749 bool CallFrameInfo::State::DoRestore(unsigned reg) {
   1750   // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE.
   1751   if (entry_->kind == kCIE) {
   1752     reporter_->RestoreInCIE(entry_->offset, CursorOffset());
   1753     return false;
   1754   }
   1755   Rule *rule = cie_rules_.RegisterRule(reg);
   1756   if (!rule) {
   1757     // This isn't really the right thing to do, but since CFI generally
   1758     // only mentions callee-saves registers, and GCC's convention for
   1759     // callee-saves registers is that they are unchanged, it's a good
   1760     // approximation.
   1761     rule = new SameValueRule();
   1762   }
   1763   return DoRule(reg, rule);
   1764 }
   1765 
   1766 bool CallFrameInfo::ReadEntryPrologue(const char *cursor, Entry *entry) {
   1767   const char *buffer_end = buffer_ + buffer_length_;
   1768 
   1769   // Initialize enough of ENTRY for use in error reporting.
   1770   entry->offset = cursor - buffer_;
   1771   entry->start = cursor;
   1772   entry->kind = kUnknown;
   1773   entry->end = NULL;
   1774 
   1775   // Read the initial length. This sets reader_'s offset size.
   1776   size_t length_size;
   1777   uint64 length = reader_->ReadInitialLength(cursor, &length_size);
   1778   if (length_size > size_t(buffer_end - cursor))
   1779     return ReportIncomplete(entry);
   1780   cursor += length_size;
   1781 
   1782   // In a .eh_frame section, a length of zero marks the end of the series
   1783   // of entries.
   1784   if (length == 0 && eh_frame_) {
   1785     entry->kind = kTerminator;
   1786     entry->end = cursor;
   1787     return true;
   1788   }
   1789 
   1790   // Validate the length.
   1791   if (length > size_t(buffer_end - cursor))
   1792     return ReportIncomplete(entry);
   1793 
   1794   // The length is the number of bytes after the initial length field;
   1795   // we have that position handy at this point, so compute the end
   1796   // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine,
   1797   // and the length didn't fit in a size_t, we would have rejected it
   1798   // above.)
   1799   entry->end = cursor + length;
   1800 
   1801   // Parse the next field: either the offset of a CIE or a CIE id.
   1802   size_t offset_size = reader_->OffsetSize();
   1803   if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry);
   1804   entry->id = reader_->ReadOffset(cursor);
   1805 
   1806   // Don't advance cursor past id field yet; in .eh_frame data we need
   1807   // the id's position to compute the section offset of an FDE's CIE.
   1808 
   1809   // Now we can decide what kind of entry this is.
   1810   if (eh_frame_) {
   1811     // In .eh_frame data, an ID of zero marks the entry as a CIE, and
   1812     // anything else is an offset from the id field of the FDE to the start
   1813     // of the CIE.
   1814     if (entry->id == 0) {
   1815       entry->kind = kCIE;
   1816     } else {
   1817       entry->kind = kFDE;
   1818       // Turn the offset from the id into an offset from the buffer's start.
   1819       entry->id = (cursor - buffer_) - entry->id;
   1820     }
   1821   } else {
   1822     // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the
   1823     // offset size for the entry) marks the entry as a CIE, and anything
   1824     // else is the offset of the CIE from the beginning of the section.
   1825     if (offset_size == 4)
   1826       entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE;
   1827     else {
   1828       assert(offset_size == 8);
   1829       entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE;
   1830     }
   1831   }
   1832 
   1833   // Now advance cursor past the id.
   1834    cursor += offset_size;
   1835 
   1836   // The fields specific to this kind of entry start here.
   1837   entry->fields = cursor;
   1838 
   1839   entry->cie = NULL;
   1840 
   1841   return true;
   1842 }
   1843 
   1844 bool CallFrameInfo::ReadCIEFields(CIE *cie) {
   1845   const char *cursor = cie->fields;
   1846   size_t len;
   1847 
   1848   assert(cie->kind == kCIE);
   1849 
   1850   // Prepare for early exit.
   1851   cie->version = 0;
   1852   cie->augmentation.clear();
   1853   cie->code_alignment_factor = 0;
   1854   cie->data_alignment_factor = 0;
   1855   cie->return_address_register = 0;
   1856   cie->has_z_augmentation = false;
   1857   cie->pointer_encoding = DW_EH_PE_absptr;
   1858   cie->instructions = 0;
   1859 
   1860   // Parse the version number.
   1861   if (cie->end - cursor < 1)
   1862     return ReportIncomplete(cie);
   1863   cie->version = reader_->ReadOneByte(cursor);
   1864   cursor++;
   1865 
   1866   // If we don't recognize the version, we can't parse any more fields of the
   1867   // CIE. For DWARF CFI, we handle versions 1 through 3 (there was never a
   1868   // version 2 of CFI data). For .eh_frame, we handle versions 1 and 3 as well;
   1869   // the difference between those versions seems to be the same as for
   1870   // .debug_frame.
   1871   if (cie->version < 1 || cie->version > 3) {
   1872     reporter_->UnrecognizedVersion(cie->offset, cie->version);
   1873     return false;
   1874   }
   1875 
   1876   const char *augmentation_start = cursor;
   1877   const void *augmentation_end =
   1878       memchr(augmentation_start, '\0', cie->end - augmentation_start);
   1879   if (! augmentation_end) return ReportIncomplete(cie);
   1880   cursor = static_cast<const char *>(augmentation_end);
   1881   cie->augmentation = string(augmentation_start,
   1882                                   cursor - augmentation_start);
   1883   // Skip the terminating '\0'.
   1884   cursor++;
   1885 
   1886   // Is this CFI augmented?
   1887   if (!cie->augmentation.empty()) {
   1888     // Is it an augmentation we recognize?
   1889     if (cie->augmentation[0] == DW_Z_augmentation_start) {
   1890       // Linux C++ ABI 'z' augmentation, used for exception handling data.
   1891       cie->has_z_augmentation = true;
   1892     } else {
   1893       // Not an augmentation we recognize. Augmentations can have arbitrary
   1894       // effects on the form of rest of the content, so we have to give up.
   1895       reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
   1896       return false;
   1897     }
   1898   }
   1899 
   1900   // Parse the code alignment factor.
   1901   cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
   1902   if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
   1903   cursor += len;
   1904 
   1905   // Parse the data alignment factor.
   1906   cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
   1907   if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
   1908   cursor += len;
   1909 
   1910   // Parse the return address register. This is a ubyte in version 1, and
   1911   // a ULEB128 in version 3.
   1912   if (cie->version == 1) {
   1913     if (cursor >= cie->end) return ReportIncomplete(cie);
   1914     cie->return_address_register = uint8(*cursor++);
   1915   } else {
   1916     cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
   1917     if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
   1918     cursor += len;
   1919   }
   1920 
   1921   // If we have a 'z' augmentation string, find the augmentation data and
   1922   // use the augmentation string to parse it.
   1923   if (cie->has_z_augmentation) {
   1924     uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
   1925     if (size_t(cie->end - cursor) < len + data_size)
   1926       return ReportIncomplete(cie);
   1927     cursor += len;
   1928     const char *data = cursor;
   1929     cursor += data_size;
   1930     const char *data_end = cursor;
   1931 
   1932     cie->has_z_lsda = false;
   1933     cie->has_z_personality = false;
   1934     cie->has_z_signal_frame = false;
   1935 
   1936     // Walk the augmentation string, and extract values from the
   1937     // augmentation data as the string directs.
   1938     for (size_t i = 1; i < cie->augmentation.size(); i++) {
   1939       switch (cie->augmentation[i]) {
   1940         case DW_Z_has_LSDA:
   1941           // The CIE's augmentation data holds the language-specific data
   1942           // area pointer's encoding, and the FDE's augmentation data holds
   1943           // the pointer itself.
   1944           cie->has_z_lsda = true;
   1945           // Fetch the LSDA encoding from the augmentation data.
   1946           if (data >= data_end) return ReportIncomplete(cie);
   1947           cie->lsda_encoding = DwarfPointerEncoding(*data++);
   1948           if (!reader_->ValidEncoding(cie->lsda_encoding)) {
   1949             reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
   1950             return false;
   1951           }
   1952           // Don't check if the encoding is usable here --- we haven't
   1953           // read the FDE's fields yet, so we're not prepared for
   1954           // DW_EH_PE_funcrel, although that's a fine encoding for the
   1955           // LSDA to use, since it appears in the FDE.
   1956           break;
   1957 
   1958         case DW_Z_has_personality_routine:
   1959           // The CIE's augmentation data holds the personality routine
   1960           // pointer's encoding, followed by the pointer itself.
   1961           cie->has_z_personality = true;
   1962           // Fetch the personality routine pointer's encoding from the
   1963           // augmentation data.
   1964           if (data >= data_end) return ReportIncomplete(cie);
   1965           cie->personality_encoding = DwarfPointerEncoding(*data++);
   1966           if (!reader_->ValidEncoding(cie->personality_encoding)) {
   1967             reporter_->InvalidPointerEncoding(cie->offset,
   1968                                               cie->personality_encoding);
   1969             return false;
   1970           }
   1971           if (!reader_->UsableEncoding(cie->personality_encoding)) {
   1972             reporter_->UnusablePointerEncoding(cie->offset,
   1973                                                cie->personality_encoding);
   1974             return false;
   1975           }
   1976           // Fetch the personality routine's pointer itself from the data.
   1977           cie->personality_address =
   1978             reader_->ReadEncodedPointer(data, cie->personality_encoding,
   1979                                         &len);
   1980           if (len > size_t(data_end - data))
   1981             return ReportIncomplete(cie);
   1982           data += len;
   1983           break;
   1984 
   1985         case DW_Z_has_FDE_address_encoding:
   1986           // The CIE's augmentation data holds the pointer encoding to use
   1987           // for addresses in the FDE.
   1988           if (data >= data_end) return ReportIncomplete(cie);
   1989           cie->pointer_encoding = DwarfPointerEncoding(*data++);
   1990           if (!reader_->ValidEncoding(cie->pointer_encoding)) {
   1991             reporter_->InvalidPointerEncoding(cie->offset,
   1992                                               cie->pointer_encoding);
   1993             return false;
   1994           }
   1995           if (!reader_->UsableEncoding(cie->pointer_encoding)) {
   1996             reporter_->UnusablePointerEncoding(cie->offset,
   1997                                                cie->pointer_encoding);
   1998             return false;
   1999           }
   2000           break;
   2001 
   2002         case DW_Z_is_signal_trampoline:
   2003           // Frames using this CIE are signal delivery frames.
   2004           cie->has_z_signal_frame = true;
   2005           break;
   2006 
   2007         default:
   2008           // An augmentation we don't recognize.
   2009           reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
   2010           return false;
   2011       }
   2012     }
   2013   }
   2014 
   2015   // The CIE's instructions start here.
   2016   cie->instructions = cursor;
   2017 
   2018   return true;
   2019 }
   2020 
   2021 bool CallFrameInfo::ReadFDEFields(FDE *fde) {
   2022   const char *cursor = fde->fields;
   2023   size_t size;
   2024 
   2025   fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding,
   2026                                              &size);
   2027   if (size > size_t(fde->end - cursor))
   2028     return ReportIncomplete(fde);
   2029   cursor += size;
   2030   reader_->SetFunctionBase(fde->address);
   2031 
   2032   // For the length, we strip off the upper nybble of the encoding used for
   2033   // the starting address.
   2034   DwarfPointerEncoding length_encoding =
   2035     DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
   2036   fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
   2037   if (size > size_t(fde->end - cursor))
   2038     return ReportIncomplete(fde);
   2039   cursor += size;
   2040 
   2041   // If the CIE has a 'z' augmentation string, then augmentation data
   2042   // appears here.
   2043   if (fde->cie->has_z_augmentation) {
   2044     uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
   2045     if (size_t(fde->end - cursor) < size + data_size)
   2046       return ReportIncomplete(fde);
   2047     cursor += size;
   2048 
   2049     // In the abstract, we should walk the augmentation string, and extract
   2050     // items from the FDE's augmentation data as we encounter augmentation
   2051     // string characters that specify their presence: the ordering of items
   2052     // in the augmentation string determines the arrangement of values in
   2053     // the augmentation data.
   2054     //
   2055     // In practice, there's only ever one value in FDE augmentation data
   2056     // that we support --- the LSDA pointer --- and we have to bail if we
   2057     // see any unrecognized augmentation string characters. So if there is
   2058     // anything here at all, we know what it is, and where it starts.
   2059     if (fde->cie->has_z_lsda) {
   2060       // Check whether the LSDA's pointer encoding is usable now: only once
   2061       // we've parsed the FDE's starting address do we call reader_->
   2062       // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
   2063       // usable.
   2064       if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
   2065         reporter_->UnusablePointerEncoding(fde->cie->offset,
   2066                                            fde->cie->lsda_encoding);
   2067         return false;
   2068       }
   2069 
   2070       fde->lsda_address =
   2071         reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
   2072       if (size > data_size)
   2073         return ReportIncomplete(fde);
   2074       // Ideally, we would also complain here if there were unconsumed
   2075       // augmentation data.
   2076     }
   2077 
   2078     cursor += data_size;
   2079   }
   2080 
   2081   // The FDE's instructions start after those.
   2082   fde->instructions = cursor;
   2083 
   2084   return true;
   2085 }
   2086 
   2087 bool CallFrameInfo::Start() {
   2088   const char *buffer_end = buffer_ + buffer_length_;
   2089   const char *cursor;
   2090   bool all_ok = true;
   2091   const char *entry_end;
   2092   bool ok;
   2093 
   2094   // Traverse all the entries in buffer_, skipping CIEs and offering
   2095   // FDEs to the handler.
   2096   for (cursor = buffer_; cursor < buffer_end;
   2097        cursor = entry_end, all_ok = all_ok && ok) {
   2098     FDE fde;
   2099 
   2100     // Make it easy to skip this entry with 'continue': assume that
   2101     // things are not okay until we've checked all the data, and
   2102     // prepare the address of the next entry.
   2103     ok = false;
   2104 
   2105     // Read the entry's prologue.
   2106     if (!ReadEntryPrologue(cursor, &fde)) {
   2107       if (!fde.end) {
   2108         // If we couldn't even figure out this entry's extent, then we
   2109         // must stop processing entries altogether.
   2110         all_ok = false;
   2111         break;
   2112       }
   2113       entry_end = fde.end;
   2114       continue;
   2115     }
   2116 
   2117     // The next iteration picks up after this entry.
   2118     entry_end = fde.end;
   2119 
   2120     // Did we see an .eh_frame terminating mark?
   2121     if (fde.kind == kTerminator) {
   2122       // If there appears to be more data left in the section after the
   2123       // terminating mark, warn the user. But this is just a warning;
   2124       // we leave all_ok true.
   2125       if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
   2126       break;
   2127     }
   2128 
   2129     // In this loop, we skip CIEs. We only parse them fully when we
   2130     // parse an FDE that refers to them. This limits our memory
   2131     // consumption (beyond the buffer itself) to that needed to
   2132     // process the largest single entry.
   2133     if (fde.kind != kFDE) {
   2134       ok = true;
   2135       continue;
   2136     }
   2137 
   2138     // Validate the CIE pointer.
   2139     if (fde.id > buffer_length_) {
   2140       reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
   2141       continue;
   2142     }
   2143 
   2144     CIE cie;
   2145 
   2146     // Parse this FDE's CIE header.
   2147     if (!ReadEntryPrologue(buffer_ + fde.id, &cie))
   2148       continue;
   2149     // This had better be an actual CIE.
   2150     if (cie.kind != kCIE) {
   2151       reporter_->BadCIEId(fde.offset, fde.id);
   2152       continue;
   2153     }
   2154     if (!ReadCIEFields(&cie))
   2155       continue;
   2156 
   2157     // We now have the values that govern both the CIE and the FDE.
   2158     cie.cie = &cie;
   2159     fde.cie = &cie;
   2160 
   2161     // Parse the FDE's header.
   2162     if (!ReadFDEFields(&fde))
   2163       continue;
   2164 
   2165     // Call Entry to ask the consumer if they're interested.
   2166     if (!handler_->Entry(fde.offset, fde.address, fde.size,
   2167                          cie.version, cie.augmentation,
   2168                          cie.return_address_register)) {
   2169       // The handler isn't interested in this entry. That's not an error.
   2170       ok = true;
   2171       continue;
   2172     }
   2173 
   2174     if (cie.has_z_augmentation) {
   2175       // Report the personality routine address, if we have one.
   2176       if (cie.has_z_personality) {
   2177         if (!handler_
   2178             ->PersonalityRoutine(cie.personality_address,
   2179                                  IsIndirectEncoding(cie.personality_encoding)))
   2180           continue;
   2181       }
   2182 
   2183       // Report the language-specific data area address, if we have one.
   2184       if (cie.has_z_lsda) {
   2185         if (!handler_
   2186             ->LanguageSpecificDataArea(fde.lsda_address,
   2187                                        IsIndirectEncoding(cie.lsda_encoding)))
   2188           continue;
   2189       }
   2190 
   2191       // If this is a signal-handling frame, report that.
   2192       if (cie.has_z_signal_frame) {
   2193         if (!handler_->SignalHandler())
   2194           continue;
   2195       }
   2196     }
   2197 
   2198     // Interpret the CIE's instructions, and then the FDE's instructions.
   2199     State state(reader_, handler_, reporter_, fde.address);
   2200     ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
   2201 
   2202     // Tell the ByteReader that the function start address from the
   2203     // FDE header is no longer valid.
   2204     reader_->ClearFunctionBase();
   2205 
   2206     // Report the end of the entry.
   2207     handler_->End();
   2208   }
   2209 
   2210   return all_ok;
   2211 }
   2212 
   2213 const char *CallFrameInfo::KindName(EntryKind kind) {
   2214   if (kind == CallFrameInfo::kUnknown)
   2215     return "entry";
   2216   else if (kind == CallFrameInfo::kCIE)
   2217     return "common information entry";
   2218   else if (kind == CallFrameInfo::kFDE)
   2219     return "frame description entry";
   2220   else {
   2221     assert (kind == CallFrameInfo::kTerminator);
   2222     return ".eh_frame sequence terminator";
   2223   }
   2224 }
   2225 
   2226 bool CallFrameInfo::ReportIncomplete(Entry *entry) {
   2227   reporter_->Incomplete(entry->offset, entry->kind);
   2228   return false;
   2229 }
   2230 
   2231 void CallFrameInfo::Reporter::Incomplete(uint64 offset,
   2232                                          CallFrameInfo::EntryKind kind) {
   2233   fprintf(stderr,
   2234           "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n",
   2235           filename_.c_str(), CallFrameInfo::KindName(kind), offset,
   2236           section_.c_str());
   2237 }
   2238 
   2239 void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) {
   2240   fprintf(stderr,
   2241           "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker"
   2242           " before end of section contents\n",
   2243           filename_.c_str(), offset, section_.c_str());
   2244 }
   2245 
   2246 void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset,
   2247                                                    uint64 cie_offset) {
   2248   fprintf(stderr,
   2249           "%s: CFI frame description entry at offset 0x%llx in '%s':"
   2250           " CIE pointer is out of range: 0x%llx\n",
   2251           filename_.c_str(), offset, section_.c_str(), cie_offset);
   2252 }
   2253 
   2254 void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) {
   2255   fprintf(stderr,
   2256           "%s: CFI frame description entry at offset 0x%llx in '%s':"
   2257           " CIE pointer does not point to a CIE: 0x%llx\n",
   2258           filename_.c_str(), offset, section_.c_str(), cie_offset);
   2259 }
   2260 
   2261 void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) {
   2262   fprintf(stderr,
   2263           "%s: CFI frame description entry at offset 0x%llx in '%s':"
   2264           " CIE specifies unrecognized version: %d\n",
   2265           filename_.c_str(), offset, section_.c_str(), version);
   2266 }
   2267 
   2268 void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset,
   2269                                                        const string &aug) {
   2270   fprintf(stderr,
   2271           "%s: CFI frame description entry at offset 0x%llx in '%s':"
   2272           " CIE specifies unrecognized augmentation: '%s'\n",
   2273           filename_.c_str(), offset, section_.c_str(), aug.c_str());
   2274 }
   2275 
   2276 void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset,
   2277                                                      uint8 encoding) {
   2278   fprintf(stderr,
   2279           "%s: CFI common information entry at offset 0x%llx in '%s':"
   2280           " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n",
   2281           filename_.c_str(), offset, section_.c_str(), encoding);
   2282 }
   2283 
   2284 void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset,
   2285                                                       uint8 encoding) {
   2286   fprintf(stderr,
   2287           "%s: CFI common information entry at offset 0x%llx in '%s':"
   2288           " 'z' augmentation specifies a pointer encoding for which"
   2289           " we have no base address: 0x%02x\n",
   2290           filename_.c_str(), offset, section_.c_str(), encoding);
   2291 }
   2292 
   2293 void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) {
   2294   fprintf(stderr,
   2295           "%s: CFI common information entry at offset 0x%llx in '%s':"
   2296           " the DW_CFA_restore instruction at offset 0x%llx"
   2297           " cannot be used in a common information entry\n",
   2298           filename_.c_str(), offset, section_.c_str(), insn_offset);
   2299 }
   2300 
   2301 void CallFrameInfo::Reporter::BadInstruction(uint64 offset,
   2302                                              CallFrameInfo::EntryKind kind,
   2303                                              uint64 insn_offset) {
   2304   fprintf(stderr,
   2305           "%s: CFI %s at offset 0x%llx in section '%s':"
   2306           " the instruction at offset 0x%llx is unrecognized\n",
   2307           filename_.c_str(), CallFrameInfo::KindName(kind),
   2308           offset, section_.c_str(), insn_offset);
   2309 }
   2310 
   2311 void CallFrameInfo::Reporter::NoCFARule(uint64 offset,
   2312                                         CallFrameInfo::EntryKind kind,
   2313                                         uint64 insn_offset) {
   2314   fprintf(stderr,
   2315           "%s: CFI %s at offset 0x%llx in section '%s':"
   2316           " the instruction at offset 0x%llx assumes that a CFA rule has"
   2317           " been set, but none has been set\n",
   2318           filename_.c_str(), CallFrameInfo::KindName(kind), offset,
   2319           section_.c_str(), insn_offset);
   2320 }
   2321 
   2322 void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset,
   2323                                               CallFrameInfo::EntryKind kind,
   2324                                               uint64 insn_offset) {
   2325   fprintf(stderr,
   2326           "%s: CFI %s at offset 0x%llx in section '%s':"
   2327           " the DW_CFA_restore_state instruction at offset 0x%llx"
   2328           " should pop a saved state from the stack, but the stack is empty\n",
   2329           filename_.c_str(), CallFrameInfo::KindName(kind), offset,
   2330           section_.c_str(), insn_offset);
   2331 }
   2332 
   2333 void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset,
   2334                                               CallFrameInfo::EntryKind kind,
   2335                                               uint64 insn_offset) {
   2336   fprintf(stderr,
   2337           "%s: CFI %s at offset 0x%llx in section '%s':"
   2338           " the DW_CFA_restore_state instruction at offset 0x%llx"
   2339           " would clear the CFA rule in effect\n",
   2340           filename_.c_str(), CallFrameInfo::KindName(kind), offset,
   2341           section_.c_str(), insn_offset);
   2342 }
   2343 
   2344 }  // namespace dwarf2reader
   2345