Home | History | Annotate | Download | only in common
      1 // Copyright (c) 2010 Google Inc. All Rights Reserved.
      2 //
      3 // Redistribution and use in source and binary forms, with or without
      4 // modification, are permitted provided that the following conditions are
      5 // met:
      6 //
      7 //     * Redistributions of source code must retain the above copyright
      8 // notice, this list of conditions and the following disclaimer.
      9 //     * Redistributions in binary form must reproduce the above
     10 // copyright notice, this list of conditions and the following disclaimer
     11 // in the documentation and/or other materials provided with the
     12 // distribution.
     13 //     * Neither the name of Google Inc. nor the names of its
     14 // contributors may be used to endorse or promote products derived from
     15 // this software without specific prior written permission.
     16 //
     17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28 
     29 // Original author: Jim Blandy <jimb (at) mozilla.com> <jimb (at) red-bean.com>
     30 
     31 // This file implements the google_breakpad::StabsReader class.
     32 // See stabs_reader.h.
     33 
     34 #include "common/stabs_reader.h"
     35 
     36 #include <assert.h>
     37 #include <stab.h>
     38 #include <string.h>
     39 
     40 #include <string>
     41 
     42 #include "common/using_std_string.h"
     43 
     44 using std::vector;
     45 
     46 namespace google_breakpad {
     47 
     48 StabsReader::EntryIterator::EntryIterator(const ByteBuffer *buffer,
     49                                           bool big_endian, size_t value_size)
     50     : value_size_(value_size), cursor_(buffer, big_endian) {
     51   // Actually, we could handle weird sizes just fine, but they're
     52   // probably mistakes --- expressed in bits, say.
     53   assert(value_size == 4 || value_size == 8);
     54   entry_.index = 0;
     55   Fetch();
     56 }
     57 
     58 void StabsReader::EntryIterator::Fetch() {
     59   cursor_
     60       .Read(4, false, &entry_.name_offset)
     61       .Read(1, false, &entry_.type)
     62       .Read(1, false, &entry_.other)
     63       .Read(2, false, &entry_.descriptor)
     64       .Read(value_size_, false, &entry_.value);
     65   entry_.at_end = !cursor_;
     66 }
     67 
     68 StabsReader::StabsReader(const uint8_t *stab,    size_t stab_size,
     69                          const uint8_t *stabstr, size_t stabstr_size,
     70                          bool big_endian, size_t value_size, bool unitized,
     71                          StabsHandler *handler)
     72     : entries_(stab, stab_size),
     73       strings_(stabstr, stabstr_size),
     74       iterator_(&entries_, big_endian, value_size),
     75       unitized_(unitized),
     76       handler_(handler),
     77       string_offset_(0),
     78       next_cu_string_offset_(0),
     79       current_source_file_(NULL) { }
     80 
     81 const char *StabsReader::SymbolString() {
     82   ptrdiff_t offset = string_offset_ + iterator_->name_offset;
     83   if (offset < 0 || (size_t) offset >= strings_.Size()) {
     84     handler_->Warning("symbol %d: name offset outside the string section\n",
     85                       iterator_->index);
     86     // Return our null string, to keep our promise about all names being
     87     // taken from the string section.
     88     offset = 0;
     89   }
     90   return reinterpret_cast<const char *>(strings_.start + offset);
     91 }
     92 
     93 bool StabsReader::Process() {
     94   while (!iterator_->at_end) {
     95     if (iterator_->type == N_SO) {
     96       if (! ProcessCompilationUnit())
     97         return false;
     98     } else if (iterator_->type == N_UNDF && unitized_) {
     99       // In unitized STABS (including Linux STABS, and pretty much anything
    100       // else that puts STABS data in sections), at the head of each
    101       // compilation unit's entries there is an N_UNDF stab giving the
    102       // number of symbols in the compilation unit, and the number of bytes
    103       // that compilation unit's strings take up in the .stabstr section.
    104       // Each CU's strings are separate; the n_strx values are offsets
    105       // within the current CU's portion of the .stabstr section.
    106       //
    107       // As an optimization, the GNU linker combines all the
    108       // compilation units into one, with a single N_UNDF at the
    109       // beginning. However, other linkers, like Gold, do not perform
    110       // this optimization.
    111       string_offset_ = next_cu_string_offset_;
    112       next_cu_string_offset_ = iterator_->value;
    113       ++iterator_;
    114     }
    115 #if defined(HAVE_MACH_O_NLIST_H)
    116     // Export symbols in Mach-O binaries look like this.
    117     // This is necessary in order to be able to dump symbols
    118     // from OS X system libraries.
    119     else if ((iterator_->type & N_STAB) == 0 &&
    120                (iterator_->type & N_TYPE) == N_SECT) {
    121       ProcessExtern();
    122     }
    123 #endif
    124     else {
    125       ++iterator_;
    126     }
    127   }
    128   return true;
    129 }
    130 
    131 bool StabsReader::ProcessCompilationUnit() {
    132   assert(!iterator_->at_end && iterator_->type == N_SO);
    133 
    134   // There may be an N_SO entry whose name ends with a slash,
    135   // indicating the directory in which the compilation occurred.
    136   // The build directory defaults to NULL.
    137   const char *build_directory = NULL;
    138   {
    139     const char *name = SymbolString();
    140     if (name[0] && name[strlen(name) - 1] == '/') {
    141       build_directory = name;
    142       ++iterator_;
    143     }
    144   }
    145 
    146   // We expect to see an N_SO entry with a filename next, indicating
    147   // the start of the compilation unit.
    148   {
    149     if (iterator_->at_end || iterator_->type != N_SO)
    150       return true;
    151     const char *name = SymbolString();
    152     if (name[0] == '\0') {
    153       // This seems to be a stray end-of-compilation-unit marker;
    154       // consume it, but don't report the end, since we didn't see a
    155       // beginning.
    156       ++iterator_;
    157       return true;
    158     }
    159     current_source_file_ = name;
    160   }
    161 
    162   if (! handler_->StartCompilationUnit(current_source_file_,
    163                                        iterator_->value,
    164                                        build_directory))
    165     return false;
    166 
    167   ++iterator_;
    168 
    169   // The STABS documentation says that some compilers may emit
    170   // additional N_SO entries with names immediately following the
    171   // first, and that they should be ignored.  However, the original
    172   // Breakpad STABS reader doesn't ignore them, so we won't either.
    173 
    174   // Process the body of the compilation unit, up to the next N_SO.
    175   while (!iterator_->at_end && iterator_->type != N_SO) {
    176     if (iterator_->type == N_FUN) {
    177       if (! ProcessFunction())
    178         return false;
    179     } else if (iterator_->type == N_SLINE) {
    180       // Mac OS X STABS place SLINE records before functions.
    181       Line line;
    182       // The value of an N_SLINE entry that appears outside a function is
    183       // the absolute address of the line.
    184       line.address = iterator_->value;
    185       line.filename = current_source_file_;
    186       // The n_desc of a N_SLINE entry is the line number.  It's a
    187       // signed 16-bit field; line numbers from 32768 to 65535 are
    188       // stored as n-65536.
    189       line.number = (uint16_t) iterator_->descriptor;
    190       queued_lines_.push_back(line);
    191       ++iterator_;
    192     } else if (iterator_->type == N_SOL) {
    193       current_source_file_ = SymbolString();
    194       ++iterator_;
    195     } else {
    196       // Ignore anything else.
    197       ++iterator_;
    198     }
    199   }
    200 
    201   // An N_SO with an empty name indicates the end of the compilation
    202   // unit.  Default to zero.
    203   uint64_t ending_address = 0;
    204   if (!iterator_->at_end) {
    205     assert(iterator_->type == N_SO);
    206     const char *name = SymbolString();
    207     if (name[0] == '\0') {
    208       ending_address = iterator_->value;
    209       ++iterator_;
    210     }
    211   }
    212 
    213   if (! handler_->EndCompilationUnit(ending_address))
    214     return false;
    215 
    216   queued_lines_.clear();
    217 
    218   return true;
    219 }
    220 
    221 bool StabsReader::ProcessFunction() {
    222   assert(!iterator_->at_end && iterator_->type == N_FUN);
    223 
    224   uint64_t function_address = iterator_->value;
    225   // The STABS string for an N_FUN entry is the name of the function,
    226   // followed by a colon, followed by type information for the
    227   // function.  We want to pass the name alone to StartFunction.
    228   const char *stab_string = SymbolString();
    229   const char *name_end = strchr(stab_string, ':');
    230   if (! name_end)
    231     name_end = stab_string + strlen(stab_string);
    232   string name(stab_string, name_end - stab_string);
    233   if (! handler_->StartFunction(name, function_address))
    234     return false;
    235   ++iterator_;
    236 
    237   // If there were any SLINE records given before the function, report them now.
    238   for (vector<Line>::const_iterator it = queued_lines_.begin();
    239        it != queued_lines_.end(); it++) {
    240     if (!handler_->Line(it->address, it->filename, it->number))
    241       return false;
    242   }
    243   queued_lines_.clear();
    244 
    245   while (!iterator_->at_end) {
    246     if (iterator_->type == N_SO || iterator_->type == N_FUN)
    247       break;
    248     else if (iterator_->type == N_SLINE) {
    249       // The value of an N_SLINE entry is the offset of the line from
    250       // the function's start address.
    251       uint64_t line_address = function_address + iterator_->value;
    252       // The n_desc of a N_SLINE entry is the line number.  It's a
    253       // signed 16-bit field; line numbers from 32768 to 65535 are
    254       // stored as n-65536.
    255       uint16_t line_number = iterator_->descriptor;
    256       if (! handler_->Line(line_address, current_source_file_, line_number))
    257         return false;
    258       ++iterator_;
    259     } else if (iterator_->type == N_SOL) {
    260       current_source_file_ = SymbolString();
    261       ++iterator_;
    262     } else
    263       // Ignore anything else.
    264       ++iterator_;
    265   }
    266 
    267   // We've reached the end of the function. See if we can figure out its
    268   // ending address.
    269   uint64_t ending_address = 0;
    270   if (!iterator_->at_end) {
    271     assert(iterator_->type == N_SO || iterator_->type == N_FUN);
    272     if (iterator_->type == N_FUN) {
    273       const char *symbol_name = SymbolString();
    274       if (symbol_name[0] == '\0') {
    275         // An N_FUN entry with no name is a terminator for this function;
    276         // its value is the function's size.
    277         ending_address = function_address + iterator_->value;
    278         ++iterator_;
    279       } else {
    280         // An N_FUN entry with a name is the next function, and we can take
    281         // its value as our ending address. Don't advance the iterator, as
    282         // we'll use this symbol to start the next function as well.
    283         ending_address = iterator_->value;
    284       }
    285     } else {
    286       // An N_SO entry could be an end-of-compilation-unit marker, or the
    287       // start of the next compilation unit, but in either case, its value
    288       // is our ending address. We don't advance the iterator;
    289       // ProcessCompilationUnit will decide what to do with this symbol.
    290       ending_address = iterator_->value;
    291     }
    292   }
    293 
    294   if (! handler_->EndFunction(ending_address))
    295     return false;
    296 
    297   return true;
    298 }
    299 
    300 bool StabsReader::ProcessExtern() {
    301 #if defined(HAVE_MACH_O_NLIST_H)
    302   assert(!iterator_->at_end &&
    303          (iterator_->type & N_STAB) == 0 &&
    304          (iterator_->type & N_TYPE) == N_SECT);
    305 #endif
    306 
    307   // TODO(mark): only do symbols in the text section?
    308   if (!handler_->Extern(SymbolString(), iterator_->value))
    309     return false;
    310 
    311   ++iterator_;
    312   return true;
    313 }
    314 
    315 } // namespace google_breakpad
    316