Home | History | Annotate | Download | only in common
      1 // -*- mode: c++ -*-
      2 
      3 // Copyright (c) 2010 Google Inc. All Rights Reserved.
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Original author: Jim Blandy <jimb (at) mozilla.com> <jimb (at) red-bean.com>
     32 
     33 // stabs_reader.h: Define StabsReader, a parser for STABS debugging
     34 // information. A description of the STABS debugging format can be
     35 // found at:
     36 //
     37 //    http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html
     38 //
     39 // The comments here assume you understand the format.
     40 //
     41 // This parser can handle big-endian and little-endian data, and the symbol
     42 // values may be either 32 or 64 bits long. It handles both STABS in
     43 // sections (as used on Linux) and STABS appearing directly in an
     44 // a.out-like symbol table (as used in Darwin OS X Mach-O files).
     45 
     46 #ifndef COMMON_STABS_READER_H__
     47 #define COMMON_STABS_READER_H__
     48 
     49 #include <stddef.h>
     50 #include <stdint.h>
     51 
     52 #ifdef HAVE_CONFIG_H
     53 #include <config.h>
     54 #endif
     55 
     56 #ifdef HAVE_A_OUT_H
     57 #include <a.out.h>
     58 #endif
     59 #ifdef HAVE_MACH_O_NLIST_H
     60 #include <mach-o/nlist.h>
     61 #endif
     62 
     63 #include <string>
     64 #include <vector>
     65 
     66 #include "common/byte_cursor.h"
     67 #include "common/using_std_string.h"
     68 
     69 namespace google_breakpad {
     70 
     71 class StabsHandler;
     72 
     73 class StabsReader {
     74  public:
     75   // Create a reader for the STABS debug information whose .stab section is
     76   // being traversed by ITERATOR, and whose .stabstr section is referred to
     77   // by STRINGS. The reader will call the member functions of HANDLER to
     78   // report the information it finds, when the reader's 'Process' member
     79   // function is called.
     80   //
     81   // BIG_ENDIAN should be true if the entries in the .stab section are in
     82   // big-endian form, or false if they are in little-endian form.
     83   //
     84   // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value'
     85   // field in each entry in bytes.
     86   //
     87   // UNITIZED should be true if the STABS data is stored in units with
     88   // N_UNDF headers. This is usually the case for STABS stored in sections,
     89   // like .stab/.stabstr, and usually not the case for STABS stored in the
     90   // actual symbol table; UNITIZED should be true when parsing Linux stabs,
     91   // false when parsing Mac OS X STABS. For details, see:
     92   // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html
     93   //
     94   // Note that, in ELF, the .stabstr section should be found using the
     95   // 'sh_link' field of the .stab section header, not by name.
     96   StabsReader(const uint8_t *stab,    size_t stab_size,
     97               const uint8_t *stabstr, size_t stabstr_size,
     98               bool big_endian, size_t value_size, bool unitized,
     99               StabsHandler *handler);
    100 
    101   // Process the STABS data, calling the handler's member functions to
    102   // report what we find.  While the handler functions return true,
    103   // continue to process until we reach the end of the section.  If we
    104   // processed the entire section and all handlers returned true,
    105   // return true.  If any handler returned false, return false.
    106   //
    107   // This is only meant to be called once per StabsReader instance;
    108   // resuming a prior processing pass that stopped abruptly isn't supported.
    109   bool Process();
    110 
    111  private:
    112 
    113   // An class for walking arrays of STABS entries. This isolates the main
    114   // STABS reader from the exact format (size; endianness) of the entries
    115   // themselves.
    116   class EntryIterator {
    117    public:
    118     // The contents of a STABS entry, adjusted for the host's endianness,
    119     // word size, 'struct nlist' layout, and so on.
    120     struct Entry {
    121       // True if this iterator has reached the end of the entry array. When
    122       // this is set, the other members of this structure are not valid.
    123       bool at_end;
    124 
    125       // The number of this entry within the list.
    126       size_t index;
    127 
    128       // The current entry's name offset. This is the offset within the
    129       // current compilation unit's strings, as establish by the N_UNDF entries.
    130       size_t name_offset;
    131 
    132       // The current entry's type, 'other' field, descriptor, and value.
    133       unsigned char type;
    134       unsigned char other;
    135       short descriptor;
    136       uint64_t value;
    137     };
    138 
    139     // Create a EntryIterator walking the entries in BUFFER. Treat the
    140     // entries as big-endian if BIG_ENDIAN is true, as little-endian
    141     // otherwise. Assume each entry has a 'value' field whose size is
    142     // VALUE_SIZE.
    143     //
    144     // This would not be terribly clean to extend to other format variations,
    145     // but it's enough to handle Linux and Mac, and we'd like STABS to die
    146     // anyway.
    147     //
    148     // For the record: on Linux, STABS entry values are always 32 bits,
    149     // regardless of the architecture address size (don't ask me why); on
    150     // Mac, they are 32 or 64 bits long. Oddly, the section header's entry
    151     // size for a Linux ELF .stab section varies according to the ELF class
    152     // from 12 to 20 even as the actual entries remain unchanged.
    153     EntryIterator(const ByteBuffer *buffer, bool big_endian, size_t value_size);
    154 
    155     // Move to the next entry. This function's behavior is undefined if
    156     // at_end() is true when it is called.
    157     EntryIterator &operator++() { Fetch(); entry_.index++; return *this; }
    158 
    159     // Dereferencing this iterator produces a reference to an Entry structure
    160     // that holds the current entry's values. The entry is owned by this
    161     // EntryIterator, and will be invalidated at the next call to operator++.
    162     const Entry &operator*() const { return entry_; }
    163     const Entry *operator->() const { return &entry_; }
    164 
    165    private:
    166     // Read the STABS entry at cursor_, and set entry_ appropriately.
    167     void Fetch();
    168 
    169     // The size of entries' value field, in bytes.
    170     size_t value_size_;
    171 
    172     // A byte cursor traversing buffer_.
    173     ByteCursor cursor_;
    174 
    175     // Values for the entry this iterator refers to.
    176     Entry entry_;
    177   };
    178 
    179   // A source line, saved to be reported later.
    180   struct Line {
    181     uint64_t address;
    182     const char *filename;
    183     int number;
    184   };
    185 
    186   // Return the name of the current symbol.
    187   const char *SymbolString();
    188 
    189   // Process a compilation unit starting at symbol_.  Return true
    190   // to continue processing, or false to abort.
    191   bool ProcessCompilationUnit();
    192 
    193   // Process a function in current_source_file_ starting at symbol_.
    194   // Return true to continue processing, or false to abort.
    195   bool ProcessFunction();
    196 
    197   // Process an exported function symbol.
    198   // Return true to continue processing, or false to abort.
    199   bool ProcessExtern();
    200 
    201   // The STABS entries being parsed.
    202   ByteBuffer entries_;
    203 
    204   // The string section to which the entries refer.
    205   ByteBuffer strings_;
    206 
    207   // The iterator walking the STABS entries.
    208   EntryIterator iterator_;
    209 
    210   // True if the data is "unitized"; see the explanation in the comment for
    211   // StabsReader::StabsReader.
    212   bool unitized_;
    213 
    214   StabsHandler *handler_;
    215 
    216   // The offset of the current compilation unit's strings within stabstr_.
    217   size_t string_offset_;
    218 
    219   // The value string_offset_ should have for the next compilation unit,
    220   // as established by N_UNDF entries.
    221   size_t next_cu_string_offset_;
    222 
    223   // The current source file name.
    224   const char *current_source_file_;
    225 
    226   // Mac OS X STABS place SLINE records before functions; we accumulate a
    227   // vector of these until we see the FUN record, and then report them
    228   // after the StartFunction call.
    229   std::vector<Line> queued_lines_;
    230 };
    231 
    232 // Consumer-provided callback structure for the STABS reader.  Clients
    233 // of the STABS reader provide an instance of this structure.  The
    234 // reader then invokes the member functions of that instance to report
    235 // the information it finds.
    236 //
// Except for Warning, which is pure virtual and must be supplied by the
// client, the default definitions of the member functions do nothing, and
// return true so processing will continue.
    239 class StabsHandler {
    240  public:
    241   StabsHandler() { }
    242   virtual ~StabsHandler() { }
    243 
    244   // Some general notes about the handler callback functions:
    245 
    246   // Processing proceeds until the end of the .stabs section, or until
    247   // one of these functions returns false.
    248 
    249   // The addresses given are as reported in the STABS info, without
    250   // regard for whether the module may be loaded at different
    251   // addresses at different times (a shared library, say).  When
    252   // processing STABS from an ELF shared library, the addresses given
    253   // all assume the library is loaded at its nominal load address.
    254   // They are *not* offsets from the nominal load address.  If you
    255   // want offsets, you must subtract off the library's nominal load
    256   // address.
    257 
    258   // The arguments to these functions named FILENAME are all
    259   // references to strings stored in the .stabstr section.  Because
    260   // both the Linux and Solaris linkers factor out duplicate strings
    261   // from the .stabstr section, the consumer can assume that if two
    262   // FILENAME values are different addresses, they represent different
    263   // file names.
    264   //
    265   // Thus, it's safe to use (say) std::map<char *, ...>, which does
    266   // string address comparisons, not string content comparisons.
    267   // Since all the strings are in same array of characters --- the
    268   // .stabstr section --- comparing their addresses produces
    269   // predictable, if not lexicographically meaningful, results.
    270 
    271   // Begin processing a compilation unit whose main source file is
    272   // named FILENAME, and whose base address is ADDRESS.  If
    273   // BUILD_DIRECTORY is non-NULL, it is the name of the build
    274   // directory in which the compilation occurred.
    275   virtual bool StartCompilationUnit(const char *filename, uint64_t address,
    276                                     const char *build_directory) {
    277     return true;
    278   }
    279 
    280   // Finish processing the compilation unit.  If ADDRESS is non-zero,
    281   // it is the ending address of the compilation unit.  If ADDRESS is
    282   // zero, then the compilation unit's ending address is not
    283   // available, and the consumer must infer it by other means.
    284   virtual bool EndCompilationUnit(uint64_t address) { return true; }
    285 
    286   // Begin processing a function named NAME, whose starting address is
    287   // ADDRESS.  This function belongs to the compilation unit that was
    288   // most recently started but not ended.
    289   //
    290   // Note that, unlike filenames, NAME is not a pointer into the
    291   // .stabstr section; this is because the name as it appears in the
    292   // STABS data is followed by type information.  The value passed to
    293   // StartFunction is the function name alone.
    294   //
    295   // In languages that use name mangling, like C++, NAME is mangled.
    296   virtual bool StartFunction(const string &name, uint64_t address) {
    297     return true;
    298   }
    299 
    300   // Finish processing the function.  If ADDRESS is non-zero, it is
    301   // the ending address for the function.  If ADDRESS is zero, then
    302   // the function's ending address is not available, and the consumer
    303   // must infer it by other means.
    304   virtual bool EndFunction(uint64_t address) { return true; }
    305 
    306   // Report that the code at ADDRESS is attributable to line NUMBER of
    307   // the source file named FILENAME.  The caller must infer the ending
    308   // address of the line.
    309   virtual bool Line(uint64_t address, const char *filename, int number) {
    310     return true;
    311   }
    312 
    313   // Report that an exported function NAME is present at ADDRESS.
    314   // The size of the function is unknown.
    315   virtual bool Extern(const string &name, uint64_t address) {
    316     return true;
    317   }
    318 
    319   // Report a warning.  FORMAT is a printf-like format string,
    320   // specifying how to format the subsequent arguments.
    321   virtual void Warning(const char *format, ...) = 0;
    322 };
    323 
    324 } // namespace google_breakpad
    325 
    326 #endif  // COMMON_STABS_READER_H__
    327