Home | History | Annotate | Download | only in mac
      1 // -*- mode: c++ -*-
      2 
      3 // Copyright (c) 2011, Google Inc.
      4 // All rights reserved.
      5 //
      6 // Redistribution and use in source and binary forms, with or without
      7 // modification, are permitted provided that the following conditions are
      8 // met:
      9 //
     10 //     * Redistributions of source code must retain the above copyright
     11 // notice, this list of conditions and the following disclaimer.
     12 //     * Redistributions in binary form must reproduce the above
     13 // copyright notice, this list of conditions and the following disclaimer
     14 // in the documentation and/or other materials provided with the
     15 // distribution.
     16 //     * Neither the name of Google Inc. nor the names of its
     17 // contributors may be used to endorse or promote products derived from
     18 // this software without specific prior written permission.
     19 //
     20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31 
// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
     33 
     34 // dump_syms.mm: Create a symbol file for use with minidumps
     35 
     36 #include "common/mac/dump_syms.h"
     37 
     38 #include <Foundation/Foundation.h>
     39 #include <mach-o/arch.h>
     40 #include <mach-o/fat.h>
     41 #include <stdio.h>
     42 
     43 #include <ostream>
     44 #include <string>
     45 #include <vector>
     46 
     47 #include "common/dwarf/bytereader-inl.h"
     48 #include "common/dwarf/dwarf2reader.h"
     49 #include "common/dwarf_cfi_to_module.h"
     50 #include "common/dwarf_cu_to_module.h"
     51 #include "common/dwarf_line_to_module.h"
     52 #include "common/mac/file_id.h"
     53 #include "common/mac/arch_utilities.h"
     54 #include "common/mac/macho_reader.h"
     55 #include "common/module.h"
     56 #include "common/scoped_ptr.h"
     57 #include "common/stabs_reader.h"
     58 #include "common/stabs_to_module.h"
     59 #include "common/symbol_data.h"
     60 
     61 #ifndef CPU_TYPE_ARM
     62 #define CPU_TYPE_ARM (static_cast<cpu_type_t>(12))
     63 #endif //  CPU_TYPE_ARM
     64 
     65 #ifndef CPU_TYPE_ARM64
     66 #define CPU_TYPE_ARM64 (static_cast<cpu_type_t>(16777228))
     67 #endif  // CPU_TYPE_ARM64
     68 
     69 using dwarf2reader::ByteReader;
     70 using google_breakpad::DwarfCUToModule;
     71 using google_breakpad::DwarfLineToModule;
     72 using google_breakpad::FileID;
     73 using google_breakpad::mach_o::FatReader;
     74 using google_breakpad::mach_o::Section;
     75 using google_breakpad::mach_o::Segment;
     76 using google_breakpad::Module;
     77 using google_breakpad::StabsReader;
     78 using google_breakpad::StabsToModule;
     79 using google_breakpad::scoped_ptr;
     80 using std::make_pair;
     81 using std::pair;
     82 using std::string;
     83 using std::vector;
     84 
     85 namespace google_breakpad {
     86 
     87 bool DumpSymbols::Read(NSString *filename) {
     88   if (![[NSFileManager defaultManager] fileExistsAtPath:filename]) {
     89     fprintf(stderr, "Object file does not exist: %s\n",
     90             [filename fileSystemRepresentation]);
     91     return false;
     92   }
     93 
     94   input_pathname_ = [filename retain];
     95 
     96   // Does this filename refer to a dSYM bundle?
     97   NSBundle *bundle = [NSBundle bundleWithPath:input_pathname_];
     98 
     99   if (bundle) {
    100     // Filenames referring to bundles usually have names of the form
    101     // "<basename>.dSYM"; however, if the user has specified a wrapper
    102     // suffix (the WRAPPER_SUFFIX and WRAPPER_EXTENSION build settings),
    103     // then the name may have the form "<basename>.<extension>.dSYM". In
    104     // either case, the resource name for the file containing the DWARF
    105     // info within the bundle is <basename>.
    106     //
    107     // Since there's no way to tell how much to strip off, remove one
    108     // extension at a time, and use the first one that
    109     // pathForResource:ofType:inDirectory likes.
    110     NSString *base_name = [input_pathname_ lastPathComponent];
    111     NSString *dwarf_resource;
    112 
    113     do {
    114       NSString *new_base_name = [base_name stringByDeletingPathExtension];
    115 
    116       // If stringByDeletingPathExtension returned the name unchanged, then
    117       // there's nothing more for us to strip off --- lose.
    118       if ([new_base_name isEqualToString:base_name]) {
    119         fprintf(stderr, "Unable to find DWARF-bearing file in bundle: %s\n",
    120                 [input_pathname_ fileSystemRepresentation]);
    121         return false;
    122       }
    123 
    124       // Take the shortened result as our new base_name.
    125       base_name = new_base_name;
    126 
    127       // Try to find a DWARF resource in the bundle under the new base_name.
    128       dwarf_resource = [bundle pathForResource:base_name
    129                         ofType:nil inDirectory:@"DWARF"];
    130     } while (!dwarf_resource);
    131 
    132     object_filename_ = [dwarf_resource retain];
    133   } else {
    134     object_filename_ = [input_pathname_ retain];
    135   }
    136 
    137   // Read the file's contents into memory.
    138   //
    139   // The documentation for dataWithContentsOfMappedFile says:
    140   //
    141   //     Because of file mapping restrictions, this method should only be
    142   //     used if the file is guaranteed to exist for the duration of the
    143   //     data objects existence. It is generally safer to use the
    144   //     dataWithContentsOfFile: method.
    145   //
    146   // I gather this means that OS X doesn't have (or at least, that method
    147   // doesn't use) a form of mapping like Linux's MAP_PRIVATE, where the
    148   // process appears to get its own copy of the data, and changes to the
    149   // file don't affect memory and vice versa).
    150   NSError *error;
    151   contents_ = [NSData dataWithContentsOfFile:object_filename_
    152                                      options:0
    153                                        error:&error];
    154   if (!contents_) {
    155     fprintf(stderr, "Error reading object file: %s: %s\n",
    156             [object_filename_ fileSystemRepresentation],
    157             [[error localizedDescription] UTF8String]);
    158     return false;
    159   }
    160   [contents_ retain];
    161 
    162   // Get the list of object files present in the file.
    163   FatReader::Reporter fat_reporter([object_filename_
    164                                     fileSystemRepresentation]);
    165   FatReader fat_reader(&fat_reporter);
    166   if (!fat_reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes]),
    167                        [contents_ length])) {
    168     return false;
    169   }
    170 
    171   // Get our own copy of fat_reader's object file list.
    172   size_t object_files_count;
    173   const struct fat_arch *object_files =
    174     fat_reader.object_files(&object_files_count);
    175   if (object_files_count == 0) {
    176     fprintf(stderr, "Fat binary file contains *no* architectures: %s\n",
    177             [object_filename_ fileSystemRepresentation]);
    178     return false;
    179   }
    180   object_files_.resize(object_files_count);
    181   memcpy(&object_files_[0], object_files,
    182          sizeof(struct fat_arch) * object_files_count);
    183 
    184   return true;
    185 }
    186 
    187 bool DumpSymbols::SetArchitecture(cpu_type_t cpu_type,
    188                                   cpu_subtype_t cpu_subtype) {
    189   // Find the best match for the architecture the user requested.
    190   const struct fat_arch *best_match
    191     = NXFindBestFatArch(cpu_type, cpu_subtype, &object_files_[0],
    192                         static_cast<uint32_t>(object_files_.size()));
    193   if (!best_match) return false;
    194 
    195   // Record the selected object file.
    196   selected_object_file_ = best_match;
    197   return true;
    198 }
    199 
    200 bool DumpSymbols::SetArchitecture(const std::string &arch_name) {
    201   bool arch_set = false;
    202   const NXArchInfo *arch_info =
    203       google_breakpad::BreakpadGetArchInfoFromName(arch_name.c_str());
    204   if (arch_info) {
    205     arch_set = SetArchitecture(arch_info->cputype, arch_info->cpusubtype);
    206   }
    207   return arch_set;
    208 }
    209 
    210 string DumpSymbols::Identifier() {
    211   FileID file_id([object_filename_ fileSystemRepresentation]);
    212   unsigned char identifier_bytes[16];
    213   cpu_type_t cpu_type = selected_object_file_->cputype;
    214   cpu_subtype_t cpu_subtype = selected_object_file_->cpusubtype;
    215   if (!file_id.MachoIdentifier(cpu_type, cpu_subtype, identifier_bytes)) {
    216     fprintf(stderr, "Unable to calculate UUID of mach-o binary %s!\n",
    217             [object_filename_ fileSystemRepresentation]);
    218     return "";
    219   }
    220 
    221   char identifier_string[40];
    222   FileID::ConvertIdentifierToString(identifier_bytes, identifier_string,
    223                                     sizeof(identifier_string));
    224 
    225   string compacted(identifier_string);
    226   for(size_t i = compacted.find('-'); i != string::npos;
    227       i = compacted.find('-', i))
    228     compacted.erase(i, 1);
    229 
    230   return compacted;
    231 }
    232 
    233 // A line-to-module loader that accepts line number info parsed by
    234 // dwarf2reader::LineInfo and populates a Module and a line vector
    235 // with the results.
    236 class DumpSymbols::DumperLineToModule:
    237       public DwarfCUToModule::LineToModuleHandler {
    238  public:
    239   // Create a line-to-module converter using BYTE_READER.
    240   DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
    241       : byte_reader_(byte_reader) { }
    242 
    243   void StartCompilationUnit(const string& compilation_dir) {
    244     compilation_dir_ = compilation_dir;
    245   }
    246 
    247   void ReadProgram(const char *program, uint64 length,
    248                    Module *module, vector<Module::Line> *lines) {
    249     DwarfLineToModule handler(module, compilation_dir_, lines);
    250     dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
    251     parser.Start();
    252   }
    253  private:
    254   string compilation_dir_;
    255   dwarf2reader::ByteReader *byte_reader_;  // WEAK
    256 };
    257 
    258 bool DumpSymbols::ReadDwarf(google_breakpad::Module *module,
    259                             const mach_o::Reader &macho_reader,
    260                             const mach_o::SectionMap &dwarf_sections,
    261                             bool handle_inter_cu_refs) const {
    262   // Build a byte reader of the appropriate endianness.
    263   ByteReader byte_reader(macho_reader.big_endian()
    264                          ? dwarf2reader::ENDIANNESS_BIG
    265                          : dwarf2reader::ENDIANNESS_LITTLE);
    266 
    267   // Construct a context for this file.
    268   DwarfCUToModule::FileContext file_context(selected_object_name_,
    269                                             module,
    270                                             handle_inter_cu_refs);
    271 
    272   // Build a dwarf2reader::SectionMap from our mach_o::SectionMap.
    273   for (mach_o::SectionMap::const_iterator it = dwarf_sections.begin();
    274        it != dwarf_sections.end(); ++it) {
    275     file_context.AddSectionToSectionMap(
    276         it->first,
    277         reinterpret_cast<const char *>(it->second.contents.start),
    278         it->second.contents.Size());
    279   }
    280 
    281   // Find the __debug_info section.
    282   dwarf2reader::SectionMap::const_iterator debug_info_entry =
    283       file_context.section_map().find("__debug_info");
    284   assert(debug_info_entry != file_context.section_map().end());
    285   const std::pair<const char*, uint64>& debug_info_section =
    286       debug_info_entry->second;
    287   // There had better be a __debug_info section!
    288   if (!debug_info_section.first) {
    289     fprintf(stderr, "%s: __DWARF segment of file has no __debug_info section\n",
    290             selected_object_name_.c_str());
    291     return false;
    292   }
    293 
    294   // Build a line-to-module loader for the root handler to use.
    295   DumperLineToModule line_to_module(&byte_reader);
    296 
    297   // Walk the __debug_info section, one compilation unit at a time.
    298   uint64 debug_info_length = debug_info_section.second;
    299   for (uint64 offset = 0; offset < debug_info_length;) {
    300     // Make a handler for the root DIE that populates MODULE with the
    301     // debug info.
    302     DwarfCUToModule::WarningReporter reporter(selected_object_name_,
    303                                               offset);
    304     DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
    305     // Make a Dwarf2Handler that drives our DIEHandler.
    306     dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
    307     // Make a DWARF parser for the compilation unit at OFFSET.
    308     dwarf2reader::CompilationUnit dwarf_reader(file_context.section_map(),
    309                                                offset,
    310                                                &byte_reader,
    311                                                &die_dispatcher);
    312     // Process the entire compilation unit; get the offset of the next.
    313     offset += dwarf_reader.Start();
    314   }
    315 
    316   return true;
    317 }
    318 
    319 bool DumpSymbols::ReadCFI(google_breakpad::Module *module,
    320                           const mach_o::Reader &macho_reader,
    321                           const mach_o::Section &section,
    322                           bool eh_frame) const {
    323   // Find the appropriate set of register names for this file's
    324   // architecture.
    325   vector<string> register_names;
    326   switch (macho_reader.cpu_type()) {
    327     case CPU_TYPE_X86:
    328       register_names = DwarfCFIToModule::RegisterNames::I386();
    329       break;
    330     case CPU_TYPE_X86_64:
    331       register_names = DwarfCFIToModule::RegisterNames::X86_64();
    332       break;
    333     case CPU_TYPE_ARM:
    334       register_names = DwarfCFIToModule::RegisterNames::ARM();
    335       break;
    336     case CPU_TYPE_ARM64:
    337       register_names = DwarfCFIToModule::RegisterNames::ARM64();
    338       break;
    339     default: {
    340       const NXArchInfo *arch = google_breakpad::BreakpadGetArchInfoFromCpuType(
    341           macho_reader.cpu_type(), macho_reader.cpu_subtype());
    342       fprintf(stderr, "%s: cannot convert DWARF call frame information for ",
    343               selected_object_name_.c_str());
    344       if (arch)
    345         fprintf(stderr, "architecture '%s'", arch->name);
    346       else
    347         fprintf(stderr, "architecture %d,%d",
    348                 macho_reader.cpu_type(), macho_reader.cpu_subtype());
    349       fprintf(stderr, " to Breakpad symbol file: no register name table\n");
    350       return false;
    351     }
    352   }
    353 
    354   // Find the call frame information and its size.
    355   const char *cfi = reinterpret_cast<const char *>(section.contents.start);
    356   size_t cfi_size = section.contents.Size();
    357 
    358   // Plug together the parser, handler, and their entourages.
    359   DwarfCFIToModule::Reporter module_reporter(selected_object_name_,
    360                                              section.section_name);
    361   DwarfCFIToModule handler(module, register_names, &module_reporter);
    362   dwarf2reader::ByteReader byte_reader(macho_reader.big_endian() ?
    363                                        dwarf2reader::ENDIANNESS_BIG :
    364                                        dwarf2reader::ENDIANNESS_LITTLE);
    365   byte_reader.SetAddressSize(macho_reader.bits_64() ? 8 : 4);
    366   // At the moment, according to folks at Apple and some cursory
    367   // investigation, Mac OS X only uses DW_EH_PE_pcrel-based pointers, so
    368   // this is the only base address the CFI parser will need.
    369   byte_reader.SetCFIDataBase(section.address, cfi);
    370 
    371   dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(selected_object_name_,
    372                                                        section.section_name);
    373   dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
    374                                      &byte_reader, &handler, &dwarf_reporter,
    375                                      eh_frame);
    376   parser.Start();
    377   return true;
    378 }
    379 
    380 // A LoadCommandHandler that loads whatever debugging data it finds into a
    381 // Module.
    382 class DumpSymbols::LoadCommandDumper:
    383       public mach_o::Reader::LoadCommandHandler {
    384  public:
    385   // Create a load command dumper handling load commands from READER's
    386   // file, and adding data to MODULE.
    387   LoadCommandDumper(const DumpSymbols &dumper,
    388                     google_breakpad::Module *module,
    389                     const mach_o::Reader &reader,
    390                     SymbolData symbol_data,
    391                     bool handle_inter_cu_refs)
    392       : dumper_(dumper),
    393         module_(module),
    394         reader_(reader),
    395         symbol_data_(symbol_data),
    396         handle_inter_cu_refs_(handle_inter_cu_refs) { }
    397 
    398   bool SegmentCommand(const mach_o::Segment &segment);
    399   bool SymtabCommand(const ByteBuffer &entries, const ByteBuffer &strings);
    400 
    401  private:
    402   const DumpSymbols &dumper_;
    403   google_breakpad::Module *module_;  // WEAK
    404   const mach_o::Reader &reader_;
    405   const SymbolData symbol_data_;
    406   const bool handle_inter_cu_refs_;
    407 };
    408 
    409 bool DumpSymbols::LoadCommandDumper::SegmentCommand(const Segment &segment) {
    410   mach_o::SectionMap section_map;
    411   if (!reader_.MapSegmentSections(segment, &section_map))
    412     return false;
    413 
    414   if (segment.name == "__TEXT") {
    415     module_->SetLoadAddress(segment.vmaddr);
    416     if (symbol_data_ != NO_CFI) {
    417       mach_o::SectionMap::const_iterator eh_frame =
    418           section_map.find("__eh_frame");
    419       if (eh_frame != section_map.end()) {
    420         // If there is a problem reading this, don't treat it as a fatal error.
    421         dumper_.ReadCFI(module_, reader_, eh_frame->second, true);
    422       }
    423     }
    424     return true;
    425   }
    426 
    427   if (segment.name == "__DWARF") {
    428     if (symbol_data_ != ONLY_CFI) {
    429       if (!dumper_.ReadDwarf(module_, reader_, section_map,
    430                              handle_inter_cu_refs_)) {
    431         return false;
    432       }
    433     }
    434     if (symbol_data_ != NO_CFI) {
    435       mach_o::SectionMap::const_iterator debug_frame
    436           = section_map.find("__debug_frame");
    437       if (debug_frame != section_map.end()) {
    438         // If there is a problem reading this, don't treat it as a fatal error.
    439         dumper_.ReadCFI(module_, reader_, debug_frame->second, false);
    440       }
    441     }
    442   }
    443 
    444   return true;
    445 }
    446 
    447 bool DumpSymbols::LoadCommandDumper::SymtabCommand(const ByteBuffer &entries,
    448                                                    const ByteBuffer &strings) {
    449   StabsToModule stabs_to_module(module_);
    450   // Mac OS X STABS are never "unitized", and the size of the 'value' field
    451   // matches the address size of the executable.
    452   StabsReader stabs_reader(entries.start, entries.Size(),
    453                            strings.start, strings.Size(),
    454                            reader_.big_endian(),
    455                            reader_.bits_64() ? 8 : 4,
    456                            true,
    457                            &stabs_to_module);
    458   if (!stabs_reader.Process())
    459     return false;
    460   stabs_to_module.Finalize();
    461   return true;
    462 }
    463 
    464 bool DumpSymbols::ReadSymbolData(Module** out_module) {
    465   // Select an object file, if SetArchitecture hasn't been called to set one
    466   // explicitly.
    467   if (!selected_object_file_) {
    468     // If there's only one architecture, that's the one.
    469     if (object_files_.size() == 1)
    470       selected_object_file_ = &object_files_[0];
    471     else {
    472       // Look for an object file whose architecture matches our own.
    473       const NXArchInfo *local_arch = NXGetLocalArchInfo();
    474       if (!SetArchitecture(local_arch->cputype, local_arch->cpusubtype)) {
    475         fprintf(stderr, "%s: object file contains more than one"
    476                 " architecture, none of which match the current"
    477                 " architecture; specify an architecture explicitly"
    478                 " with '-a ARCH' to resolve the ambiguity\n",
    479                 [object_filename_ fileSystemRepresentation]);
    480         return false;
    481       }
    482     }
    483   }
    484 
    485   assert(selected_object_file_);
    486 
    487   // Find the name of the selected file's architecture, to appear in
    488   // the MODULE record and in error messages.
    489   const NXArchInfo *selected_arch_info =
    490       google_breakpad::BreakpadGetArchInfoFromCpuType(
    491           selected_object_file_->cputype, selected_object_file_->cpusubtype);
    492 
    493   const char *selected_arch_name = selected_arch_info->name;
    494   if (strcmp(selected_arch_name, "i386") == 0)
    495     selected_arch_name = "x86";
    496 
    497   // Produce a name to use in error messages that includes the
    498   // filename, and the architecture, if there is more than one.
    499   selected_object_name_ = [object_filename_ UTF8String];
    500   if (object_files_.size() > 1) {
    501     selected_object_name_ += ", architecture ";
    502     selected_object_name_ + selected_arch_name;
    503   }
    504 
    505   // Compute a module name, to appear in the MODULE record.
    506   NSString *module_name = [object_filename_ lastPathComponent];
    507 
    508   // Choose an identifier string, to appear in the MODULE record.
    509   string identifier = Identifier();
    510   if (identifier.empty())
    511     return false;
    512   identifier += "0";
    513 
    514   // Create a module to hold the debugging information.
    515   scoped_ptr<Module> module(new Module([module_name UTF8String],
    516                                        "mac",
    517                                        selected_arch_name,
    518                                        identifier));
    519 
    520   // Parse the selected object file.
    521   mach_o::Reader::Reporter reporter(selected_object_name_);
    522   mach_o::Reader reader(&reporter);
    523   if (!reader.Read(reinterpret_cast<const uint8_t *>([contents_ bytes])
    524                    + selected_object_file_->offset,
    525                    selected_object_file_->size,
    526                    selected_object_file_->cputype,
    527                    selected_object_file_->cpusubtype))
    528     return false;
    529 
    530   // Walk its load commands, and deal with whatever is there.
    531   LoadCommandDumper load_command_dumper(*this, module.get(), reader,
    532                                         symbol_data_, handle_inter_cu_refs_);
    533   if (!reader.WalkLoadCommands(&load_command_dumper))
    534     return false;
    535 
    536   *out_module = module.release();
    537 
    538   return true;
    539 }
    540 
    541 bool DumpSymbols::WriteSymbolFile(std::ostream &stream) {
    542   Module* module = NULL;
    543 
    544   if (ReadSymbolData(&module) && module) {
    545     bool res = module->Write(stream, symbol_data_);
    546     delete module;
    547     return res;
    548   }
    549 
    550   return false;
    551 }
    552 
    553 }  // namespace google_breakpad
    554