Home | History | Annotate | Download | only in llvm-objdump
      1 //===-- MachODump.cpp - Object file dumping utility for llvm --------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the MachO-specific dumper for llvm-objdump.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "llvm-objdump.h"
     15 #include "llvm/ADT/OwningPtr.h"
     16 #include "llvm/ADT/STLExtras.h"
     17 #include "llvm/ADT/StringExtras.h"
     18 #include "llvm/ADT/Triple.h"
     19 #include "llvm/DebugInfo/DIContext.h"
     20 #include "llvm/MC/MCAsmInfo.h"
     21 #include "llvm/MC/MCDisassembler.h"
     22 #include "llvm/MC/MCInst.h"
     23 #include "llvm/MC/MCInstPrinter.h"
     24 #include "llvm/MC/MCInstrAnalysis.h"
     25 #include "llvm/MC/MCInstrDesc.h"
     26 #include "llvm/MC/MCInstrInfo.h"
     27 #include "llvm/MC/MCRegisterInfo.h"
     28 #include "llvm/MC/MCSubtargetInfo.h"
     29 #include "llvm/Object/MachO.h"
     30 #include "llvm/Support/Casting.h"
     31 #include "llvm/Support/CommandLine.h"
     32 #include "llvm/Support/Debug.h"
     33 #include "llvm/Support/Format.h"
     34 #include "llvm/Support/GraphWriter.h"
     35 #include "llvm/Support/MachO.h"
     36 #include "llvm/Support/MemoryBuffer.h"
     37 #include "llvm/Support/TargetRegistry.h"
     38 #include "llvm/Support/TargetSelect.h"
     39 #include "llvm/Support/raw_ostream.h"
     40 #include "llvm/Support/system_error.h"
     41 #include <algorithm>
     42 #include <cstring>
     43 using namespace llvm;
     44 using namespace object;
     45 
// -g: when set, the disassembler in this file builds a DIContext and appends
// "file:line:column" annotations to instructions whose line info changed.
static cl::opt<bool>
  UseDbg("g", cl::desc("Print line information from debug info if available"));

// -dsym: path of a separate file to load debug info from instead of the
// object being disassembled. Only consulted when -g is also given.
static cl::opt<std::string>
  DSYMFile("dsym", cl::desc("Use .dSYM file for debug info"));
     51 
     52 static const Target *GetTarget(const MachOObjectFile *MachOObj) {
     53   // Figure out the target triple.
     54   if (TripleName.empty()) {
     55     llvm::Triple TT("unknown-unknown-unknown");
     56     TT.setArch(Triple::ArchType(MachOObj->getArch()));
     57     TripleName = TT.str();
     58   }
     59 
     60   // Get the target specific parser.
     61   std::string Error;
     62   const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
     63   if (TheTarget)
     64     return TheTarget;
     65 
     66   errs() << "llvm-objdump: error: unable to get target for '" << TripleName
     67          << "', see --version and --triple.\n";
     68   return 0;
     69 }
     70 
     71 struct SymbolSorter {
     72   bool operator()(const SymbolRef &A, const SymbolRef &B) {
     73     SymbolRef::Type AType, BType;
     74     A.getType(AType);
     75     B.getType(BType);
     76 
     77     uint64_t AAddr, BAddr;
     78     if (AType != SymbolRef::ST_Function)
     79       AAddr = 0;
     80     else
     81       A.getAddress(AAddr);
     82     if (BType != SymbolRef::ST_Function)
     83       BAddr = 0;
     84     else
     85       B.getAddress(BAddr);
     86     return AAddr < BAddr;
     87   }
     88 };
     89 
// Types for the sorted data-in-code table that is built before disassembly.
// Each entry pairs an address (the key the table is sorted and searched on)
// with the DiceRef describing the data-in-code region at that address.
typedef std::pair<uint64_t, DiceRef> DiceTableEntry;
typedef std::vector<DiceTableEntry> DiceTable;
typedef DiceTable::iterator dice_table_iterator;
     95 
     96 static bool
     97 compareDiceTableEntries(const DiceTableEntry i,
     98                         const DiceTableEntry j) {
     99   return i.first == j.first;
    100 }
    101 
    102 static void DumpDataInCode(const char *bytes, uint64_t Size,
    103                            unsigned short Kind) {
    104   uint64_t Value;
    105 
    106   switch (Kind) {
    107   case macho::Data:
    108     switch (Size) {
    109     case 4:
    110       Value = bytes[3] << 24 |
    111               bytes[2] << 16 |
    112               bytes[1] << 8 |
    113               bytes[0];
    114       outs() << "\t.long " << Value;
    115       break;
    116     case 2:
    117       Value = bytes[1] << 8 |
    118               bytes[0];
    119       outs() << "\t.short " << Value;
    120       break;
    121     case 1:
    122       Value = bytes[0];
    123       outs() << "\t.byte " << Value;
    124       break;
    125     }
    126     outs() << "\t@ KIND_DATA\n";
    127     break;
    128   case macho::JumpTable8:
    129     Value = bytes[0];
    130     outs() << "\t.byte " << Value << "\t@ KIND_JUMP_TABLE8";
    131     break;
    132   case macho::JumpTable16:
    133     Value = bytes[1] << 8 |
    134             bytes[0];
    135     outs() << "\t.short " << Value << "\t@ KIND_JUMP_TABLE16";
    136     break;
    137   case macho::JumpTable32:
    138     Value = bytes[3] << 24 |
    139             bytes[2] << 16 |
    140             bytes[1] << 8 |
    141             bytes[0];
    142     outs() << "\t.long " << Value << "\t@ KIND_JUMP_TABLE32";
    143     break;
    144   default:
    145     outs() << "\t@ data in code kind = " << Kind << "\n";
    146     break;
    147   }
    148 }
    149 
    150 static void
    151 getSectionsAndSymbols(const macho::Header Header,
    152                       MachOObjectFile *MachOObj,
    153                       std::vector<SectionRef> &Sections,
    154                       std::vector<SymbolRef> &Symbols,
    155                       SmallVectorImpl<uint64_t> &FoundFns,
    156                       uint64_t &BaseSegmentAddress) {
    157   error_code ec;
    158   for (symbol_iterator SI = MachOObj->begin_symbols(),
    159        SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
    160     Symbols.push_back(*SI);
    161 
    162   for (section_iterator SI = MachOObj->begin_sections(),
    163        SE = MachOObj->end_sections(); SI != SE; SI.increment(ec)) {
    164     SectionRef SR = *SI;
    165     StringRef SectName;
    166     SR.getName(SectName);
    167     Sections.push_back(*SI);
    168   }
    169 
    170   MachOObjectFile::LoadCommandInfo Command =
    171     MachOObj->getFirstLoadCommandInfo();
    172   bool BaseSegmentAddressSet = false;
    173   for (unsigned i = 0; ; ++i) {
    174     if (Command.C.Type == macho::LCT_FunctionStarts) {
    175       // We found a function starts segment, parse the addresses for later
    176       // consumption.
    177       macho::LinkeditDataLoadCommand LLC =
    178         MachOObj->getLinkeditDataLoadCommand(Command);
    179 
    180       MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns);
    181     }
    182     else if (Command.C.Type == macho::LCT_Segment) {
    183       macho::SegmentLoadCommand SLC =
    184         MachOObj->getSegmentLoadCommand(Command);
    185       StringRef SegName = SLC.Name;
    186       if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
    187         BaseSegmentAddressSet = true;
    188         BaseSegmentAddress = SLC.VMAddress;
    189       }
    190     }
    191 
    192     if (i == Header.NumLoadCommands - 1)
    193       break;
    194     else
    195       Command = MachOObj->getNextLoadCommandInfo(Command);
    196   }
    197 }
    198 
    199 static void DisassembleInputMachO2(StringRef Filename,
    200                                    MachOObjectFile *MachOOF);
    201 
    202 void llvm::DisassembleInputMachO(StringRef Filename) {
    203   OwningPtr<MemoryBuffer> Buff;
    204 
    205   if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
    206     errs() << "llvm-objdump: " << Filename << ": " << ec.message() << "\n";
    207     return;
    208   }
    209 
    210   OwningPtr<MachOObjectFile> MachOOF(static_cast<MachOObjectFile*>(
    211         ObjectFile::createMachOObjectFile(Buff.take())));
    212 
    213   DisassembleInputMachO2(Filename, MachOOF.get());
    214 }
    215 
    216 static void DisassembleInputMachO2(StringRef Filename,
    217                                    MachOObjectFile *MachOOF) {
    218   const Target *TheTarget = GetTarget(MachOOF);
    219   if (!TheTarget) {
    220     // GetTarget prints out stuff.
    221     return;
    222   }
    223   OwningPtr<const MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
    224   OwningPtr<MCInstrAnalysis>
    225     InstrAnalysis(TheTarget->createMCInstrAnalysis(InstrInfo.get()));
    226 
    227   // Set up disassembler.
    228   OwningPtr<const MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
    229   OwningPtr<const MCAsmInfo> AsmInfo(
    230       TheTarget->createMCAsmInfo(*MRI, TripleName));
    231   OwningPtr<const MCSubtargetInfo>
    232     STI(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
    233   OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI));
    234   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
    235   OwningPtr<MCInstPrinter>
    236     IP(TheTarget->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *InstrInfo,
    237                                       *MRI, *STI));
    238 
    239   if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) {
    240     errs() << "error: couldn't initialize disassembler for target "
    241            << TripleName << '\n';
    242     return;
    243   }
    244 
    245   outs() << '\n' << Filename << ":\n\n";
    246 
    247   macho::Header Header = MachOOF->getHeader();
    248 
    249   // FIXME: FoundFns isn't used anymore. Using symbols/LC_FUNCTION_STARTS to
    250   // determine function locations will eventually go in MCObjectDisassembler.
    251   // FIXME: Using the -cfg command line option, this code used to be able to
    252   // annotate relocations with the referenced symbol's name, and if this was
    253   // inside a __[cf]string section, the data it points to. This is now replaced
    254   // by the upcoming MCSymbolizer, which needs the appropriate setup done above.
    255   std::vector<SectionRef> Sections;
    256   std::vector<SymbolRef> Symbols;
    257   SmallVector<uint64_t, 8> FoundFns;
    258   uint64_t BaseSegmentAddress;
    259 
    260   getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns,
    261                         BaseSegmentAddress);
    262 
    263   // Make a copy of the unsorted symbol list. FIXME: duplication
    264   std::vector<SymbolRef> UnsortedSymbols(Symbols);
    265   // Sort the symbols by address, just in case they didn't come in that way.
    266   std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
    267 
    268   // Build a data in code table that is sorted on by the address of each entry.
    269   uint64_t BaseAddress = 0;
    270   if (Header.FileType == macho::HFT_Object)
    271     Sections[0].getAddress(BaseAddress);
    272   else
    273     BaseAddress = BaseSegmentAddress;
    274   DiceTable Dices;
    275   error_code ec;
    276   for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices();
    277        DI != DE; DI.increment(ec)){
    278     uint32_t Offset;
    279     DI->getOffset(Offset);
    280     Dices.push_back(std::make_pair(BaseAddress + Offset, *DI));
    281   }
    282   array_pod_sort(Dices.begin(), Dices.end());
    283 
    284 #ifndef NDEBUG
    285   raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
    286 #else
    287   raw_ostream &DebugOut = nulls();
    288 #endif
    289 
    290   OwningPtr<DIContext> diContext;
    291   ObjectFile *DbgObj = MachOOF;
    292   // Try to find debug info and set up the DIContext for it.
    293   if (UseDbg) {
    294     // A separate DSym file path was specified, parse it as a macho file,
    295     // get the sections and supply it to the section name parsing machinery.
    296     if (!DSYMFile.empty()) {
    297       OwningPtr<MemoryBuffer> Buf;
    298       if (error_code ec = MemoryBuffer::getFileOrSTDIN(DSYMFile, Buf)) {
    299         errs() << "llvm-objdump: " << Filename << ": " << ec.message() << '\n';
    300         return;
    301       }
    302       DbgObj = ObjectFile::createMachOObjectFile(Buf.take());
    303     }
    304 
    305     // Setup the DIContext
    306     diContext.reset(DIContext::getDWARFContext(DbgObj));
    307   }
    308 
    309   for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
    310 
    311     bool SectIsText = false;
    312     Sections[SectIdx].isText(SectIsText);
    313     if (SectIsText == false)
    314       continue;
    315 
    316     StringRef SectName;
    317     if (Sections[SectIdx].getName(SectName) ||
    318         SectName != "__text")
    319       continue; // Skip non-text sections
    320 
    321     DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl();
    322 
    323     StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR);
    324     if (SegmentName != "__TEXT")
    325       continue;
    326 
    327     StringRef Bytes;
    328     Sections[SectIdx].getContents(Bytes);
    329     StringRefMemoryObject memoryObject(Bytes);
    330     bool symbolTableWorked = false;
    331 
    332     // Parse relocations.
    333     std::vector<std::pair<uint64_t, SymbolRef> > Relocs;
    334     error_code ec;
    335     for (relocation_iterator RI = Sections[SectIdx].begin_relocations(),
    336          RE = Sections[SectIdx].end_relocations(); RI != RE; RI.increment(ec)) {
    337       uint64_t RelocOffset, SectionAddress;
    338       RI->getOffset(RelocOffset);
    339       Sections[SectIdx].getAddress(SectionAddress);
    340       RelocOffset -= SectionAddress;
    341 
    342       symbol_iterator RelocSym = RI->getSymbol();
    343 
    344       Relocs.push_back(std::make_pair(RelocOffset, *RelocSym));
    345     }
    346     array_pod_sort(Relocs.begin(), Relocs.end());
    347 
    348     // Disassemble symbol by symbol.
    349     for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
    350       StringRef SymName;
    351       Symbols[SymIdx].getName(SymName);
    352 
    353       SymbolRef::Type ST;
    354       Symbols[SymIdx].getType(ST);
    355       if (ST != SymbolRef::ST_Function)
    356         continue;
    357 
    358       // Make sure the symbol is defined in this section.
    359       bool containsSym = false;
    360       Sections[SectIdx].containsSymbol(Symbols[SymIdx], containsSym);
    361       if (!containsSym)
    362         continue;
    363 
    364       // Start at the address of the symbol relative to the section's address.
    365       uint64_t SectionAddress = 0;
    366       uint64_t Start = 0;
    367       Sections[SectIdx].getAddress(SectionAddress);
    368       Symbols[SymIdx].getAddress(Start);
    369       Start -= SectionAddress;
    370 
    371       // Stop disassembling either at the beginning of the next symbol or at
    372       // the end of the section.
    373       bool containsNextSym = false;
    374       uint64_t NextSym = 0;
    375       uint64_t NextSymIdx = SymIdx+1;
    376       while (Symbols.size() > NextSymIdx) {
    377         SymbolRef::Type NextSymType;
    378         Symbols[NextSymIdx].getType(NextSymType);
    379         if (NextSymType == SymbolRef::ST_Function) {
    380           Sections[SectIdx].containsSymbol(Symbols[NextSymIdx],
    381                                            containsNextSym);
    382           Symbols[NextSymIdx].getAddress(NextSym);
    383           NextSym -= SectionAddress;
    384           break;
    385         }
    386         ++NextSymIdx;
    387       }
    388 
    389       uint64_t SectSize;
    390       Sections[SectIdx].getSize(SectSize);
    391       uint64_t End = containsNextSym ?  NextSym : SectSize;
    392       uint64_t Size;
    393 
    394       symbolTableWorked = true;
    395 
    396       outs() << SymName << ":\n";
    397       DILineInfo lastLine;
    398       for (uint64_t Index = Start; Index < End; Index += Size) {
    399         MCInst Inst;
    400 
    401         uint64_t SectAddress = 0;
    402         Sections[SectIdx].getAddress(SectAddress);
    403         outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
    404 
    405         // Check the data in code table here to see if this is data not an
    406         // instruction to be disassembled.
    407         DiceTable Dice;
    408         Dice.push_back(std::make_pair(SectAddress + Index, DiceRef()));
    409         dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(),
    410                                               Dice.begin(), Dice.end(),
    411                                               compareDiceTableEntries);
    412         if (DTI != Dices.end()){
    413           uint16_t Length;
    414           DTI->second.getLength(Length);
    415           DumpBytes(StringRef(Bytes.data() + Index, Length));
    416           uint16_t Kind;
    417           DTI->second.getKind(Kind);
    418           DumpDataInCode(Bytes.data() + Index, Length, Kind);
    419           continue;
    420         }
    421 
    422         if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
    423                                    DebugOut, nulls())) {
    424           DumpBytes(StringRef(Bytes.data() + Index, Size));
    425           IP->printInst(&Inst, outs(), "");
    426 
    427           // Print debug info.
    428           if (diContext) {
    429             DILineInfo dli =
    430               diContext->getLineInfoForAddress(SectAddress + Index);
    431             // Print valid line info if it changed.
    432             if (dli != lastLine && dli.getLine() != 0)
    433               outs() << "\t## " << dli.getFileName() << ':'
    434                 << dli.getLine() << ':' << dli.getColumn();
    435             lastLine = dli;
    436           }
    437           outs() << "\n";
    438         } else {
    439           errs() << "llvm-objdump: warning: invalid instruction encoding\n";
    440           if (Size == 0)
    441             Size = 1; // skip illegible bytes
    442         }
    443       }
    444     }
    445     if (!symbolTableWorked) {
    446       // Reading the symbol table didn't work, disassemble the whole section.
    447       uint64_t SectAddress;
    448       Sections[SectIdx].getAddress(SectAddress);
    449       uint64_t SectSize;
    450       Sections[SectIdx].getSize(SectSize);
    451       uint64_t InstSize;
    452       for (uint64_t Index = 0; Index < SectSize; Index += InstSize) {
    453         MCInst Inst;
    454 
    455         if (DisAsm->getInstruction(Inst, InstSize, memoryObject, Index,
    456                                    DebugOut, nulls())) {
    457           outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
    458           DumpBytes(StringRef(Bytes.data() + Index, InstSize));
    459           IP->printInst(&Inst, outs(), "");
    460           outs() << "\n";
    461         } else {
    462           errs() << "llvm-objdump: warning: invalid instruction encoding\n";
    463           if (InstSize == 0)
    464             InstSize = 1; // skip illegible bytes
    465         }
    466       }
    467     }
    468   }
    469 }
    470