Home | History | Annotate | Download | only in llvm-objdump
      1 //===-- MachODump.cpp - Object file dumping utility for llvm --------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the MachO-specific dumper for llvm-objdump.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "llvm-objdump.h"
     15 #include "llvm/ADT/STLExtras.h"
     16 #include "llvm/ADT/StringExtras.h"
     17 #include "llvm/ADT/Triple.h"
     18 #include "llvm/DebugInfo/DIContext.h"
     19 #include "llvm/MC/MCAsmInfo.h"
     20 #include "llvm/MC/MCContext.h"
     21 #include "llvm/MC/MCDisassembler.h"
     22 #include "llvm/MC/MCInst.h"
     23 #include "llvm/MC/MCInstPrinter.h"
     24 #include "llvm/MC/MCInstrAnalysis.h"
     25 #include "llvm/MC/MCInstrDesc.h"
     26 #include "llvm/MC/MCInstrInfo.h"
     27 #include "llvm/MC/MCRegisterInfo.h"
     28 #include "llvm/MC/MCSubtargetInfo.h"
     29 #include "llvm/Object/MachO.h"
     30 #include "llvm/Support/Casting.h"
     31 #include "llvm/Support/CommandLine.h"
     32 #include "llvm/Support/Debug.h"
     33 #include "llvm/Support/Format.h"
     34 #include "llvm/Support/GraphWriter.h"
     35 #include "llvm/Support/MachO.h"
     36 #include "llvm/Support/MemoryBuffer.h"
     37 #include "llvm/Support/TargetRegistry.h"
     38 #include "llvm/Support/TargetSelect.h"
     39 #include "llvm/Support/raw_ostream.h"
     40 #include <algorithm>
     41 #include <cstring>
     42 #include <system_error>
     43 using namespace llvm;
     44 using namespace object;
     45 
     46 static cl::opt<bool>
     47   UseDbg("g", cl::desc("Print line information from debug info if available"));
     48 
     49 static cl::opt<std::string>
     50   DSYMFile("dsym", cl::desc("Use .dSYM file for debug info"));
     51 
     52 static const Target *GetTarget(const MachOObjectFile *MachOObj) {
     53   // Figure out the target triple.
     54   if (TripleName.empty()) {
     55     llvm::Triple TT("unknown-unknown-unknown");
     56     TT.setArch(Triple::ArchType(MachOObj->getArch()));
     57     TripleName = TT.str();
     58   }
     59 
     60   // Get the target specific parser.
     61   std::string Error;
     62   const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
     63   if (TheTarget)
     64     return TheTarget;
     65 
     66   errs() << "llvm-objdump: error: unable to get target for '" << TripleName
     67          << "', see --version and --triple.\n";
     68   return nullptr;
     69 }
     70 
     71 struct SymbolSorter {
     72   bool operator()(const SymbolRef &A, const SymbolRef &B) {
     73     SymbolRef::Type AType, BType;
     74     A.getType(AType);
     75     B.getType(BType);
     76 
     77     uint64_t AAddr, BAddr;
     78     if (AType != SymbolRef::ST_Function)
     79       AAddr = 0;
     80     else
     81       A.getAddress(AAddr);
     82     if (BType != SymbolRef::ST_Function)
     83       BAddr = 0;
     84     else
     85       B.getAddress(BAddr);
     86     return AAddr < BAddr;
     87   }
     88 };
     89 
     90 // Types for the storted data in code table that is built before disassembly
     91 // and the predicate function to sort them.
     92 typedef std::pair<uint64_t, DiceRef> DiceTableEntry;
     93 typedef std::vector<DiceTableEntry> DiceTable;
     94 typedef DiceTable::iterator dice_table_iterator;
     95 
     96 static bool
     97 compareDiceTableEntries(const DiceTableEntry i,
     98                         const DiceTableEntry j) {
     99   return i.first == j.first;
    100 }
    101 
    102 static void DumpDataInCode(const char *bytes, uint64_t Size,
    103                            unsigned short Kind) {
    104   uint64_t Value;
    105 
    106   switch (Kind) {
    107   case MachO::DICE_KIND_DATA:
    108     switch (Size) {
    109     case 4:
    110       Value = bytes[3] << 24 |
    111               bytes[2] << 16 |
    112               bytes[1] << 8 |
    113               bytes[0];
    114       outs() << "\t.long " << Value;
    115       break;
    116     case 2:
    117       Value = bytes[1] << 8 |
    118               bytes[0];
    119       outs() << "\t.short " << Value;
    120       break;
    121     case 1:
    122       Value = bytes[0];
    123       outs() << "\t.byte " << Value;
    124       break;
    125     }
    126     outs() << "\t@ KIND_DATA\n";
    127     break;
    128   case MachO::DICE_KIND_JUMP_TABLE8:
    129     Value = bytes[0];
    130     outs() << "\t.byte " << Value << "\t@ KIND_JUMP_TABLE8";
    131     break;
    132   case MachO::DICE_KIND_JUMP_TABLE16:
    133     Value = bytes[1] << 8 |
    134             bytes[0];
    135     outs() << "\t.short " << Value << "\t@ KIND_JUMP_TABLE16";
    136     break;
    137   case MachO::DICE_KIND_JUMP_TABLE32:
    138     Value = bytes[3] << 24 |
    139             bytes[2] << 16 |
    140             bytes[1] << 8 |
    141             bytes[0];
    142     outs() << "\t.long " << Value << "\t@ KIND_JUMP_TABLE32";
    143     break;
    144   default:
    145     outs() << "\t@ data in code kind = " << Kind << "\n";
    146     break;
    147   }
    148 }
    149 
    150 static void getSectionsAndSymbols(const MachO::mach_header Header,
    151                                   MachOObjectFile *MachOObj,
    152                                   std::vector<SectionRef> &Sections,
    153                                   std::vector<SymbolRef> &Symbols,
    154                                   SmallVectorImpl<uint64_t> &FoundFns,
    155                                   uint64_t &BaseSegmentAddress) {
    156   for (const SymbolRef &Symbol : MachOObj->symbols())
    157     Symbols.push_back(Symbol);
    158 
    159   for (const SectionRef &Section : MachOObj->sections()) {
    160     StringRef SectName;
    161     Section.getName(SectName);
    162     Sections.push_back(Section);
    163   }
    164 
    165   MachOObjectFile::LoadCommandInfo Command =
    166       MachOObj->getFirstLoadCommandInfo();
    167   bool BaseSegmentAddressSet = false;
    168   for (unsigned i = 0; ; ++i) {
    169     if (Command.C.cmd == MachO::LC_FUNCTION_STARTS) {
    170       // We found a function starts segment, parse the addresses for later
    171       // consumption.
    172       MachO::linkedit_data_command LLC =
    173         MachOObj->getLinkeditDataLoadCommand(Command);
    174 
    175       MachOObj->ReadULEB128s(LLC.dataoff, FoundFns);
    176     }
    177     else if (Command.C.cmd == MachO::LC_SEGMENT) {
    178       MachO::segment_command SLC =
    179         MachOObj->getSegmentLoadCommand(Command);
    180       StringRef SegName = SLC.segname;
    181       if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
    182         BaseSegmentAddressSet = true;
    183         BaseSegmentAddress = SLC.vmaddr;
    184       }
    185     }
    186 
    187     if (i == Header.ncmds - 1)
    188       break;
    189     else
    190       Command = MachOObj->getNextLoadCommandInfo(Command);
    191   }
    192 }
    193 
    194 static void DisassembleInputMachO2(StringRef Filename,
    195                                    MachOObjectFile *MachOOF);
    196 
    197 void llvm::DisassembleInputMachO(StringRef Filename) {
    198   ErrorOr<std::unique_ptr<MemoryBuffer>> Buff =
    199       MemoryBuffer::getFileOrSTDIN(Filename);
    200   if (std::error_code EC = Buff.getError()) {
    201     errs() << "llvm-objdump: " << Filename << ": " << EC.message() << "\n";
    202     return;
    203   }
    204 
    205   std::unique_ptr<MachOObjectFile> MachOOF(static_cast<MachOObjectFile *>(
    206       ObjectFile::createMachOObjectFile(Buff.get()).get()));
    207 
    208   DisassembleInputMachO2(Filename, MachOOF.get());
    209 }
    210 
    211 static void DisassembleInputMachO2(StringRef Filename,
    212                                    MachOObjectFile *MachOOF) {
    213   const Target *TheTarget = GetTarget(MachOOF);
    214   if (!TheTarget) {
    215     // GetTarget prints out stuff.
    216     return;
    217   }
    218   std::unique_ptr<const MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
    219   std::unique_ptr<MCInstrAnalysis> InstrAnalysis(
    220       TheTarget->createMCInstrAnalysis(InstrInfo.get()));
    221 
    222   // Set up disassembler.
    223   std::unique_ptr<const MCRegisterInfo> MRI(
    224       TheTarget->createMCRegInfo(TripleName));
    225   std::unique_ptr<const MCAsmInfo> AsmInfo(
    226       TheTarget->createMCAsmInfo(*MRI, TripleName));
    227   std::unique_ptr<const MCSubtargetInfo> STI(
    228       TheTarget->createMCSubtargetInfo(TripleName, "", ""));
    229   MCContext Ctx(AsmInfo.get(), MRI.get(), nullptr);
    230   std::unique_ptr<const MCDisassembler> DisAsm(
    231     TheTarget->createMCDisassembler(*STI, Ctx));
    232   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
    233   std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
    234       AsmPrinterVariant, *AsmInfo, *InstrInfo, *MRI, *STI));
    235 
    236   if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) {
    237     errs() << "error: couldn't initialize disassembler for target "
    238            << TripleName << '\n';
    239     return;
    240   }
    241 
    242   outs() << '\n' << Filename << ":\n\n";
    243 
    244   MachO::mach_header Header = MachOOF->getHeader();
    245 
    246   // FIXME: FoundFns isn't used anymore. Using symbols/LC_FUNCTION_STARTS to
    247   // determine function locations will eventually go in MCObjectDisassembler.
    248   // FIXME: Using the -cfg command line option, this code used to be able to
    249   // annotate relocations with the referenced symbol's name, and if this was
    250   // inside a __[cf]string section, the data it points to. This is now replaced
    251   // by the upcoming MCSymbolizer, which needs the appropriate setup done above.
    252   std::vector<SectionRef> Sections;
    253   std::vector<SymbolRef> Symbols;
    254   SmallVector<uint64_t, 8> FoundFns;
    255   uint64_t BaseSegmentAddress;
    256 
    257   getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns,
    258                         BaseSegmentAddress);
    259 
    260   // Sort the symbols by address, just in case they didn't come in that way.
    261   std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
    262 
    263   // Build a data in code table that is sorted on by the address of each entry.
    264   uint64_t BaseAddress = 0;
    265   if (Header.filetype == MachO::MH_OBJECT)
    266     Sections[0].getAddress(BaseAddress);
    267   else
    268     BaseAddress = BaseSegmentAddress;
    269   DiceTable Dices;
    270   for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices();
    271        DI != DE; ++DI) {
    272     uint32_t Offset;
    273     DI->getOffset(Offset);
    274     Dices.push_back(std::make_pair(BaseAddress + Offset, *DI));
    275   }
    276   array_pod_sort(Dices.begin(), Dices.end());
    277 
    278 #ifndef NDEBUG
    279   raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
    280 #else
    281   raw_ostream &DebugOut = nulls();
    282 #endif
    283 
    284   std::unique_ptr<DIContext> diContext;
    285   ObjectFile *DbgObj = MachOOF;
    286   // Try to find debug info and set up the DIContext for it.
    287   if (UseDbg) {
    288     // A separate DSym file path was specified, parse it as a macho file,
    289     // get the sections and supply it to the section name parsing machinery.
    290     if (!DSYMFile.empty()) {
    291       ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
    292           MemoryBuffer::getFileOrSTDIN(DSYMFile);
    293       if (std::error_code EC = Buf.getError()) {
    294         errs() << "llvm-objdump: " << Filename << ": " << EC.message() << '\n';
    295         return;
    296       }
    297       DbgObj = ObjectFile::createMachOObjectFile(Buf.get()).get();
    298     }
    299 
    300     // Setup the DIContext
    301     diContext.reset(DIContext::getDWARFContext(DbgObj));
    302   }
    303 
    304   for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
    305 
    306     bool SectIsText = false;
    307     Sections[SectIdx].isText(SectIsText);
    308     if (SectIsText == false)
    309       continue;
    310 
    311     StringRef SectName;
    312     if (Sections[SectIdx].getName(SectName) ||
    313         SectName != "__text")
    314       continue; // Skip non-text sections
    315 
    316     DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl();
    317 
    318     StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR);
    319     if (SegmentName != "__TEXT")
    320       continue;
    321 
    322     StringRef Bytes;
    323     Sections[SectIdx].getContents(Bytes);
    324     StringRefMemoryObject memoryObject(Bytes);
    325     bool symbolTableWorked = false;
    326 
    327     // Parse relocations.
    328     std::vector<std::pair<uint64_t, SymbolRef>> Relocs;
    329     for (const RelocationRef &Reloc : Sections[SectIdx].relocations()) {
    330       uint64_t RelocOffset, SectionAddress;
    331       Reloc.getOffset(RelocOffset);
    332       Sections[SectIdx].getAddress(SectionAddress);
    333       RelocOffset -= SectionAddress;
    334 
    335       symbol_iterator RelocSym = Reloc.getSymbol();
    336 
    337       Relocs.push_back(std::make_pair(RelocOffset, *RelocSym));
    338     }
    339     array_pod_sort(Relocs.begin(), Relocs.end());
    340 
    341     // Disassemble symbol by symbol.
    342     for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
    343       StringRef SymName;
    344       Symbols[SymIdx].getName(SymName);
    345 
    346       SymbolRef::Type ST;
    347       Symbols[SymIdx].getType(ST);
    348       if (ST != SymbolRef::ST_Function)
    349         continue;
    350 
    351       // Make sure the symbol is defined in this section.
    352       bool containsSym = false;
    353       Sections[SectIdx].containsSymbol(Symbols[SymIdx], containsSym);
    354       if (!containsSym)
    355         continue;
    356 
    357       // Start at the address of the symbol relative to the section's address.
    358       uint64_t SectionAddress = 0;
    359       uint64_t Start = 0;
    360       Sections[SectIdx].getAddress(SectionAddress);
    361       Symbols[SymIdx].getAddress(Start);
    362       Start -= SectionAddress;
    363 
    364       // Stop disassembling either at the beginning of the next symbol or at
    365       // the end of the section.
    366       bool containsNextSym = false;
    367       uint64_t NextSym = 0;
    368       uint64_t NextSymIdx = SymIdx+1;
    369       while (Symbols.size() > NextSymIdx) {
    370         SymbolRef::Type NextSymType;
    371         Symbols[NextSymIdx].getType(NextSymType);
    372         if (NextSymType == SymbolRef::ST_Function) {
    373           Sections[SectIdx].containsSymbol(Symbols[NextSymIdx],
    374                                            containsNextSym);
    375           Symbols[NextSymIdx].getAddress(NextSym);
    376           NextSym -= SectionAddress;
    377           break;
    378         }
    379         ++NextSymIdx;
    380       }
    381 
    382       uint64_t SectSize;
    383       Sections[SectIdx].getSize(SectSize);
    384       uint64_t End = containsNextSym ?  NextSym : SectSize;
    385       uint64_t Size;
    386 
    387       symbolTableWorked = true;
    388 
    389       outs() << SymName << ":\n";
    390       DILineInfo lastLine;
    391       for (uint64_t Index = Start; Index < End; Index += Size) {
    392         MCInst Inst;
    393 
    394         uint64_t SectAddress = 0;
    395         Sections[SectIdx].getAddress(SectAddress);
    396         outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
    397 
    398         // Check the data in code table here to see if this is data not an
    399         // instruction to be disassembled.
    400         DiceTable Dice;
    401         Dice.push_back(std::make_pair(SectAddress + Index, DiceRef()));
    402         dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(),
    403                                               Dice.begin(), Dice.end(),
    404                                               compareDiceTableEntries);
    405         if (DTI != Dices.end()){
    406           uint16_t Length;
    407           DTI->second.getLength(Length);
    408           DumpBytes(StringRef(Bytes.data() + Index, Length));
    409           uint16_t Kind;
    410           DTI->second.getKind(Kind);
    411           DumpDataInCode(Bytes.data() + Index, Length, Kind);
    412           continue;
    413         }
    414 
    415         if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
    416                                    DebugOut, nulls())) {
    417           DumpBytes(StringRef(Bytes.data() + Index, Size));
    418           IP->printInst(&Inst, outs(), "");
    419 
    420           // Print debug info.
    421           if (diContext) {
    422             DILineInfo dli =
    423               diContext->getLineInfoForAddress(SectAddress + Index);
    424             // Print valid line info if it changed.
    425             if (dli != lastLine && dli.Line != 0)
    426               outs() << "\t## " << dli.FileName << ':' << dli.Line << ':'
    427                      << dli.Column;
    428             lastLine = dli;
    429           }
    430           outs() << "\n";
    431         } else {
    432           errs() << "llvm-objdump: warning: invalid instruction encoding\n";
    433           if (Size == 0)
    434             Size = 1; // skip illegible bytes
    435         }
    436       }
    437     }
    438     if (!symbolTableWorked) {
    439       // Reading the symbol table didn't work, disassemble the whole section.
    440       uint64_t SectAddress;
    441       Sections[SectIdx].getAddress(SectAddress);
    442       uint64_t SectSize;
    443       Sections[SectIdx].getSize(SectSize);
    444       uint64_t InstSize;
    445       for (uint64_t Index = 0; Index < SectSize; Index += InstSize) {
    446         MCInst Inst;
    447 
    448         if (DisAsm->getInstruction(Inst, InstSize, memoryObject, Index,
    449                                    DebugOut, nulls())) {
    450           outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
    451           DumpBytes(StringRef(Bytes.data() + Index, InstSize));
    452           IP->printInst(&Inst, outs(), "");
    453           outs() << "\n";
    454         } else {
    455           errs() << "llvm-objdump: warning: invalid instruction encoding\n";
    456           if (InstSize == 0)
    457             InstSize = 1; // skip illegible bytes
    458         }
    459       }
    460     }
    461   }
    462 }
    463