Home | History | Annotate | Download | only in dsymutil
//===-- MachOUtils.cpp - Mach-o specific helpers for dsymutil -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #include "MachOUtils.h"
     11 #include "BinaryHolder.h"
     12 #include "DebugMap.h"
     13 #include "dsymutil.h"
     14 #include "NonRelocatableStringpool.h"
     15 #include "llvm/MC/MCSectionMachO.h"
     16 #include "llvm/MC/MCAsmLayout.h"
     17 #include "llvm/MC/MCSectionMachO.h"
     18 #include "llvm/MC/MCObjectStreamer.h"
     19 #include "llvm/MC/MCMachObjectWriter.h"
     20 #include "llvm/MC/MCStreamer.h"
     21 #include "llvm/Object/MachO.h"
     22 #include "llvm/Support/FileUtilities.h"
     23 #include "llvm/Support/Program.h"
     24 #include "llvm/Support/raw_ostream.h"
     25 
     26 namespace llvm {
     27 namespace dsymutil {
     28 namespace MachOUtils {
     29 
     30 std::string getArchName(StringRef Arch) {
     31   if (Arch.startswith("thumb"))
     32     return (llvm::Twine("arm") + Arch.drop_front(5)).str();
     33   return Arch;
     34 }
     35 
     36 static bool runLipo(StringRef SDKPath, SmallVectorImpl<const char *> &Args) {
     37   auto Path = sys::findProgramByName("lipo", makeArrayRef(SDKPath));
     38   if (!Path)
     39     Path = sys::findProgramByName("lipo");
     40 
     41   if (!Path) {
     42     errs() << "error: lipo: " << Path.getError().message() << "\n";
     43     return false;
     44   }
     45 
     46   std::string ErrMsg;
     47   int result =
     48       sys::ExecuteAndWait(*Path, Args.data(), nullptr, nullptr, 0, 0, &ErrMsg);
     49   if (result) {
     50     errs() << "error: lipo: " << ErrMsg << "\n";
     51     return false;
     52   }
     53 
     54   return true;
     55 }
     56 
     57 bool generateUniversalBinary(SmallVectorImpl<ArchAndFilename> &ArchFiles,
     58                              StringRef OutputFileName,
     59                              const LinkOptions &Options, StringRef SDKPath) {
     60   // No need to merge one file into a universal fat binary. First, try
     61   // to move it (rename) to the final location. If that fails because
     62   // of cross-device link issues then copy and delete.
     63   if (ArchFiles.size() == 1) {
     64     StringRef From(ArchFiles.front().Path);
     65     if (sys::fs::rename(From, OutputFileName)) {
     66       if (std::error_code EC = sys::fs::copy_file(From, OutputFileName)) {
     67         errs() << "error: while copying " << From << " to " << OutputFileName
     68                << ": " << EC.message() << "\n";
     69         return false;
     70       }
     71       sys::fs::remove(From);
     72     }
     73     return true;
     74   }
     75 
     76   SmallVector<const char *, 8> Args;
     77   Args.push_back("lipo");
     78   Args.push_back("-create");
     79 
     80   for (auto &Thin : ArchFiles)
     81     Args.push_back(Thin.Path.c_str());
     82 
     83   // Align segments to match dsymutil-classic alignment
     84   for (auto &Thin : ArchFiles) {
     85     Thin.Arch = getArchName(Thin.Arch);
     86     Args.push_back("-segalign");
     87     Args.push_back(Thin.Arch.c_str());
     88     Args.push_back("20");
     89   }
     90 
     91   Args.push_back("-output");
     92   Args.push_back(OutputFileName.data());
     93   Args.push_back(nullptr);
     94 
     95   if (Options.Verbose) {
     96     outs() << "Running lipo\n";
     97     for (auto Arg : Args)
     98       outs() << ' ' << ((Arg == nullptr) ? "\n" : Arg);
     99   }
    100 
    101   return Options.NoOutput ? true : runLipo(SDKPath, Args);
    102 }
    103 
    104 // Return a MachO::segment_command_64 that holds the same values as
    105 // the passed MachO::segment_command. We do that to avoid having to
    106 // duplicat the logic for 32bits and 64bits segments.
    107 struct MachO::segment_command_64 adaptFrom32bits(MachO::segment_command Seg) {
    108   MachO::segment_command_64 Seg64;
    109   Seg64.cmd = Seg.cmd;
    110   Seg64.cmdsize = Seg.cmdsize;
    111   memcpy(Seg64.segname, Seg.segname, sizeof(Seg.segname));
    112   Seg64.vmaddr = Seg.vmaddr;
    113   Seg64.vmsize = Seg.vmsize;
    114   Seg64.fileoff = Seg.fileoff;
    115   Seg64.filesize = Seg.filesize;
    116   Seg64.maxprot = Seg.maxprot;
    117   Seg64.initprot = Seg.initprot;
    118   Seg64.nsects = Seg.nsects;
    119   Seg64.flags = Seg.flags;
    120   return Seg64;
    121 }
    122 
    123 // Iterate on all \a Obj segments, and apply \a Handler to them.
    124 template <typename FunctionTy>
    125 static void iterateOnSegments(const object::MachOObjectFile &Obj,
    126                               FunctionTy Handler) {
    127   for (const auto &LCI : Obj.load_commands()) {
    128     MachO::segment_command_64 Segment;
    129     if (LCI.C.cmd == MachO::LC_SEGMENT)
    130       Segment = adaptFrom32bits(Obj.getSegmentLoadCommand(LCI));
    131     else if (LCI.C.cmd == MachO::LC_SEGMENT_64)
    132       Segment = Obj.getSegment64LoadCommand(LCI);
    133     else
    134       continue;
    135 
    136     Handler(Segment);
    137   }
    138 }
    139 
    140 // Transfer the symbols described by \a NList to \a NewSymtab which is
    141 // just the raw contents of the symbol table for the dSYM companion file.
    142 // \returns whether the symbol was tranfered or not.
    143 template <typename NListTy>
    144 static bool transferSymbol(NListTy NList, bool IsLittleEndian,
    145                            StringRef Strings, SmallVectorImpl<char> &NewSymtab,
    146                            NonRelocatableStringpool &NewStrings,
    147                            bool &InDebugNote) {
    148   // Do not transfer undefined symbols, we want real addresses.
    149   if ((NList.n_type & MachO::N_TYPE) == MachO::N_UNDF)
    150     return false;
    151 
    152   StringRef Name = StringRef(Strings.begin() + NList.n_strx);
    153   if (InDebugNote) {
    154     InDebugNote =
    155         (NList.n_type != MachO::N_SO) || (!Name.empty() && Name[0] != '\0');
    156     return false;
    157   } else if (NList.n_type == MachO::N_SO) {
    158     InDebugNote = true;
    159     return false;
    160   }
    161 
    162   // FIXME: The + 1 is here to mimic dsymutil-classic that has 2 empty
    163   // strings at the start of the generated string table (There is
    164   // corresponding code in the string table emission).
    165   NList.n_strx = NewStrings.getStringOffset(Name) + 1;
    166   if (IsLittleEndian != sys::IsLittleEndianHost)
    167     MachO::swapStruct(NList);
    168 
    169   NewSymtab.append(reinterpret_cast<char *>(&NList),
    170                    reinterpret_cast<char *>(&NList + 1));
    171   return true;
    172 }
    173 
    174 // Wrapper around transferSymbol to transfer all of \a Obj symbols
    175 // to \a NewSymtab. This function does not write in the output file.
    176 // \returns the number of symbols in \a NewSymtab.
    177 static unsigned transferSymbols(const object::MachOObjectFile &Obj,
    178                                 SmallVectorImpl<char> &NewSymtab,
    179                                 NonRelocatableStringpool &NewStrings) {
    180   unsigned Syms = 0;
    181   StringRef Strings = Obj.getStringTableData();
    182   bool IsLittleEndian = Obj.isLittleEndian();
    183   bool InDebugNote = false;
    184 
    185   if (Obj.is64Bit()) {
    186     for (const object::SymbolRef &Symbol : Obj.symbols()) {
    187       object::DataRefImpl DRI = Symbol.getRawDataRefImpl();
    188       if (transferSymbol(Obj.getSymbol64TableEntry(DRI), IsLittleEndian,
    189                          Strings, NewSymtab, NewStrings, InDebugNote))
    190         ++Syms;
    191     }
    192   } else {
    193     for (const object::SymbolRef &Symbol : Obj.symbols()) {
    194       object::DataRefImpl DRI = Symbol.getRawDataRefImpl();
    195       if (transferSymbol(Obj.getSymbolTableEntry(DRI), IsLittleEndian, Strings,
    196                          NewSymtab, NewStrings, InDebugNote))
    197         ++Syms;
    198     }
    199   }
    200   return Syms;
    201 }
    202 
    203 static MachO::section
    204 getSection(const object::MachOObjectFile &Obj,
    205            const MachO::segment_command &Seg,
    206            const object::MachOObjectFile::LoadCommandInfo &LCI, unsigned Idx) {
    207   return Obj.getSection(LCI, Idx);
    208 }
    209 
    210 static MachO::section_64
    211 getSection(const object::MachOObjectFile &Obj,
    212            const MachO::segment_command_64 &Seg,
    213            const object::MachOObjectFile::LoadCommandInfo &LCI, unsigned Idx) {
    214   return Obj.getSection64(LCI, Idx);
    215 }
    216 
    217 // Transfer \a Segment from \a Obj to the output file. This calls into \a Writer
    218 // to write these load commands directly in the output file at the current
    219 // position.
    220 // The function also tries to find a hole in the address map to fit the __DWARF
    221 // segment of \a DwarfSegmentSize size. \a EndAddress is updated to point at the
    222 // highest segment address.
    223 // When the __LINKEDIT segment is transfered, its offset and size are set resp.
    224 // to \a LinkeditOffset and \a LinkeditSize.
    225 template <typename SegmentTy>
    226 static void transferSegmentAndSections(
    227     const object::MachOObjectFile::LoadCommandInfo &LCI, SegmentTy Segment,
    228     const object::MachOObjectFile &Obj, MCObjectWriter &Writer,
    229     uint64_t LinkeditOffset, uint64_t LinkeditSize, uint64_t DwarfSegmentSize,
    230     uint64_t &GapForDwarf, uint64_t &EndAddress) {
    231   if (StringRef("__DWARF") == Segment.segname)
    232     return;
    233 
    234   Segment.fileoff = Segment.filesize = 0;
    235 
    236   if (StringRef("__LINKEDIT") == Segment.segname) {
    237     Segment.fileoff = LinkeditOffset;
    238     Segment.filesize = LinkeditSize;
    239   }
    240 
    241   // Check if the end address of the last segment and our current
    242   // start address leave a sufficient gap to store the __DWARF
    243   // segment.
    244   uint64_t PrevEndAddress = EndAddress;
    245   EndAddress = alignTo(EndAddress, 0x1000);
    246   if (GapForDwarf == UINT64_MAX && Segment.vmaddr > EndAddress &&
    247       Segment.vmaddr - EndAddress >= DwarfSegmentSize)
    248     GapForDwarf = EndAddress;
    249 
    250   // The segments are not necessarily sorted by their vmaddr.
    251   EndAddress =
    252       std::max<uint64_t>(PrevEndAddress, Segment.vmaddr + Segment.vmsize);
    253   unsigned nsects = Segment.nsects;
    254   if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
    255     MachO::swapStruct(Segment);
    256   Writer.writeBytes(
    257       StringRef(reinterpret_cast<char *>(&Segment), sizeof(Segment)));
    258   for (unsigned i = 0; i < nsects; ++i) {
    259     auto Sect = getSection(Obj, Segment, LCI, i);
    260     Sect.offset = Sect.reloff = Sect.nreloc = 0;
    261     if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
    262       MachO::swapStruct(Sect);
    263     Writer.writeBytes(StringRef(reinterpret_cast<char *>(&Sect), sizeof(Sect)));
    264   }
    265 }
    266 
    267 // Write the __DWARF segment load command to the output file.
    268 static void createDwarfSegment(uint64_t VMAddr, uint64_t FileOffset,
    269                                uint64_t FileSize, unsigned NumSections,
    270                                MCAsmLayout &Layout, MachObjectWriter &Writer) {
    271   Writer.writeSegmentLoadCommand("__DWARF", NumSections, VMAddr,
    272                                  alignTo(FileSize, 0x1000), FileOffset,
    273                                  FileSize, /* MaxProt */ 7,
    274                                  /* InitProt =*/3);
    275 
    276   for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    277     MCSection *Sec = Layout.getSectionOrder()[i];
    278     if (Sec->begin() == Sec->end() || !Layout.getSectionFileSize(Sec))
    279       continue;
    280 
    281     unsigned Align = Sec->getAlignment();
    282     if (Align > 1) {
    283       VMAddr = alignTo(VMAddr, Align);
    284       FileOffset = alignTo(FileOffset, Align);
    285     }
    286     Writer.writeSection(Layout, *Sec, VMAddr, FileOffset, 0, 0, 0);
    287 
    288     FileOffset += Layout.getSectionAddressSize(Sec);
    289     VMAddr += Layout.getSectionAddressSize(Sec);
    290   }
    291 }
    292 
    293 static bool isExecutable(const object::MachOObjectFile &Obj) {
    294   if (Obj.is64Bit())
    295     return Obj.getHeader64().filetype != MachO::MH_OBJECT;
    296   else
    297     return Obj.getHeader().filetype != MachO::MH_OBJECT;
    298 }
    299 
    300 static bool hasLinkEditSegment(const object::MachOObjectFile &Obj) {
    301   bool HasLinkEditSegment = false;
    302   iterateOnSegments(Obj, [&](const MachO::segment_command_64 &Segment) {
    303     if (StringRef("__LINKEDIT") == Segment.segname)
    304       HasLinkEditSegment = true;
    305   });
    306   return HasLinkEditSegment;
    307 }
    308 
    309 static unsigned segmentLoadCommandSize(bool Is64Bit, unsigned NumSections) {
    310   if (Is64Bit)
    311     return sizeof(MachO::segment_command_64) +
    312            NumSections * sizeof(MachO::section_64);
    313 
    314   return sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
    315 }
    316 
// Stream a dSYM companion binary file corresponding to the binary referenced
// by \a DM to \a OutFile. The passed \a MS MCStreamer is setup to write to
// \a OutFile and it must be using a MachObjectWriter object to do so.
//
// The output is laid out, in order, as:
//   - the Mach-O header (file type MH_DSYM) and the load commands,
//   - zero padding up to a 0x1000 boundary,
//   - the transferred symbol table and its string table (only for inputs
//     that are not MH_OBJECT and have a __LINKEDIT segment),
//   - zero padding up to a 0x1000 boundary,
//   - the contents of the sections held by the assembler.
//
// \returns true on success; when the input binary cannot be opened, the
// result of error() is returned instead.
bool generateDsymCompanion(const DebugMap &DM, MCStreamer &MS,
                           raw_fd_ostream &OutFile) {
  // The casts below rely on the caller contract stated above: MS is an
  // MCObjectStreamer whose assembler writes through a MachObjectWriter.
  auto &ObjectStreamer = static_cast<MCObjectStreamer &>(MS);
  MCAssembler &MCAsm = ObjectStreamer.getAssembler();
  auto &Writer = static_cast<MachObjectWriter &>(MCAsm.getWriter());
  MCAsmLayout Layout(MCAsm);

  // Lay out the sections now so that their sizes can be queried below.
  MCAsm.layout(Layout);

  // Open the input binary and select the object matching the debug map's
  // triple.
  BinaryHolder InputBinaryHolder(false);
  auto ErrOrObjs = InputBinaryHolder.GetObjectFiles(DM.getBinaryPath());
  if (auto Error = ErrOrObjs.getError())
    return error(Twine("opening ") + DM.getBinaryPath() + ": " +
                     Error.message(),
                 "output file streaming");

  auto ErrOrInputBinary =
      InputBinaryHolder.GetAs<object::MachOObjectFile>(DM.getTriple());
  if (auto Error = ErrOrInputBinary.getError())
    return error(Twine("opening ") + DM.getBinaryPath() + ": " +
                     Error.message(),
                 "output file streaming");
  auto &InputBinary = *ErrOrInputBinary;

  bool Is64Bit = Writer.is64Bit();
  MachO::symtab_command SymtabCmd = InputBinary.getSymtabLoadCommand();

  // Get UUID.
  // NOTE(review): cmd is preset to LC_UUID before the search, so the
  // `UUIDCmd.cmd != 0` checks below hold even when the input has no LC_UUID
  // command; in that case an all-zero UUID is emitted. Confirm this is
  // intended.
  MachO::uuid_command UUIDCmd;
  memset(&UUIDCmd, 0, sizeof(UUIDCmd));
  UUIDCmd.cmd = MachO::LC_UUID;
  UUIDCmd.cmdsize = sizeof(MachO::uuid_command);
  for (auto &LCI : InputBinary.load_commands()) {
    if (LCI.C.cmd == MachO::LC_UUID) {
      UUIDCmd = InputBinary.getUuidCommand(LCI);
      break;
    }
  }

  // Compute the number of load commands we will need.
  unsigned LoadCommandSize = 0;
  unsigned NumLoadCommands = 0;
  // We will copy the UUID if there is one.
  if (UUIDCmd.cmd != 0) {
    ++NumLoadCommands;
    LoadCommandSize += sizeof(MachO::uuid_command);
  }

  // If we have a valid symtab to copy, do it.
  bool ShouldEmitSymtab =
      isExecutable(InputBinary) && hasLinkEditSegment(InputBinary);
  if (ShouldEmitSymtab) {
    LoadCommandSize += sizeof(MachO::symtab_command);
    ++NumLoadCommands;
  }

  unsigned HeaderSize =
      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
  // We will copy every segment that isn't __DWARF.
  iterateOnSegments(InputBinary, [&](const MachO::segment_command_64 &Segment) {
    if (StringRef("__DWARF") == Segment.segname)
      return;

    ++NumLoadCommands;
    LoadCommandSize += segmentLoadCommandSize(Is64Bit, Segment.nsects);
  });

  // We will add our own brand new __DWARF segment if we have debug
  // info.
  unsigned NumDwarfSections = 0;
  uint64_t DwarfSegmentSize = 0;

  // Accumulate the size of every section that has fragments and a non-zero
  // file size, aligning the running size to each section's alignment.
  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    MCSection *Sec = Layout.getSectionOrder()[i];
    if (Sec->begin() == Sec->end())
      continue;

    if (uint64_t Size = Layout.getSectionFileSize(Sec)) {
      DwarfSegmentSize = alignTo(DwarfSegmentSize, Sec->getAlignment());
      DwarfSegmentSize += Size;
      ++NumDwarfSections;
    }
  }

  if (NumDwarfSections) {
    ++NumLoadCommands;
    LoadCommandSize += segmentLoadCommandSize(Is64Bit, NumDwarfSections);
  }

  // Build the new symbol table and string pool in memory first: their sizes
  // are needed to compute the offsets stored in the load commands.
  SmallString<0> NewSymtab;
  NonRelocatableStringpool NewStrings;
  unsigned NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
  unsigned NumSyms = 0;
  uint64_t NewStringsSize = 0;
  if (ShouldEmitSymtab) {
    NewSymtab.reserve(SymtabCmd.nsyms * NListSize / 2);
    NumSyms = transferSymbols(InputBinary, NewSymtab, NewStrings);
    // + 1 for the extra leading zero byte written before the pool's strings
    // when the string table is emitted below.
    NewStringsSize = NewStrings.getSize() + 1;
  }

  // The symbol table starts on the first 0x1000 boundary after the header
  // and the load commands.
  uint64_t SymtabStart = LoadCommandSize;
  SymtabStart += HeaderSize;
  SymtabStart = alignTo(SymtabStart, 0x1000);

  // We gathered all the information we need, start emitting the output file.
  Writer.writeHeader(MachO::MH_DSYM, NumLoadCommands, LoadCommandSize, false);

  // Write the load commands.
  assert(OutFile.tell() == HeaderSize);
  if (UUIDCmd.cmd != 0) {
    Writer.write32(UUIDCmd.cmd);
    Writer.write32(UUIDCmd.cmdsize);
    Writer.writeBytes(
        StringRef(reinterpret_cast<const char *>(UUIDCmd.uuid), 16));
    assert(OutFile.tell() == HeaderSize + sizeof(UUIDCmd));
  }

  assert(SymtabCmd.cmd && "No symbol table.");
  // The string table immediately follows the symbol table entries.
  uint64_t StringStart = SymtabStart + NumSyms * NListSize;
  if (ShouldEmitSymtab)
    Writer.writeSymtabLoadCommand(SymtabStart, NumSyms, StringStart,
                                  NewStringsSize);

  // The __DWARF contents start on the first 0x1000 boundary after the string
  // table.
  uint64_t DwarfSegmentStart = StringStart + NewStringsSize;
  DwarfSegmentStart = alignTo(DwarfSegmentStart, 0x1000);

  // Write the load commands for the segments and sections we 'import' from
  // the original binary.
  // The __LINKEDIT segment is made to cover the symbol table plus the string
  // table: offset SymtabStart, size StringStart + NewStringsSize - SymtabStart.
  uint64_t EndAddress = 0;
  uint64_t GapForDwarf = UINT64_MAX;
  for (auto &LCI : InputBinary.load_commands()) {
    if (LCI.C.cmd == MachO::LC_SEGMENT)
      transferSegmentAndSections(LCI, InputBinary.getSegmentLoadCommand(LCI),
                                 InputBinary, Writer, SymtabStart,
                                 StringStart + NewStringsSize - SymtabStart,
                                 DwarfSegmentSize, GapForDwarf, EndAddress);
    else if (LCI.C.cmd == MachO::LC_SEGMENT_64)
      transferSegmentAndSections(LCI, InputBinary.getSegment64LoadCommand(LCI),
                                 InputBinary, Writer, SymtabStart,
                                 StringStart + NewStringsSize - SymtabStart,
                                 DwarfSegmentSize, GapForDwarf, EndAddress);
  }

  // By default the __DWARF segment is placed on the first 0x1000 boundary
  // after the highest segment end address.
  uint64_t DwarfVMAddr = alignTo(EndAddress, 0x1000);
  uint64_t DwarfVMMax = Is64Bit ? UINT64_MAX : UINT32_MAX;
  if (DwarfVMAddr + DwarfSegmentSize > DwarfVMMax ||
      DwarfVMAddr + DwarfSegmentSize < DwarfVMAddr /* Overflow */) {
    // There is no room for the __DWARF segment at the end of the
    // address space. Look through segments to find a gap.
    DwarfVMAddr = GapForDwarf;
    // Emission continues after the warning, with UINT64_MAX used as the
    // segment address.
    if (DwarfVMAddr == UINT64_MAX)
      warn("not enough VM space for the __DWARF segment.",
           "output file streaming");
  }

  // Write the load command for the __DWARF segment.
  // NOTE(review): this is called even when NumDwarfSections is 0, whereas the
  // command is only counted in NumLoadCommands / LoadCommandSize above when
  // NumDwarfSections is non-zero. Confirm callers never reach this point
  // without debug sections.
  createDwarfSegment(DwarfVMAddr, DwarfSegmentStart, DwarfSegmentSize,
                     NumDwarfSections, Layout, Writer);

  // Pad from the end of the load commands up to the symbol table start.
  assert(OutFile.tell() == LoadCommandSize + HeaderSize);
  Writer.WriteZeros(SymtabStart - (LoadCommandSize + HeaderSize));
  assert(OutFile.tell() == SymtabStart);

  // Transfer symbols.
  if (ShouldEmitSymtab) {
    Writer.writeBytes(NewSymtab.str());
    assert(OutFile.tell() == StringStart);

    // Transfer string table.
    // FIXME: The NonRelocatableStringpool starts with an empty string, but
    // dsymutil-classic starts the reconstructed string table with 2 of these.
    // Reproduce that behavior for now (there is corresponding code in
    // transferSymbol).
    Writer.WriteZeros(1);
    typedef NonRelocatableStringpool::MapTy MapTy;
    // Walk the pool's entries through the link stored in each entry's value.
    // Each key is written with size() + 1 bytes, i.e. including the byte that
    // follows the key data.
    for (auto *Entry = NewStrings.getFirstEntry(); Entry;
         Entry = static_cast<MapTy::MapEntryTy *>(Entry->getValue().second))
      Writer.writeBytes(
          StringRef(Entry->getKey().data(), Entry->getKey().size() + 1));
  }

  assert(OutFile.tell() == StringStart + NewStringsSize);

  // Pad till the Dwarf segment start.
  Writer.WriteZeros(DwarfSegmentStart - (StringStart + NewStringsSize));
  assert(OutFile.tell() == DwarfSegmentStart);

  // Emit the Dwarf sections contents.
  for (const MCSection &Sec : MCAsm) {
    if (Sec.begin() == Sec.end())
      continue;

    // Pad the output up to the section's alignment before writing its data.
    uint64_t Pos = OutFile.tell();
    Writer.WriteZeros(alignTo(Pos, Sec.getAlignment()) - Pos);
    MCAsm.writeSectionData(&Sec, Layout);
  }

  return true;
}
    519 }
    520 }
    521 }
    522