1 //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "Error.h" 11 #include "obj2yaml.h" 12 #include "llvm/Object/MachOUniversal.h" 13 #include "llvm/ObjectYAML/ObjectYAML.h" 14 #include "llvm/Support/ErrorHandling.h" 15 #include "llvm/Support/LEB128.h" 16 17 #include <string.h> // for memcpy 18 19 using namespace llvm; 20 21 class MachODumper { 22 23 template <typename StructType> 24 const char *processLoadCommandData( 25 MachOYAML::LoadCommand &LC, 26 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd); 27 28 const object::MachOObjectFile &Obj; 29 void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y); 30 void dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y); 31 void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y); 32 void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y); 33 void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes, 34 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false); 35 void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y); 36 void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y); 37 38 public: 39 MachODumper(const object::MachOObjectFile &O) : Obj(O) {} 40 Expected<std::unique_ptr<MachOYAML::Object>> dump(); 41 }; 42 43 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 44 case MachO::LCName: \ 45 memcpy((void *) & (LC.Data.LCStruct##_data), LoadCmd.Ptr, \ 46 sizeof(MachO::LCStruct)); \ 47 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) \ 48 MachO::swapStruct(LC.Data.LCStruct##_data); \ 49 EndPtr = processLoadCommandData<MachO::LCStruct>(LC, LoadCmd); \ 50 break; 51 52 template <typename SectionType> 53 MachOYAML::Section constructSectionCommon(SectionType Sec) { 54 MachOYAML::Section TempSec; 55 memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16); 56 memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16); 57 TempSec.addr = Sec.addr; 58 TempSec.size = Sec.size; 59 TempSec.offset = Sec.offset; 60 TempSec.align = Sec.align; 61 TempSec.reloff = Sec.reloff; 62 TempSec.nreloc = Sec.nreloc; 63 TempSec.flags = Sec.flags; 64 TempSec.reserved1 = Sec.reserved1; 65 TempSec.reserved2 = Sec.reserved2; 66 TempSec.reserved3 = 0; 67 return TempSec; 68 } 69 70 template <typename SectionType> 71 MachOYAML::Section constructSection(SectionType Sec); 72 73 template <> MachOYAML::Section constructSection(MachO::section Sec) { 74 MachOYAML::Section TempSec = constructSectionCommon(Sec); 75 TempSec.reserved3 = 0; 76 return TempSec; 77 } 78 79 template <> MachOYAML::Section constructSection(MachO::section_64 Sec) { 80 MachOYAML::Section TempSec = constructSectionCommon(Sec); 81 TempSec.reserved3 = Sec.reserved3; 82 return TempSec; 83 } 84 85 template <typename SectionType, typename SegmentType> 86 const char * 87 extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 88 std::vector<MachOYAML::Section> &Sections, 89 bool IsLittleEndian) { 90 auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; 91 const SectionType *Curr = 92 reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType)); 93 for (; reinterpret_cast<const void *>(Curr) < End; Curr++) { 94 if (IsLittleEndian != sys::IsLittleEndianHost) { 95 SectionType Sec; 96 memcpy((void *)&Sec, Curr, sizeof(SectionType)); 97 MachO::swapStruct(Sec); 98 Sections.push_back(constructSection(Sec)); 99 } else { 100 Sections.push_back(constructSection(*Curr)); 101 } 102 } 103 return reinterpret_cast<const char *>(Curr); 104 } 105 106 template <typename StructType> 107 const char *MachODumper::processLoadCommandData( 108 MachOYAML::LoadCommand &LC, 109 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 110 return LoadCmd.Ptr + sizeof(StructType); 111 } 112 113 template <> 114 const char *MachODumper::processLoadCommandData<MachO::segment_command>( 115 MachOYAML::LoadCommand &LC, 116 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 117 return extractSections<MachO::section, MachO::segment_command>( 118 LoadCmd, LC.Sections, Obj.isLittleEndian()); 119 } 120 121 template <> 122 const char *MachODumper::processLoadCommandData<MachO::segment_command_64>( 123 MachOYAML::LoadCommand &LC, 124 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 125 return extractSections<MachO::section_64, MachO::segment_command_64>( 126 LoadCmd, LC.Sections, Obj.isLittleEndian()); 127 } 128 129 template <typename StructType> 130 const char * 131 readString(MachOYAML::LoadCommand &LC, 132 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 133 auto Start = LoadCmd.Ptr + sizeof(StructType); 134 auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType); 135 auto Size = strnlen(Start, MaxSize); 136 LC.PayloadString = StringRef(Start, Size).str(); 137 return Start + Size; 138 } 139 140 template <> 141 const char *MachODumper::processLoadCommandData<MachO::dylib_command>( 142 MachOYAML::LoadCommand &LC, 143 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 144 return readString<MachO::dylib_command>(LC, LoadCmd); 145 } 146 147 template <> 148 const char *MachODumper::processLoadCommandData<MachO::dylinker_command>( 149 MachOYAML::LoadCommand &LC, 150 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 151 return readString<MachO::dylinker_command>(LC, LoadCmd); 152 } 153 154 template <> 155 const char *MachODumper::processLoadCommandData<MachO::rpath_command>( 156 MachOYAML::LoadCommand &LC, 157 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 158 return readString<MachO::rpath_command>(LC, LoadCmd); 159 } 160 161 Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() { 162 auto Y = make_unique<MachOYAML::Object>(); 163 dumpHeader(Y); 164 dumpLoadCommands(Y); 165 dumpLinkEdit(Y); 166 return std::move(Y); 167 } 168 169 void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) { 170 Y->Header.magic = Obj.getHeader().magic; 171 Y->Header.cputype = Obj.getHeader().cputype; 172 Y->Header.cpusubtype = Obj.getHeader().cpusubtype; 173 Y->Header.filetype = Obj.getHeader().filetype; 174 Y->Header.ncmds = Obj.getHeader().ncmds; 175 Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds; 176 Y->Header.flags = Obj.getHeader().flags; 177 Y->Header.reserved = 0; 178 } 179 180 void MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) { 181 for (auto LoadCmd : Obj.load_commands()) { 182 MachOYAML::LoadCommand LC; 183 const char *EndPtr = LoadCmd.Ptr; 184 switch (LoadCmd.C.cmd) { 185 default: 186 memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr, 187 sizeof(MachO::load_command)); 188 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) 189 MachO::swapStruct(LC.Data.load_command_data); 190 EndPtr = processLoadCommandData<MachO::load_command>(LC, LoadCmd); 191 break; 192 #include "llvm/Support/MachO.def" 193 } 194 auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr); 195 if (!std::all_of(EndPtr, &EndPtr[RemainingBytes], 196 [](const char C) { return C == 0; })) { 197 LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr, 198 &EndPtr[RemainingBytes]); 199 RemainingBytes = 0; 200 } 201 LC.ZeroPadBytes = RemainingBytes; 202 Y->LoadCommands.push_back(std::move(LC)); 203 } 204 } 205 206 void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) { 207 dumpRebaseOpcodes(Y); 208 dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes()); 209 dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes, 210 Obj.getDyldInfoWeakBindOpcodes()); 211 dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(), 212 true); 213 dumpExportTrie(Y); 214 dumpSymbols(Y); 215 } 216 217 void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) { 218 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 219 220 auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes(); 221 for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end(); 222 ++OpCode) { 223 MachOYAML::RebaseOpcode RebaseOp; 224 RebaseOp.Opcode = 225 static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK); 226 RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK; 227 228 unsigned Count; 229 uint64_t ULEB = 0; 230 231 switch (RebaseOp.Opcode) { 232 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: 233 234 ULEB = decodeULEB128(OpCode + 1, &Count); 235 RebaseOp.ExtraData.push_back(ULEB); 236 OpCode += Count; 237 // Intentionally no break here -- This opcode has two ULEB values 238 case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: 239 case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: 240 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: 241 case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: 242 243 ULEB = decodeULEB128(OpCode + 1, &Count); 244 RebaseOp.ExtraData.push_back(ULEB); 245 OpCode += Count; 246 break; 247 default: 248 break; 249 } 250 251 LEData.RebaseOpcodes.push_back(RebaseOp); 252 253 if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE) 254 break; 255 } 256 } 257 258 StringRef ReadStringRef(const uint8_t *Start) { 259 const uint8_t *Itr = Start; 260 for (; *Itr; ++Itr) 261 ; 262 return StringRef(reinterpret_cast<const char *>(Start), Itr - Start); 263 } 264 265 void MachODumper::dumpBindOpcodes( 266 std::vector<MachOYAML::BindOpcode> &BindOpcodes, 267 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) { 268 for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end(); 269 ++OpCode) { 270 MachOYAML::BindOpcode BindOp; 271 BindOp.Opcode = 272 static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK); 273 BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK; 274 275 unsigned Count; 276 uint64_t ULEB = 0; 277 int64_t SLEB = 0; 278 279 switch (BindOp.Opcode) { 280 case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: 281 ULEB = decodeULEB128(OpCode + 1, &Count); 282 BindOp.ULEBExtraData.push_back(ULEB); 283 OpCode += Count; 284 // Intentionally no break here -- this opcode has two ULEB values 285 286 case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: 287 case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: 288 case MachO::BIND_OPCODE_ADD_ADDR_ULEB: 289 case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: 290 ULEB = decodeULEB128(OpCode + 1, &Count); 291 BindOp.ULEBExtraData.push_back(ULEB); 292 OpCode += Count; 293 break; 294 295 case MachO::BIND_OPCODE_SET_ADDEND_SLEB: 296 SLEB = decodeSLEB128(OpCode + 1, &Count); 297 BindOp.SLEBExtraData.push_back(SLEB); 298 OpCode += Count; 299 break; 300 301 case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: 302 BindOp.Symbol = ReadStringRef(OpCode + 1); 303 OpCode += BindOp.Symbol.size() + 1; 304 break; 305 default: 306 break; 307 } 308 309 BindOpcodes.push_back(BindOp); 310 311 // Lazy bindings have DONE opcodes between operations, so we need to keep 312 // processing after a DONE. 313 if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE) 314 break; 315 } 316 } 317 318 /*! 319 * /brief processes a node from the export trie, and its children. 320 * 321 * To my knowledge there is no documentation of the encoded format of this data 322 * other than in the heads of the Apple linker engineers. To that end hopefully 323 * this comment and the implementation below can serve to light the way for 324 * anyone crazy enough to come down this path in the future. 325 * 326 * This function reads and preserves the trie structure of the export trie. To 327 * my knowledge there is no code anywhere else that reads the data and preserves 328 * the Trie. LD64 (sources available at opensource.apple.com) has a similar 329 * implementation that parses the export trie into a vector. That code as well 330 * as LLVM's libObject MachO implementation were the basis for this. 331 * 332 * The export trie is an encoded trie. The node serialization is a bit awkward. 333 * The below pseudo-code is the best description I've come up with for it. 334 * 335 * struct SerializedNode { 336 * ULEB128 TerminalSize; 337 * struct TerminalData { <-- This is only present if TerminalSize > 0 338 * ULEB128 Flags; 339 * ULEB128 Address; <-- Present if (! Flags & REEXPORT ) 340 * ULEB128 Other; <-- Present if ( Flags & REEXPORT || 341 * Flags & STUB_AND_RESOLVER ) 342 * char[] ImportName; <-- Present if ( Flags & REEXPORT ) 343 * } 344 * uint8_t ChildrenCount; 345 * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount]; 346 * SerializedNode Children[ChildrenCount] 347 * } 348 * 349 * Terminal nodes are nodes that represent actual exports. They can appear 350 * anywhere in the tree other than at the root; they do not need to be leaf 351 * nodes. When reading the data out of the trie this routine reads it in-order, 352 * but it puts the child names and offsets directly into the child nodes. This 353 * results in looping over the children twice during serialization and 354 * de-serialization, but it makes the YAML representation more human readable. 355 * 356 * Below is an example of the graph from a "Hello World" executable: 357 * 358 * ------- 359 * | '' | 360 * ------- 361 * | 362 * ------- 363 * | '_' | 364 * ------- 365 * | 366 * |----------------------------------------| 367 * | | 368 * ------------------------ --------------------- 369 * | '_mh_execute_header' | | 'main' | 370 * | Flags: 0x00000000 | | Flags: 0x00000000 | 371 * | Addr: 0x00000000 | | Addr: 0x00001160 | 372 * ------------------------ --------------------- 373 * 374 * This graph represents the trie for the exports "__mh_execute_header" and 375 * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are 376 * terminal. 377 */ 378 379 const uint8_t *processExportNode(const uint8_t *CurrPtr, 380 const uint8_t *const End, 381 MachOYAML::ExportEntry &Entry) { 382 if (CurrPtr >= End) 383 return CurrPtr; 384 unsigned Count = 0; 385 Entry.TerminalSize = decodeULEB128(CurrPtr, &Count); 386 CurrPtr += Count; 387 if (Entry.TerminalSize != 0) { 388 Entry.Flags = decodeULEB128(CurrPtr, &Count); 389 CurrPtr += Count; 390 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { 391 Entry.Address = 0; 392 Entry.Other = decodeULEB128(CurrPtr, &Count); 393 CurrPtr += Count; 394 Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr)); 395 } else { 396 Entry.Address = decodeULEB128(CurrPtr, &Count); 397 CurrPtr += Count; 398 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { 399 Entry.Other = decodeULEB128(CurrPtr, &Count); 400 CurrPtr += Count; 401 } else 402 Entry.Other = 0; 403 } 404 } 405 uint8_t childrenCount = *CurrPtr++; 406 if (childrenCount == 0) 407 return CurrPtr; 408 409 Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount, 410 MachOYAML::ExportEntry()); 411 for (auto &Child : Entry.Children) { 412 Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr)); 413 CurrPtr += Child.Name.length() + 1; 414 Child.NodeOffset = decodeULEB128(CurrPtr, &Count); 415 CurrPtr += Count; 416 } 417 for (auto &Child : Entry.Children) { 418 CurrPtr = processExportNode(CurrPtr, End, Child); 419 } 420 return CurrPtr; 421 } 422 423 void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) { 424 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 425 auto ExportsTrie = Obj.getDyldInfoExportsTrie(); 426 processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie); 427 } 428 429 template <typename nlist_t> 430 MachOYAML::NListEntry constructNameList(const nlist_t &nlist) { 431 MachOYAML::NListEntry NL; 432 NL.n_strx = nlist.n_strx; 433 NL.n_type = nlist.n_type; 434 NL.n_sect = nlist.n_sect; 435 NL.n_desc = nlist.n_desc; 436 NL.n_value = nlist.n_value; 437 return NL; 438 } 439 440 void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) { 441 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 442 443 for (auto Symbol : Obj.symbols()) { 444 MachOYAML::NListEntry NLE = 445 Obj.is64Bit() ? constructNameList<MachO::nlist_64>( 446 *reinterpret_cast<const MachO::nlist_64 *>( 447 Symbol.getRawDataRefImpl().p)) 448 : constructNameList<MachO::nlist>( 449 *reinterpret_cast<const MachO::nlist *>( 450 Symbol.getRawDataRefImpl().p)); 451 LEData.NameList.push_back(NLE); 452 } 453 454 StringRef RemainingTable = Obj.getStringTableData(); 455 while (RemainingTable.size() > 0) { 456 auto SymbolPair = RemainingTable.split('\0'); 457 RemainingTable = SymbolPair.second; 458 if (SymbolPair.first.empty()) 459 break; 460 LEData.StringTable.push_back(SymbolPair.first); 461 } 462 } 463 464 Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) { 465 MachODumper Dumper(Obj); 466 Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump(); 467 if (!YAML) 468 return YAML.takeError(); 469 470 yaml::YamlObjectFile YAMLFile; 471 YAMLFile.MachO = std::move(YAML.get()); 472 473 yaml::Output Yout(Out); 474 Yout << YAMLFile; 475 return Error::success(); 476 } 477 478 Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) { 479 yaml::YamlObjectFile YAMLFile; 480 YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary()); 481 MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO; 482 YAML.Header.magic = Obj.getMagic(); 483 YAML.Header.nfat_arch = Obj.getNumberOfObjects(); 484 485 for (auto Slice : Obj.objects()) { 486 MachOYAML::FatArch arch; 487 arch.cputype = Slice.getCPUType(); 488 arch.cpusubtype = Slice.getCPUSubType(); 489 arch.offset = Slice.getOffset(); 490 arch.size = Slice.getSize(); 491 arch.align = Slice.getAlign(); 492 arch.reserved = Slice.getReserved(); 493 YAML.FatArchs.push_back(arch); 494 495 auto SliceObj = Slice.getAsObjectFile(); 496 if (!SliceObj) 497 return SliceObj.takeError(); 498 499 MachODumper Dumper(*SliceObj.get()); 500 Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump(); 501 if (!YAMLObj) 502 return YAMLObj.takeError(); 503 YAML.Slices.push_back(*YAMLObj.get()); 504 } 505 506 yaml::Output Yout(Out); 507 Yout << YAML; 508 return Error::success(); 509 } 510 511 std::error_code macho2yaml(raw_ostream &Out, const object::Binary &Binary) { 512 if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary)) { 513 if (auto Err = macho2yaml(Out, *MachOObj)) { 514 return errorToErrorCode(std::move(Err)); 515 } 516 return obj2yaml_error::success; 517 } 518 519 if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary)) { 520 if (auto Err = macho2yaml(Out, *MachOObj)) { 521 return errorToErrorCode(std::move(Err)); 522 } 523 return obj2yaml_error::success; 524 } 525 526 return obj2yaml_error::unsupported_obj_file_format; 527 } 528