1 //===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Implementation of the MC-JIT runtime dynamic linker. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #define DEBUG_TYPE "dyld" 15 #include "llvm/ADT/OwningPtr.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "RuntimeDyldImpl.h" 19 using namespace llvm; 20 using namespace llvm::object; 21 22 namespace llvm { 23 24 bool RuntimeDyldMachO:: 25 resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel, 26 unsigned Type, unsigned Size) { 27 // This just dispatches to the proper target specific routine. 28 switch (CPUType) { 29 default: assert(0 && "Unsupported CPU type!"); 30 case mach::CTM_x86_64: 31 return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value, 32 isPCRel, Type, Size); 33 case mach::CTM_ARM: 34 return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value, 35 isPCRel, Type, Size); 36 } 37 llvm_unreachable(""); 38 } 39 40 bool RuntimeDyldMachO:: 41 resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, 42 bool isPCRel, unsigned Type, 43 unsigned Size) { 44 // If the relocation is PC-relative, the value to be encoded is the 45 // pointer difference. 46 if (isPCRel) 47 // FIXME: It seems this value needs to be adjusted by 4 for an effective PC 48 // address. Is that expected? Only for branches, perhaps? 49 Value -= Address + 4; 50 51 switch(Type) { 52 default: 53 llvm_unreachable("Invalid relocation type!"); 54 case macho::RIT_X86_64_Unsigned: 55 case macho::RIT_X86_64_Branch: { 56 // Mask in the target value a byte at a time (we don't have an alignment 57 // guarantee for the target address, so this is safest). 58 uint8_t *p = (uint8_t*)Address; 59 for (unsigned i = 0; i < Size; ++i) { 60 *p++ = (uint8_t)Value; 61 Value >>= 8; 62 } 63 return false; 64 } 65 case macho::RIT_X86_64_Signed: 66 case macho::RIT_X86_64_GOTLoad: 67 case macho::RIT_X86_64_GOT: 68 case macho::RIT_X86_64_Subtractor: 69 case macho::RIT_X86_64_Signed1: 70 case macho::RIT_X86_64_Signed2: 71 case macho::RIT_X86_64_Signed4: 72 case macho::RIT_X86_64_TLV: 73 return Error("Relocation type not implemented yet!"); 74 } 75 return false; 76 } 77 78 bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value, 79 bool isPCRel, unsigned Type, 80 unsigned Size) { 81 // If the relocation is PC-relative, the value to be encoded is the 82 // pointer difference. 83 if (isPCRel) { 84 Value -= Address; 85 // ARM PCRel relocations have an effective-PC offset of two instructions 86 // (four bytes in Thumb mode, 8 bytes in ARM mode). 87 // FIXME: For now, assume ARM mode. 88 Value -= 8; 89 } 90 91 switch(Type) { 92 default: 93 llvm_unreachable("Invalid relocation type!"); 94 case macho::RIT_Vanilla: { 95 llvm_unreachable("Invalid relocation type!"); 96 // Mask in the target value a byte at a time (we don't have an alignment 97 // guarantee for the target address, so this is safest). 98 uint8_t *p = (uint8_t*)Address; 99 for (unsigned i = 0; i < Size; ++i) { 100 *p++ = (uint8_t)Value; 101 Value >>= 8; 102 } 103 break; 104 } 105 case macho::RIT_ARM_Branch24Bit: { 106 // Mask the value into the target address. We know instructions are 107 // 32-bit aligned, so we can do it all at once. 108 uint32_t *p = (uint32_t*)Address; 109 // The low two bits of the value are not encoded. 110 Value >>= 2; 111 // Mask the value to 24 bits. 112 Value &= 0xffffff; 113 // FIXME: If the destination is a Thumb function (and the instruction 114 // is a non-predicated BL instruction), we need to change it to a BLX 115 // instruction instead. 116 117 // Insert the value into the instruction. 118 *p = (*p & ~0xffffff) | Value; 119 break; 120 } 121 case macho::RIT_ARM_ThumbBranch22Bit: 122 case macho::RIT_ARM_ThumbBranch32Bit: 123 case macho::RIT_ARM_Half: 124 case macho::RIT_ARM_HalfDifference: 125 case macho::RIT_Pair: 126 case macho::RIT_Difference: 127 case macho::RIT_ARM_LocalDifference: 128 case macho::RIT_ARM_PreboundLazyPointer: 129 return Error("Relocation type not implemented yet!"); 130 } 131 return false; 132 } 133 134 bool RuntimeDyldMachO:: 135 loadSegment32(const MachOObject *Obj, 136 const MachOObject::LoadCommandInfo *SegmentLCI, 137 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { 138 InMemoryStruct<macho::SegmentLoadCommand> SegmentLC; 139 Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC); 140 if (!SegmentLC) 141 return Error("unable to load segment load command"); 142 143 for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) { 144 InMemoryStruct<macho::Section> Sect; 145 Obj->ReadSection(*SegmentLCI, SectNum, Sect); 146 if (!Sect) 147 return Error("unable to load section: '" + Twine(SectNum) + "'"); 148 149 // FIXME: For the time being, we're only loading text segments. 150 if (Sect->Flags != 0x80000400) 151 continue; 152 153 // Address and names of symbols in the section. 154 typedef std::pair<uint64_t, StringRef> SymbolEntry; 155 SmallVector<SymbolEntry, 64> Symbols; 156 // Index of all the names, in this section or not. Used when we're 157 // dealing with relocation entries. 158 SmallVector<StringRef, 64> SymbolNames; 159 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { 160 InMemoryStruct<macho::SymbolTableEntry> STE; 161 Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE); 162 if (!STE) 163 return Error("unable to read symbol: '" + Twine(i) + "'"); 164 if (STE->SectionIndex > SegmentLC->NumSections) 165 return Error("invalid section index for symbol: '" + Twine(i) + "'"); 166 // Get the symbol name. 167 StringRef Name = Obj->getStringAtIndex(STE->StringIndex); 168 SymbolNames.push_back(Name); 169 170 // Just skip symbols not defined in this section. 171 if ((unsigned)STE->SectionIndex - 1 != SectNum) 172 continue; 173 174 // FIXME: Check the symbol type and flags. 175 if (STE->Type != 0xF) // external, defined in this section. 176 continue; 177 // Flags == 0x8 marks a thumb function for ARM, which is fine as it 178 // doesn't require any special handling here. 179 if (STE->Flags != 0x0 && STE->Flags != 0x8) 180 continue; 181 182 // Remember the symbol. 183 Symbols.push_back(SymbolEntry(STE->Value, Name)); 184 185 DEBUG(dbgs() << "Function sym: '" << Name << "' @ " << 186 (Sect->Address + STE->Value) << "\n"); 187 } 188 // Sort the symbols by address, just in case they didn't come in that way. 189 array_pod_sort(Symbols.begin(), Symbols.end()); 190 191 // If there weren't any functions (odd, but just in case...) 192 if (!Symbols.size()) 193 continue; 194 195 // Extract the function data. 196 uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset, 197 SegmentLC->FileSize).data(); 198 for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) { 199 uint64_t StartOffset = Sect->Address + Symbols[i].first; 200 uint64_t EndOffset = Symbols[i + 1].first - 1; 201 DEBUG(dbgs() << "Extracting function: " << Symbols[i].second 202 << " from [" << StartOffset << ", " << EndOffset << "]\n"); 203 extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset); 204 } 205 // The last symbol we do after since the end address is calculated 206 // differently because there is no next symbol to reference. 207 uint64_t StartOffset = Symbols[Symbols.size() - 1].first; 208 uint64_t EndOffset = Sect->Size - 1; 209 DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second 210 << " from [" << StartOffset << ", " << EndOffset << "]\n"); 211 extractFunction(Symbols[Symbols.size()-1].second, 212 Base + StartOffset, Base + EndOffset); 213 214 // Now extract the relocation information for each function and process it. 215 for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { 216 InMemoryStruct<macho::RelocationEntry> RE; 217 Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); 218 if (RE->Word0 & macho::RF_Scattered) 219 return Error("NOT YET IMPLEMENTED: scattered relocations."); 220 // Word0 of the relocation is the offset into the section where the 221 // relocation should be applied. We need to translate that into an 222 // offset into a function since that's our atom. 223 uint32_t Offset = RE->Word0; 224 // Look for the function containing the address. This is used for JIT 225 // code, so the number of functions in section is almost always going 226 // to be very small (usually just one), so until we have use cases 227 // where that's not true, just use a trivial linear search. 228 unsigned SymbolNum; 229 unsigned NumSymbols = Symbols.size(); 230 assert(NumSymbols > 0 && Symbols[0].first <= Offset && 231 "No symbol containing relocation!"); 232 for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum) 233 if (Symbols[SymbolNum + 1].first > Offset) 234 break; 235 // Adjust the offset to be relative to the symbol. 236 Offset -= Symbols[SymbolNum].first; 237 // Get the name of the symbol containing the relocation. 238 StringRef TargetName = SymbolNames[SymbolNum]; 239 240 bool isExtern = (RE->Word1 >> 27) & 1; 241 // Figure out the source symbol of the relocation. If isExtern is true, 242 // this relocation references the symbol table, otherwise it references 243 // a section in the same object, numbered from 1 through NumSections 244 // (SectionBases is [0, NumSections-1]). 245 // FIXME: Some targets (ARM) use internal relocations even for 246 // externally visible symbols, if the definition is in the same 247 // file as the reference. We need to convert those back to by-name 248 // references. We can resolve the address based on the section 249 // offset and see if we have a symbol at that address. If we do, 250 // use that; otherwise, puke. 251 if (!isExtern) 252 return Error("Internal relocations not supported."); 253 uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value 254 StringRef SourceName = SymbolNames[SourceNum]; 255 256 // FIXME: Get the relocation addend from the target address. 257 258 // Now store the relocation information. Associate it with the source 259 // symbol. 260 Relocations[SourceName].push_back(RelocationEntry(TargetName, 261 Offset, 262 RE->Word1, 263 0 /*Addend*/)); 264 DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset 265 << " from '" << SourceName << "(Word1: " 266 << format("0x%x", RE->Word1) << ")\n"); 267 } 268 } 269 return false; 270 } 271 272 273 bool RuntimeDyldMachO:: 274 loadSegment64(const MachOObject *Obj, 275 const MachOObject::LoadCommandInfo *SegmentLCI, 276 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { 277 InMemoryStruct<macho::Segment64LoadCommand> Segment64LC; 278 Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC); 279 if (!Segment64LC) 280 return Error("unable to load segment load command"); 281 282 for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) { 283 InMemoryStruct<macho::Section64> Sect; 284 Obj->ReadSection64(*SegmentLCI, SectNum, Sect); 285 if (!Sect) 286 return Error("unable to load section: '" + Twine(SectNum) + "'"); 287 288 // FIXME: For the time being, we're only loading text segments. 289 if (Sect->Flags != 0x80000400) 290 continue; 291 292 // Address and names of symbols in the section. 293 typedef std::pair<uint64_t, StringRef> SymbolEntry; 294 SmallVector<SymbolEntry, 64> Symbols; 295 // Index of all the names, in this section or not. Used when we're 296 // dealing with relocation entries. 297 SmallVector<StringRef, 64> SymbolNames; 298 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { 299 InMemoryStruct<macho::Symbol64TableEntry> STE; 300 Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE); 301 if (!STE) 302 return Error("unable to read symbol: '" + Twine(i) + "'"); 303 if (STE->SectionIndex > Segment64LC->NumSections) 304 return Error("invalid section index for symbol: '" + Twine(i) + "'"); 305 // Get the symbol name. 306 StringRef Name = Obj->getStringAtIndex(STE->StringIndex); 307 SymbolNames.push_back(Name); 308 309 // Just skip symbols not defined in this section. 310 if ((unsigned)STE->SectionIndex - 1 != SectNum) 311 continue; 312 313 // FIXME: Check the symbol type and flags. 314 if (STE->Type != 0xF) // external, defined in this section. 315 continue; 316 if (STE->Flags != 0x0) 317 continue; 318 319 // Remember the symbol. 320 Symbols.push_back(SymbolEntry(STE->Value, Name)); 321 322 DEBUG(dbgs() << "Function sym: '" << Name << "' @ " << 323 (Sect->Address + STE->Value) << "\n"); 324 } 325 // Sort the symbols by address, just in case they didn't come in that way. 326 array_pod_sort(Symbols.begin(), Symbols.end()); 327 328 // If there weren't any functions (odd, but just in case...) 329 if (!Symbols.size()) 330 continue; 331 332 // Extract the function data. 333 uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset, 334 Segment64LC->FileSize).data(); 335 for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) { 336 uint64_t StartOffset = Sect->Address + Symbols[i].first; 337 uint64_t EndOffset = Symbols[i + 1].first - 1; 338 DEBUG(dbgs() << "Extracting function: " << Symbols[i].second 339 << " from [" << StartOffset << ", " << EndOffset << "]\n"); 340 extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset); 341 } 342 // The last symbol we do after since the end address is calculated 343 // differently because there is no next symbol to reference. 344 uint64_t StartOffset = Symbols[Symbols.size() - 1].first; 345 uint64_t EndOffset = Sect->Size - 1; 346 DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second 347 << " from [" << StartOffset << ", " << EndOffset << "]\n"); 348 extractFunction(Symbols[Symbols.size()-1].second, 349 Base + StartOffset, Base + EndOffset); 350 351 // Now extract the relocation information for each function and process it. 352 for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { 353 InMemoryStruct<macho::RelocationEntry> RE; 354 Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); 355 if (RE->Word0 & macho::RF_Scattered) 356 return Error("NOT YET IMPLEMENTED: scattered relocations."); 357 // Word0 of the relocation is the offset into the section where the 358 // relocation should be applied. We need to translate that into an 359 // offset into a function since that's our atom. 360 uint32_t Offset = RE->Word0; 361 // Look for the function containing the address. This is used for JIT 362 // code, so the number of functions in section is almost always going 363 // to be very small (usually just one), so until we have use cases 364 // where that's not true, just use a trivial linear search. 365 unsigned SymbolNum; 366 unsigned NumSymbols = Symbols.size(); 367 assert(NumSymbols > 0 && Symbols[0].first <= Offset && 368 "No symbol containing relocation!"); 369 for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum) 370 if (Symbols[SymbolNum + 1].first > Offset) 371 break; 372 // Adjust the offset to be relative to the symbol. 373 Offset -= Symbols[SymbolNum].first; 374 // Get the name of the symbol containing the relocation. 375 StringRef TargetName = SymbolNames[SymbolNum]; 376 377 bool isExtern = (RE->Word1 >> 27) & 1; 378 // Figure out the source symbol of the relocation. If isExtern is true, 379 // this relocation references the symbol table, otherwise it references 380 // a section in the same object, numbered from 1 through NumSections 381 // (SectionBases is [0, NumSections-1]). 382 if (!isExtern) 383 return Error("Internal relocations not supported."); 384 uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value 385 StringRef SourceName = SymbolNames[SourceNum]; 386 387 // FIXME: Get the relocation addend from the target address. 388 389 // Now store the relocation information. Associate it with the source 390 // symbol. 391 Relocations[SourceName].push_back(RelocationEntry(TargetName, 392 Offset, 393 RE->Word1, 394 0 /*Addend*/)); 395 DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset 396 << " from '" << SourceName << "(Word1: " 397 << format("0x%x", RE->Word1) << ")\n"); 398 } 399 } 400 return false; 401 } 402 403 bool RuntimeDyldMachO::loadObject(MemoryBuffer *InputBuffer) { 404 // If the linker is in an error state, don't do anything. 405 if (hasError()) 406 return true; 407 // Load the Mach-O wrapper object. 408 std::string ErrorStr; 409 OwningPtr<MachOObject> Obj( 410 MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr)); 411 if (!Obj) 412 return Error("unable to load object: '" + ErrorStr + "'"); 413 414 // Get the CPU type information from the header. 415 const macho::Header &Header = Obj->getHeader(); 416 417 // FIXME: Error checking that the loaded object is compatible with 418 // the system we're running on. 419 CPUType = Header.CPUType; 420 CPUSubtype = Header.CPUSubtype; 421 422 // Validate that the load commands match what we expect. 423 const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0, 424 *DysymtabLCI = 0; 425 for (unsigned i = 0; i != Header.NumLoadCommands; ++i) { 426 const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i); 427 switch (LCI.Command.Type) { 428 case macho::LCT_Segment: 429 case macho::LCT_Segment64: 430 if (SegmentLCI) 431 return Error("unexpected input object (multiple segments)"); 432 SegmentLCI = &LCI; 433 break; 434 case macho::LCT_Symtab: 435 if (SymtabLCI) 436 return Error("unexpected input object (multiple symbol tables)"); 437 SymtabLCI = &LCI; 438 break; 439 case macho::LCT_Dysymtab: 440 if (DysymtabLCI) 441 return Error("unexpected input object (multiple symbol tables)"); 442 DysymtabLCI = &LCI; 443 break; 444 default: 445 return Error("unexpected input object (unexpected load command"); 446 } 447 } 448 449 if (!SymtabLCI) 450 return Error("no symbol table found in object"); 451 if (!SegmentLCI) 452 return Error("no symbol table found in object"); 453 454 // Read and register the symbol table data. 455 InMemoryStruct<macho::SymtabLoadCommand> SymtabLC; 456 Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC); 457 if (!SymtabLC) 458 return Error("unable to load symbol table load command"); 459 Obj->RegisterStringTable(*SymtabLC); 460 461 // Read the dynamic link-edit information, if present (not present in static 462 // objects). 463 if (DysymtabLCI) { 464 InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC; 465 Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC); 466 if (!DysymtabLC) 467 return Error("unable to load dynamic link-exit load command"); 468 469 // FIXME: We don't support anything interesting yet. 470 // if (DysymtabLC->LocalSymbolsIndex != 0) 471 // return Error("NOT YET IMPLEMENTED: local symbol entries"); 472 // if (DysymtabLC->ExternalSymbolsIndex != 0) 473 // return Error("NOT YET IMPLEMENTED: non-external symbol entries"); 474 // if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries) 475 // return Error("NOT YET IMPLEMENTED: undefined symbol entries"); 476 } 477 478 // Load the segment load command. 479 if (SegmentLCI->Command.Type == macho::LCT_Segment) { 480 if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC)) 481 return true; 482 } else { 483 if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC)) 484 return true; 485 } 486 487 return false; 488 } 489 490 // Assign an address to a symbol name and resolve all the relocations 491 // associated with it. 492 void RuntimeDyldMachO::reassignSymbolAddress(StringRef Name, uint8_t *Addr) { 493 // Assign the address in our symbol table. 494 SymbolTable[Name] = Addr; 495 496 RelocationList &Relocs = Relocations[Name]; 497 for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { 498 RelocationEntry &RE = Relocs[i]; 499 uint8_t *Target = SymbolTable[RE.Target] + RE.Offset; 500 bool isPCRel = (RE.Data >> 24) & 1; 501 unsigned Type = (RE.Data >> 28) & 0xf; 502 unsigned Size = 1 << ((RE.Data >> 25) & 3); 503 504 DEBUG(dbgs() << "Resolving relocation at '" << RE.Target 505 << "' + " << RE.Offset << " (" << format("%p", Target) << ")" 506 << " from '" << Name << " (" << format("%p", Addr) << ")" 507 << "(" << (isPCRel ? "pcrel" : "absolute") 508 << ", type: " << Type << ", Size: " << Size << ").\n"); 509 510 resolveRelocation(Target, Addr, isPCRel, Type, Size); 511 RE.isResolved = true; 512 } 513 } 514 515 bool RuntimeDyldMachO::isKnownFormat(const MemoryBuffer *InputBuffer) { 516 StringRef Magic = InputBuffer->getBuffer().slice(0, 4); 517 if (Magic == "\xFE\xED\xFA\xCE") return true; 518 if (Magic == "\xCE\xFA\xED\xFE") return true; 519 if (Magic == "\xFE\xED\xFA\xCF") return true; 520 if (Magic == "\xCF\xFA\xED\xFE") return true; 521 return false; 522 } 523 524 } // end namespace llvm 525