//===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Link Time Optimization library. This library is
// intended to be used by linker to optimize code at link time.
//
//===----------------------------------------------------------------------===//

#include "llvm/LTO/LTOModule.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include <system_error>
using namespace llvm;

/// LTOModule - Construct a module wrapper around an IR object file.
///
/// \p Obj is moved into the IRFile member and \p TM is stored in the _target
/// member; nothing else is initialized here (symbols and metadata are parsed
/// separately by the factory that builds the module).
LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
                     llvm::TargetMachine *TM)
    : IRFile(std::move(Obj)), _target(TM) {}

/// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
/// bitcode.
52 bool LTOModule::isBitcodeFile(const void *mem, size_t length) { 53 return sys::fs::identify_magic(StringRef((const char *)mem, length)) == 54 sys::fs::file_magic::bitcode; 55 } 56 57 bool LTOModule::isBitcodeFile(const char *path) { 58 sys::fs::file_magic type; 59 if (sys::fs::identify_magic(path, type)) 60 return false; 61 return type == sys::fs::file_magic::bitcode; 62 } 63 64 bool LTOModule::isBitcodeForTarget(MemoryBuffer *buffer, 65 StringRef triplePrefix) { 66 std::string Triple = getBitcodeTargetTriple(buffer, getGlobalContext()); 67 return StringRef(Triple).startswith(triplePrefix); 68 } 69 70 LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options, 71 std::string &errMsg) { 72 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 73 MemoryBuffer::getFile(path); 74 if (std::error_code EC = BufferOrErr.getError()) { 75 errMsg = EC.message(); 76 return nullptr; 77 } 78 return makeLTOModule(std::move(BufferOrErr.get()), options, errMsg); 79 } 80 81 LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size, 82 TargetOptions options, 83 std::string &errMsg) { 84 return createFromOpenFileSlice(fd, path, size, 0, options, errMsg); 85 } 86 87 LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path, 88 size_t map_size, off_t offset, 89 TargetOptions options, 90 std::string &errMsg) { 91 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 92 MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset); 93 if (std::error_code EC = BufferOrErr.getError()) { 94 errMsg = EC.message(); 95 return nullptr; 96 } 97 return makeLTOModule(std::move(BufferOrErr.get()), options, errMsg); 98 } 99 100 LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length, 101 TargetOptions options, 102 std::string &errMsg, StringRef path) { 103 std::unique_ptr<MemoryBuffer> buffer(makeBuffer(mem, length, path)); 104 if (!buffer) 105 return nullptr; 106 return makeLTOModule(std::move(buffer), options, errMsg); 107 } 108 109 
/// makeLTOModule - Build an LTOModule from a buffer holding bitcode.
///
/// The bitcode is read lazily, the target is looked up from the module's
/// triple (falling back to the host default triple when the module has none),
/// a target machine is created, and the module's symbols and metadata are
/// parsed.
///
/// \param Buffer  the bitcode; moved into the resulting IR object file.
/// \param options target options forwarded to the target machine.
/// \param errMsg  receives a description of the failure when nullptr is
///                returned.
/// \returns the new module, or nullptr if the bitcode cannot be read, no
///          target is found for the triple, no target machine can be created,
///          or symbol parsing reports an error.
LTOModule *LTOModule::makeLTOModule(std::unique_ptr<MemoryBuffer> Buffer,
                                    TargetOptions options,
                                    std::string &errMsg) {
  ErrorOr<Module *> MOrErr =
      getLazyBitcodeModule(Buffer.get(), getGlobalContext());
  if (std::error_code EC = MOrErr.getError()) {
    errMsg = EC.message();
    return nullptr;
  }
  std::unique_ptr<Module> M(MOrErr.get());

  std::string TripleStr = M->getTargetTriple();
  if (TripleStr.empty())
    TripleStr = sys::getDefaultTargetTriple();
  llvm::Triple Triple(TripleStr);

  // find machine architecture for this module
  const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
  if (!march)
    return nullptr;

  // construct LTOModule, hand over ownership of module and target
  SubtargetFeatures Features;
  Features.getDefaultSubtargetFeatures(Triple);
  std::string FeatureStr = Features.getString();
  // Set a default CPU for Darwin triples.
  std::string CPU;
  if (Triple.isOSDarwin()) {
    if (Triple.getArch() == llvm::Triple::x86_64)
      CPU = "core2";
    else if (Triple.getArch() == llvm::Triple::x86)
      CPU = "yonah";
    else if (Triple.getArch() == llvm::Triple::arm64 ||
             Triple.getArch() == llvm::Triple::aarch64)
      CPU = "cyclone";
  }

  TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
                                                     options);
  // A registered target is not guaranteed to provide a target machine; bail
  // out with a diagnostic instead of dereferencing a null pointer below.
  if (!target) {
    errMsg = "unable to create target machine for triple '" + TripleStr + "'";
    return nullptr;
  }

  M->materializeAllPermanently(true);
  M->setDataLayout(target->getDataLayout());

  std::unique_ptr<object::IRObjectFile> IRObj(
      new object::IRObjectFile(std::move(Buffer), std::move(M)));

  LTOModule *Ret = new LTOModule(std::move(IRObj), target);

  // parseSymbols returns true on error.
  if (Ret->parseSymbols(errMsg)) {
    delete Ret;
    return nullptr;
  }

  Ret->parseMetadata();

  return Ret;
}

/// Create a MemoryBuffer from a memory range with an optional name.
167 MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length, 168 StringRef name) { 169 const char *startPtr = (const char*)mem; 170 return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), name, false); 171 } 172 173 /// objcClassNameFromExpression - Get string that the data pointer points to. 174 bool 175 LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) { 176 if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) { 177 Constant *op = ce->getOperand(0); 178 if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) { 179 Constant *cn = gvn->getInitializer(); 180 if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) { 181 if (ca->isCString()) { 182 name = ".objc_class_name_" + ca->getAsCString().str(); 183 return true; 184 } 185 } 186 } 187 } 188 return false; 189 } 190 191 /// addObjCClass - Parse i386/ppc ObjC class data structure. 192 void LTOModule::addObjCClass(const GlobalVariable *clgv) { 193 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer()); 194 if (!c) return; 195 196 // second slot in __OBJC,__class is pointer to superclass name 197 std::string superclassName; 198 if (objcClassNameFromExpression(c->getOperand(1), superclassName)) { 199 NameAndAttributes info; 200 StringMap<NameAndAttributes>::value_type &entry = 201 _undefines.GetOrCreateValue(superclassName); 202 if (!entry.getValue().name) { 203 const char *symbolName = entry.getKey().data(); 204 info.name = symbolName; 205 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; 206 info.isFunction = false; 207 info.symbol = clgv; 208 entry.setValue(info); 209 } 210 } 211 212 // third slot in __OBJC,__class is pointer to class name 213 std::string className; 214 if (objcClassNameFromExpression(c->getOperand(2), className)) { 215 StringSet::value_type &entry = _defines.GetOrCreateValue(className); 216 entry.setValue(1); 217 218 NameAndAttributes info; 219 info.name = entry.getKey().data(); 220 info.attributes = 
LTO_SYMBOL_PERMISSIONS_DATA | 221 LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT; 222 info.isFunction = false; 223 info.symbol = clgv; 224 _symbols.push_back(info); 225 } 226 } 227 228 /// addObjCCategory - Parse i386/ppc ObjC category data structure. 229 void LTOModule::addObjCCategory(const GlobalVariable *clgv) { 230 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer()); 231 if (!c) return; 232 233 // second slot in __OBJC,__category is pointer to target class name 234 std::string targetclassName; 235 if (!objcClassNameFromExpression(c->getOperand(1), targetclassName)) 236 return; 237 238 NameAndAttributes info; 239 StringMap<NameAndAttributes>::value_type &entry = 240 _undefines.GetOrCreateValue(targetclassName); 241 242 if (entry.getValue().name) 243 return; 244 245 const char *symbolName = entry.getKey().data(); 246 info.name = symbolName; 247 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; 248 info.isFunction = false; 249 info.symbol = clgv; 250 entry.setValue(info); 251 } 252 253 /// addObjCClassRef - Parse i386/ppc ObjC class list data structure. 
254 void LTOModule::addObjCClassRef(const GlobalVariable *clgv) { 255 std::string targetclassName; 256 if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName)) 257 return; 258 259 NameAndAttributes info; 260 StringMap<NameAndAttributes>::value_type &entry = 261 _undefines.GetOrCreateValue(targetclassName); 262 if (entry.getValue().name) 263 return; 264 265 const char *symbolName = entry.getKey().data(); 266 info.name = symbolName; 267 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; 268 info.isFunction = false; 269 info.symbol = clgv; 270 entry.setValue(info); 271 } 272 273 void LTOModule::addDefinedDataSymbol(const object::BasicSymbolRef &Sym) { 274 SmallString<64> Buffer; 275 { 276 raw_svector_ostream OS(Buffer); 277 Sym.printName(OS); 278 } 279 280 const GlobalValue *V = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); 281 addDefinedDataSymbol(Buffer.c_str(), V); 282 } 283 284 void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) { 285 // Add to list of defined symbols. 286 addDefinedSymbol(Name, v, false); 287 288 if (!v->hasSection() /* || !isTargetDarwin */) 289 return; 290 291 // Special case i386/ppc ObjC data structures in magic sections: 292 // The issue is that the old ObjC object format did some strange 293 // contortions to avoid real linker symbols. For instance, the 294 // ObjC class data structure is allocated statically in the executable 295 // that defines that class. That data structures contains a pointer to 296 // its superclass. But instead of just initializing that part of the 297 // struct to the address of its superclass, and letting the static and 298 // dynamic linkers do the rest, the runtime works by having that field 299 // instead point to a C-string that is the name of the superclass. 300 // At runtime the objc initialization updates that pointer and sets 301 // it to point to the actual super class. As far as the linker 302 // knows it is just a pointer to a string. 
But then someone wanted the 303 // linker to issue errors at build time if the superclass was not found. 304 // So they figured out a way in mach-o object format to use an absolute 305 // symbols (.objc_class_name_Foo = 0) and a floating reference 306 // (.reference .objc_class_name_Bar) to cause the linker into erroring when 307 // a class was missing. 308 // The following synthesizes the implicit .objc_* symbols for the linker 309 // from the ObjC data structures generated by the front end. 310 311 // special case if this data blob is an ObjC class definition 312 std::string Section = v->getSection(); 313 if (Section.compare(0, 15, "__OBJC,__class,") == 0) { 314 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { 315 addObjCClass(gv); 316 } 317 } 318 319 // special case if this data blob is an ObjC category definition 320 else if (Section.compare(0, 18, "__OBJC,__category,") == 0) { 321 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { 322 addObjCCategory(gv); 323 } 324 } 325 326 // special case if this data blob is the list of referenced classes 327 else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) { 328 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { 329 addObjCClassRef(gv); 330 } 331 } 332 } 333 334 void LTOModule::addDefinedFunctionSymbol(const object::BasicSymbolRef &Sym) { 335 SmallString<64> Buffer; 336 { 337 raw_svector_ostream OS(Buffer); 338 Sym.printName(OS); 339 } 340 341 const Function *F = 342 cast<Function>(IRFile->getSymbolGV(Sym.getRawDataRefImpl())); 343 addDefinedFunctionSymbol(Buffer.c_str(), F); 344 } 345 346 void LTOModule::addDefinedFunctionSymbol(const char *Name, const Function *F) { 347 // add to list of defined symbols 348 addDefinedSymbol(Name, F, true); 349 } 350 351 static bool canBeHidden(const GlobalValue *GV) { 352 // FIXME: this is duplicated with another static function in AsmPrinter.cpp 353 GlobalValue::LinkageTypes L = GV->getLinkage(); 354 355 if (L != 
GlobalValue::LinkOnceODRLinkage) 356 return false; 357 358 if (GV->hasUnnamedAddr()) 359 return true; 360 361 // If it is a non constant variable, it needs to be uniqued across shared 362 // objects. 363 if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) { 364 if (!Var->isConstant()) 365 return false; 366 } 367 368 GlobalStatus GS; 369 if (GlobalStatus::analyzeGlobal(GV, GS)) 370 return false; 371 372 return !GS.IsCompared; 373 } 374 375 void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def, 376 bool isFunction) { 377 // set alignment part log2() can have rounding errors 378 uint32_t align = def->getAlignment(); 379 uint32_t attr = align ? countTrailingZeros(align) : 0; 380 381 // set permissions part 382 if (isFunction) { 383 attr |= LTO_SYMBOL_PERMISSIONS_CODE; 384 } else { 385 const GlobalVariable *gv = dyn_cast<GlobalVariable>(def); 386 if (gv && gv->isConstant()) 387 attr |= LTO_SYMBOL_PERMISSIONS_RODATA; 388 else 389 attr |= LTO_SYMBOL_PERMISSIONS_DATA; 390 } 391 392 // set definition part 393 if (def->hasWeakLinkage() || def->hasLinkOnceLinkage()) 394 attr |= LTO_SYMBOL_DEFINITION_WEAK; 395 else if (def->hasCommonLinkage()) 396 attr |= LTO_SYMBOL_DEFINITION_TENTATIVE; 397 else 398 attr |= LTO_SYMBOL_DEFINITION_REGULAR; 399 400 // set scope part 401 if (def->hasLocalLinkage()) 402 // Ignore visibility if linkage is local. 
403 attr |= LTO_SYMBOL_SCOPE_INTERNAL; 404 else if (def->hasHiddenVisibility()) 405 attr |= LTO_SYMBOL_SCOPE_HIDDEN; 406 else if (def->hasProtectedVisibility()) 407 attr |= LTO_SYMBOL_SCOPE_PROTECTED; 408 else if (canBeHidden(def)) 409 attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN; 410 else 411 attr |= LTO_SYMBOL_SCOPE_DEFAULT; 412 413 StringSet::value_type &entry = _defines.GetOrCreateValue(Name); 414 entry.setValue(1); 415 416 // fill information structure 417 NameAndAttributes info; 418 StringRef NameRef = entry.getKey(); 419 info.name = NameRef.data(); 420 assert(info.name[NameRef.size()] == '\0'); 421 info.attributes = attr; 422 info.isFunction = isFunction; 423 info.symbol = def; 424 425 // add to table of symbols 426 _symbols.push_back(info); 427 } 428 429 /// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the 430 /// defined list. 431 void LTOModule::addAsmGlobalSymbol(const char *name, 432 lto_symbol_attributes scope) { 433 StringSet::value_type &entry = _defines.GetOrCreateValue(name); 434 435 // only add new define if not already defined 436 if (entry.getValue()) 437 return; 438 439 entry.setValue(1); 440 441 NameAndAttributes &info = _undefines[entry.getKey().data()]; 442 443 if (info.symbol == nullptr) { 444 // FIXME: This is trying to take care of module ASM like this: 445 // 446 // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0" 447 // 448 // but is gross and its mother dresses it funny. Have the ASM parser give us 449 // more details for this type of situation so that we're not guessing so 450 // much. 
451 452 // fill information structure 453 info.name = entry.getKey().data(); 454 info.attributes = 455 LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope; 456 info.isFunction = false; 457 info.symbol = nullptr; 458 459 // add to table of symbols 460 _symbols.push_back(info); 461 return; 462 } 463 464 if (info.isFunction) 465 addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol)); 466 else 467 addDefinedDataSymbol(info.name, info.symbol); 468 469 _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK; 470 _symbols.back().attributes |= scope; 471 } 472 473 /// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the 474 /// undefined list. 475 void LTOModule::addAsmGlobalSymbolUndef(const char *name) { 476 StringMap<NameAndAttributes>::value_type &entry = 477 _undefines.GetOrCreateValue(name); 478 479 _asm_undefines.push_back(entry.getKey().data()); 480 481 // we already have the symbol 482 if (entry.getValue().name) 483 return; 484 485 uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED; 486 attr |= LTO_SYMBOL_SCOPE_DEFAULT; 487 NameAndAttributes info; 488 info.name = entry.getKey().data(); 489 info.attributes = attr; 490 info.isFunction = false; 491 info.symbol = nullptr; 492 493 entry.setValue(info); 494 } 495 496 /// Add a symbol which isn't defined just yet to a list to be resolved later. 
497 void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym, 498 bool isFunc) { 499 SmallString<64> name; 500 { 501 raw_svector_ostream OS(name); 502 Sym.printName(OS); 503 } 504 505 StringMap<NameAndAttributes>::value_type &entry = 506 _undefines.GetOrCreateValue(name); 507 508 // we already have the symbol 509 if (entry.getValue().name) 510 return; 511 512 NameAndAttributes info; 513 514 info.name = entry.getKey().data(); 515 516 const GlobalValue *decl = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); 517 518 if (decl->hasExternalWeakLinkage()) 519 info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF; 520 else 521 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; 522 523 info.isFunction = isFunc; 524 info.symbol = decl; 525 526 entry.setValue(info); 527 } 528 529 /// parseSymbols - Parse the symbols from the module and model-level ASM and add 530 /// them to either the defined or undefined lists. 531 bool LTOModule::parseSymbols(std::string &errMsg) { 532 for (auto &Sym : IRFile->symbols()) { 533 const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); 534 uint32_t Flags = Sym.getFlags(); 535 if (Flags & object::BasicSymbolRef::SF_FormatSpecific) 536 continue; 537 538 bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined; 539 540 if (!GV) { 541 SmallString<64> Buffer; 542 { 543 raw_svector_ostream OS(Buffer); 544 Sym.printName(OS); 545 } 546 const char *Name = Buffer.c_str(); 547 548 if (IsUndefined) 549 addAsmGlobalSymbolUndef(Name); 550 else if (Flags & object::BasicSymbolRef::SF_Global) 551 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT); 552 else 553 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL); 554 continue; 555 } 556 557 auto *F = dyn_cast<Function>(GV); 558 if (IsUndefined) { 559 addPotentialUndefinedSymbol(Sym, F != nullptr); 560 continue; 561 } 562 563 if (F) { 564 addDefinedFunctionSymbol(Sym); 565 continue; 566 } 567 568 if (isa<GlobalVariable>(GV)) { 569 addDefinedDataSymbol(Sym); 570 continue; 571 } 
572 573 assert(isa<GlobalAlias>(GV)); 574 addDefinedDataSymbol(Sym); 575 } 576 577 // make symbols for all undefines 578 for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(), 579 e = _undefines.end(); u != e; ++u) { 580 // If this symbol also has a definition, then don't make an undefine because 581 // it is a tentative definition. 582 if (_defines.count(u->getKey())) continue; 583 NameAndAttributes info = u->getValue(); 584 _symbols.push_back(info); 585 } 586 587 return false; 588 } 589 590 /// parseMetadata - Parse metadata from the module 591 void LTOModule::parseMetadata() { 592 // Linker Options 593 if (Value *Val = getModule().getModuleFlag("Linker Options")) { 594 MDNode *LinkerOptions = cast<MDNode>(Val); 595 for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { 596 MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); 597 for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { 598 MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); 599 StringRef Op = _linkeropt_strings. 600 GetOrCreateValue(MDOption->getString()).getKey(); 601 StringRef DepLibName = _target->getTargetLowering()-> 602 getObjFileLowering().getDepLibFromLinkerOpt(Op); 603 if (!DepLibName.empty()) 604 _deplibs.push_back(DepLibName.data()); 605 else if (!Op.empty()) 606 _linkeropts.push_back(Op.data()); 607 } 608 } 609 } 610 611 // Add other interesting metadata here. 612 } 613