1 //===-- LLVMSymbolize.cpp -------------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Implementation for LLVM symbolization library. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LLVMSymbolize.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/Config/config.h" 17 #include "llvm/Object/ELFObjectFile.h" 18 #include "llvm/Object/MachO.h" 19 #include "llvm/Support/Casting.h" 20 #include "llvm/Support/Compression.h" 21 #include "llvm/Support/DataExtractor.h" 22 #include "llvm/Support/Errc.h" 23 #include "llvm/Support/FileSystem.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/Path.h" 26 #include <sstream> 27 #include <stdlib.h> 28 29 namespace llvm { 30 namespace symbolize { 31 32 static bool error(std::error_code ec) { 33 if (!ec) 34 return false; 35 errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n"; 36 return true; 37 } 38 39 static DILineInfoSpecifier 40 getDILineInfoSpecifier(const LLVMSymbolizer::Options &Opts) { 41 return DILineInfoSpecifier( 42 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, 43 Opts.PrintFunctions); 44 } 45 46 ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx) 47 : Module(Obj), DebugInfoContext(DICtx) { 48 std::unique_ptr<DataExtractor> OpdExtractor; 49 uint64_t OpdAddress = 0; 50 // Find the .opd (function descriptor) section if any, for big-endian 51 // PowerPC64 ELF. 52 if (Module->getArch() == Triple::ppc64) { 53 for (section_iterator Section : Module->sections()) { 54 StringRef Name; 55 if (!error(Section->getName(Name)) && Name == ".opd") { 56 StringRef Data; 57 if (!error(Section->getContents(Data))) { 58 OpdExtractor.reset(new DataExtractor(Data, Module->isLittleEndian(), 59 Module->getBytesInAddress())); 60 OpdAddress = Section->getAddress(); 61 } 62 break; 63 } 64 } 65 } 66 for (const SymbolRef &Symbol : Module->symbols()) { 67 addSymbol(Symbol, OpdExtractor.get(), OpdAddress); 68 } 69 bool NoSymbolTable = (Module->symbol_begin() == Module->symbol_end()); 70 if (NoSymbolTable && Module->isELF()) { 71 // Fallback to dynamic symbol table, if regular symbol table is stripped. 72 std::pair<symbol_iterator, symbol_iterator> IDyn = 73 getELFDynamicSymbolIterators(Module); 74 for (symbol_iterator si = IDyn.first, se = IDyn.second; si != se; ++si) { 75 addSymbol(*si, OpdExtractor.get(), OpdAddress); 76 } 77 } 78 } 79 80 void ModuleInfo::addSymbol(const SymbolRef &Symbol, DataExtractor *OpdExtractor, 81 uint64_t OpdAddress) { 82 SymbolRef::Type SymbolType; 83 if (error(Symbol.getType(SymbolType))) 84 return; 85 if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data) 86 return; 87 uint64_t SymbolAddress; 88 if (error(Symbol.getAddress(SymbolAddress)) || 89 SymbolAddress == UnknownAddressOrSize) 90 return; 91 if (OpdExtractor) { 92 // For big-endian PowerPC64 ELF, symbols in the .opd section refer to 93 // function descriptors. The first word of the descriptor is a pointer to 94 // the function's code. 95 // For the purposes of symbolization, pretend the symbol's address is that 96 // of the function's code, not the descriptor. 97 uint64_t OpdOffset = SymbolAddress - OpdAddress; 98 uint32_t OpdOffset32 = OpdOffset; 99 if (OpdOffset == OpdOffset32 && 100 OpdExtractor->isValidOffsetForAddress(OpdOffset32)) 101 SymbolAddress = OpdExtractor->getAddress(&OpdOffset32); 102 } 103 uint64_t SymbolSize; 104 // Getting symbol size is linear for Mach-O files, so assume that symbol 105 // occupies the memory range up to the following symbol. 106 if (isa<MachOObjectFile>(Module)) 107 SymbolSize = 0; 108 else if (error(Symbol.getSize(SymbolSize)) || 109 SymbolSize == UnknownAddressOrSize) 110 return; 111 StringRef SymbolName; 112 if (error(Symbol.getName(SymbolName))) 113 return; 114 // Mach-O symbol table names have leading underscore, skip it. 115 if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_') 116 SymbolName = SymbolName.drop_front(); 117 // FIXME: If a function has alias, there are two entries in symbol table 118 // with same address size. Make sure we choose the correct one. 119 auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; 120 SymbolDesc SD = { SymbolAddress, SymbolSize }; 121 M.insert(std::make_pair(SD, SymbolName)); 122 } 123 124 bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, 125 std::string &Name, uint64_t &Addr, 126 uint64_t &Size) const { 127 const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects; 128 if (SymbolMap.empty()) 129 return false; 130 SymbolDesc SD = { Address, Address }; 131 auto SymbolIterator = SymbolMap.upper_bound(SD); 132 if (SymbolIterator == SymbolMap.begin()) 133 return false; 134 --SymbolIterator; 135 if (SymbolIterator->first.Size != 0 && 136 SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address) 137 return false; 138 Name = SymbolIterator->second.str(); 139 Addr = SymbolIterator->first.Addr; 140 Size = SymbolIterator->first.Size; 141 return true; 142 } 143 144 DILineInfo ModuleInfo::symbolizeCode( 145 uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { 146 DILineInfo LineInfo; 147 if (DebugInfoContext) { 148 LineInfo = DebugInfoContext->getLineInfoForAddress( 149 ModuleOffset, getDILineInfoSpecifier(Opts)); 150 } 151 // Override function name from symbol table if necessary. 152 if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) { 153 std::string FunctionName; 154 uint64_t Start, Size; 155 if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, 156 FunctionName, Start, Size)) { 157 LineInfo.FunctionName = FunctionName; 158 } 159 } 160 return LineInfo; 161 } 162 163 DIInliningInfo ModuleInfo::symbolizeInlinedCode( 164 uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { 165 DIInliningInfo InlinedContext; 166 if (DebugInfoContext) { 167 InlinedContext = DebugInfoContext->getInliningInfoForAddress( 168 ModuleOffset, getDILineInfoSpecifier(Opts)); 169 } 170 // Make sure there is at least one frame in context. 171 if (InlinedContext.getNumberOfFrames() == 0) { 172 InlinedContext.addFrame(DILineInfo()); 173 } 174 // Override the function name in lower frame with name from symbol table. 175 if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) { 176 DIInliningInfo PatchedInlinedContext; 177 for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 178 DILineInfo LineInfo = InlinedContext.getFrame(i); 179 if (i == n - 1) { 180 std::string FunctionName; 181 uint64_t Start, Size; 182 if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, 183 FunctionName, Start, Size)) { 184 LineInfo.FunctionName = FunctionName; 185 } 186 } 187 PatchedInlinedContext.addFrame(LineInfo); 188 } 189 InlinedContext = PatchedInlinedContext; 190 } 191 return InlinedContext; 192 } 193 194 bool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name, 195 uint64_t &Start, uint64_t &Size) const { 196 return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start, 197 Size); 198 } 199 200 const char LLVMSymbolizer::kBadString[] = "??"; 201 202 std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, 203 uint64_t ModuleOffset) { 204 ModuleInfo *Info = getOrCreateModuleInfo(ModuleName); 205 if (!Info) 206 return printDILineInfo(DILineInfo()); 207 if (Opts.PrintInlining) { 208 DIInliningInfo InlinedContext = 209 Info->symbolizeInlinedCode(ModuleOffset, Opts); 210 uint32_t FramesNum = InlinedContext.getNumberOfFrames(); 211 assert(FramesNum > 0); 212 std::string Result; 213 for (uint32_t i = 0; i < FramesNum; i++) { 214 DILineInfo LineInfo = InlinedContext.getFrame(i); 215 Result += printDILineInfo(LineInfo); 216 } 217 return Result; 218 } 219 DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts); 220 return printDILineInfo(LineInfo); 221 } 222 223 std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, 224 uint64_t ModuleOffset) { 225 std::string Name = kBadString; 226 uint64_t Start = 0; 227 uint64_t Size = 0; 228 if (Opts.UseSymbolTable) { 229 if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) { 230 if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle) 231 Name = DemangleName(Name); 232 } 233 } 234 std::stringstream ss; 235 ss << Name << "\n" << Start << " " << Size << "\n"; 236 return ss.str(); 237 } 238 239 void LLVMSymbolizer::flush() { 240 DeleteContainerSeconds(Modules); 241 ObjectPairForPathArch.clear(); 242 ObjectFileForArch.clear(); 243 } 244 245 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in 246 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo. 247 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in 248 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo. 249 static 250 std::string getDarwinDWARFResourceForPath( 251 const std::string &Path, const std::string &Basename) { 252 SmallString<16> ResourceName = StringRef(Path); 253 if (sys::path::extension(Path) != ".dSYM") { 254 ResourceName += ".dSYM"; 255 } 256 sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 257 sys::path::append(ResourceName, Basename); 258 return ResourceName.str(); 259 } 260 261 static bool checkFileCRC(StringRef Path, uint32_t CRCHash) { 262 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 263 MemoryBuffer::getFileOrSTDIN(Path); 264 if (!MB) 265 return false; 266 return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer()); 267 } 268 269 static bool findDebugBinary(const std::string &OrigPath, 270 const std::string &DebuglinkName, uint32_t CRCHash, 271 std::string &Result) { 272 std::string OrigRealPath = OrigPath; 273 #if defined(HAVE_REALPATH) 274 if (char *RP = realpath(OrigPath.c_str(), nullptr)) { 275 OrigRealPath = RP; 276 free(RP); 277 } 278 #endif 279 SmallString<16> OrigDir(OrigRealPath); 280 llvm::sys::path::remove_filename(OrigDir); 281 SmallString<16> DebugPath = OrigDir; 282 // Try /path/to/original_binary/debuglink_name 283 llvm::sys::path::append(DebugPath, DebuglinkName); 284 if (checkFileCRC(DebugPath, CRCHash)) { 285 Result = DebugPath.str(); 286 return true; 287 } 288 // Try /path/to/original_binary/.debug/debuglink_name 289 DebugPath = OrigRealPath; 290 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); 291 if (checkFileCRC(DebugPath, CRCHash)) { 292 Result = DebugPath.str(); 293 return true; 294 } 295 // Try /usr/lib/debug/path/to/original_binary/debuglink_name 296 DebugPath = "/usr/lib/debug"; 297 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), 298 DebuglinkName); 299 if (checkFileCRC(DebugPath, CRCHash)) { 300 Result = DebugPath.str(); 301 return true; 302 } 303 return false; 304 } 305 306 static bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, 307 uint32_t &CRCHash) { 308 if (!Obj) 309 return false; 310 for (const SectionRef &Section : Obj->sections()) { 311 StringRef Name; 312 Section.getName(Name); 313 Name = Name.substr(Name.find_first_not_of("._")); 314 if (Name == "gnu_debuglink") { 315 StringRef Data; 316 Section.getContents(Data); 317 DataExtractor DE(Data, Obj->isLittleEndian(), 0); 318 uint32_t Offset = 0; 319 if (const char *DebugNameStr = DE.getCStr(&Offset)) { 320 // 4-byte align the offset. 321 Offset = (Offset + 3) & ~0x3; 322 if (DE.isValidOffsetForDataOfSize(Offset, 4)) { 323 DebugName = DebugNameStr; 324 CRCHash = DE.getU32(&Offset); 325 return true; 326 } 327 } 328 break; 329 } 330 } 331 return false; 332 } 333 334 static 335 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, 336 const MachOObjectFile *Obj) { 337 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); 338 ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); 339 if (dbg_uuid.empty() || bin_uuid.empty()) 340 return false; 341 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); 342 } 343 344 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, 345 const MachOObjectFile *MachExeObj, const std::string &ArchName) { 346 // On Darwin we may find DWARF in separate object file in 347 // resource directory. 348 std::vector<std::string> DsymPaths; 349 StringRef Filename = sys::path::filename(ExePath); 350 DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename)); 351 for (const auto &Path : Opts.DsymHints) { 352 DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename)); 353 } 354 for (const auto &path : DsymPaths) { 355 ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(path); 356 std::error_code EC = BinaryOrErr.getError(); 357 if (EC != errc::no_such_file_or_directory && !error(EC)) { 358 OwningBinary<Binary> B = std::move(BinaryOrErr.get()); 359 ObjectFile *DbgObj = 360 getObjectFileFromBinary(B.getBinary(), ArchName); 361 const MachOObjectFile *MachDbgObj = 362 dyn_cast<const MachOObjectFile>(DbgObj); 363 if (!MachDbgObj) continue; 364 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) { 365 addOwningBinary(std::move(B)); 366 return DbgObj; 367 } 368 } 369 } 370 return nullptr; 371 } 372 373 LLVMSymbolizer::ObjectPair 374 LLVMSymbolizer::getOrCreateObjects(const std::string &Path, 375 const std::string &ArchName) { 376 const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); 377 if (I != ObjectPairForPathArch.end()) 378 return I->second; 379 ObjectFile *Obj = nullptr; 380 ObjectFile *DbgObj = nullptr; 381 ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(Path); 382 if (!error(BinaryOrErr.getError())) { 383 OwningBinary<Binary> &B = BinaryOrErr.get(); 384 Obj = getObjectFileFromBinary(B.getBinary(), ArchName); 385 if (!Obj) { 386 ObjectPair Res = std::make_pair(nullptr, nullptr); 387 ObjectPairForPathArch[std::make_pair(Path, ArchName)] = Res; 388 return Res; 389 } 390 addOwningBinary(std::move(B)); 391 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj)) 392 DbgObj = lookUpDsymFile(Path, MachObj, ArchName); 393 // Try to locate the debug binary using .gnu_debuglink section. 394 if (!DbgObj) { 395 std::string DebuglinkName; 396 uint32_t CRCHash; 397 std::string DebugBinaryPath; 398 if (getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash) && 399 findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) { 400 BinaryOrErr = createBinary(DebugBinaryPath); 401 if (!error(BinaryOrErr.getError())) { 402 OwningBinary<Binary> B = std::move(BinaryOrErr.get()); 403 DbgObj = getObjectFileFromBinary(B.getBinary(), ArchName); 404 addOwningBinary(std::move(B)); 405 } 406 } 407 } 408 } 409 if (!DbgObj) 410 DbgObj = Obj; 411 ObjectPair Res = std::make_pair(Obj, DbgObj); 412 ObjectPairForPathArch[std::make_pair(Path, ArchName)] = Res; 413 return Res; 414 } 415 416 ObjectFile * 417 LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, 418 const std::string &ArchName) { 419 if (!Bin) 420 return nullptr; 421 ObjectFile *Res = nullptr; 422 if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) { 423 const auto &I = ObjectFileForArch.find( 424 std::make_pair(UB, ArchName)); 425 if (I != ObjectFileForArch.end()) 426 return I->second; 427 ErrorOr<std::unique_ptr<ObjectFile>> ParsedObj = 428 UB->getObjectForArch(Triple(ArchName).getArch()); 429 if (ParsedObj) { 430 Res = ParsedObj.get().get(); 431 ParsedBinariesAndObjects.push_back(std::move(ParsedObj.get())); 432 } 433 ObjectFileForArch[std::make_pair(UB, ArchName)] = Res; 434 } else if (Bin->isObject()) { 435 Res = cast<ObjectFile>(Bin); 436 } 437 return Res; 438 } 439 440 ModuleInfo * 441 LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { 442 const auto &I = Modules.find(ModuleName); 443 if (I != Modules.end()) 444 return I->second; 445 std::string BinaryName = ModuleName; 446 std::string ArchName = Opts.DefaultArch; 447 size_t ColonPos = ModuleName.find_last_of(':'); 448 // Verify that substring after colon form a valid arch name. 449 if (ColonPos != std::string::npos) { 450 std::string ArchStr = ModuleName.substr(ColonPos + 1); 451 if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 452 BinaryName = ModuleName.substr(0, ColonPos); 453 ArchName = ArchStr; 454 } 455 } 456 ObjectPair Objects = getOrCreateObjects(BinaryName, ArchName); 457 458 if (!Objects.first) { 459 // Failed to find valid object file. 460 Modules.insert(make_pair(ModuleName, (ModuleInfo *)nullptr)); 461 return nullptr; 462 } 463 DIContext *Context = DIContext::getDWARFContext(*Objects.second); 464 assert(Context); 465 ModuleInfo *Info = new ModuleInfo(Objects.first, Context); 466 Modules.insert(make_pair(ModuleName, Info)); 467 return Info; 468 } 469 470 std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const { 471 // By default, DILineInfo contains "<invalid>" for function/filename it 472 // cannot fetch. We replace it to "??" to make our output closer to addr2line. 473 static const std::string kDILineInfoBadString = "<invalid>"; 474 std::stringstream Result; 475 if (Opts.PrintFunctions != FunctionNameKind::None) { 476 std::string FunctionName = LineInfo.FunctionName; 477 if (FunctionName == kDILineInfoBadString) 478 FunctionName = kBadString; 479 else if (Opts.Demangle) 480 FunctionName = DemangleName(FunctionName); 481 Result << FunctionName << "\n"; 482 } 483 std::string Filename = LineInfo.FileName; 484 if (Filename == kDILineInfoBadString) 485 Filename = kBadString; 486 Result << Filename << ":" << LineInfo.Line << ":" << LineInfo.Column << "\n"; 487 return Result.str(); 488 } 489 490 #if !defined(_MSC_VER) 491 // Assume that __cxa_demangle is provided by libcxxabi (except for Windows). 492 extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, 493 size_t *length, int *status); 494 #endif 495 496 std::string LLVMSymbolizer::DemangleName(const std::string &Name) { 497 #if !defined(_MSC_VER) 498 // We can spoil names of symbols with C linkage, so use an heuristic 499 // approach to check if the name should be demangled. 500 if (Name.substr(0, 2) != "_Z") 501 return Name; 502 int status = 0; 503 char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status); 504 if (status != 0) 505 return Name; 506 std::string Result = DemangledName; 507 free(DemangledName); 508 return Result; 509 #else 510 return Name; 511 #endif 512 } 513 514 } // namespace symbolize 515 } // namespace llvm 516