1 //===-- LLVMSymbolize.cpp -------------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Implementation for LLVM symbolization library. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LLVMSymbolize.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/Config/config.h" 17 #include "llvm/Object/ELFObjectFile.h" 18 #include "llvm/Object/MachO.h" 19 #include "llvm/Support/Casting.h" 20 #include "llvm/Support/Compression.h" 21 #include "llvm/Support/DataExtractor.h" 22 #include "llvm/Support/Errc.h" 23 #include "llvm/Support/FileSystem.h" 24 #include "llvm/Support/MemoryBuffer.h" 25 #include "llvm/Support/Path.h" 26 #include <sstream> 27 #include <stdlib.h> 28 29 namespace llvm { 30 namespace symbolize { 31 32 static bool error(std::error_code ec) { 33 if (!ec) 34 return false; 35 errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n"; 36 return true; 37 } 38 39 static DILineInfoSpecifier 40 getDILineInfoSpecifier(const LLVMSymbolizer::Options &Opts) { 41 return DILineInfoSpecifier( 42 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, 43 Opts.PrintFunctions); 44 } 45 46 ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx) 47 : Module(Obj), DebugInfoContext(DICtx) { 48 for (const SymbolRef &Symbol : Module->symbols()) { 49 addSymbol(Symbol); 50 } 51 bool NoSymbolTable = (Module->symbol_begin() == Module->symbol_end()); 52 if (NoSymbolTable && Module->isELF()) { 53 // Fallback to dynamic symbol table, if regular symbol table is stripped. 54 std::pair<symbol_iterator, symbol_iterator> IDyn = 55 getELFDynamicSymbolIterators(Module); 56 for (symbol_iterator si = IDyn.first, se = IDyn.second; si != se; ++si) { 57 addSymbol(*si); 58 } 59 } 60 } 61 62 void ModuleInfo::addSymbol(const SymbolRef &Symbol) { 63 SymbolRef::Type SymbolType; 64 if (error(Symbol.getType(SymbolType))) 65 return; 66 if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data) 67 return; 68 uint64_t SymbolAddress; 69 if (error(Symbol.getAddress(SymbolAddress)) || 70 SymbolAddress == UnknownAddressOrSize) 71 return; 72 uint64_t SymbolSize; 73 // Getting symbol size is linear for Mach-O files, so assume that symbol 74 // occupies the memory range up to the following symbol. 75 if (isa<MachOObjectFile>(Module)) 76 SymbolSize = 0; 77 else if (error(Symbol.getSize(SymbolSize)) || 78 SymbolSize == UnknownAddressOrSize) 79 return; 80 StringRef SymbolName; 81 if (error(Symbol.getName(SymbolName))) 82 return; 83 // Mach-O symbol table names have leading underscore, skip it. 84 if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_') 85 SymbolName = SymbolName.drop_front(); 86 // FIXME: If a function has alias, there are two entries in symbol table 87 // with same address size. Make sure we choose the correct one. 88 SymbolMapTy &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; 89 SymbolDesc SD = { SymbolAddress, SymbolSize }; 90 M.insert(std::make_pair(SD, SymbolName)); 91 } 92 93 bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, 94 std::string &Name, uint64_t &Addr, 95 uint64_t &Size) const { 96 const SymbolMapTy &M = Type == SymbolRef::ST_Function ? Functions : Objects; 97 if (M.empty()) 98 return false; 99 SymbolDesc SD = { Address, Address }; 100 SymbolMapTy::const_iterator it = M.upper_bound(SD); 101 if (it == M.begin()) 102 return false; 103 --it; 104 if (it->first.Size != 0 && it->first.Addr + it->first.Size <= Address) 105 return false; 106 Name = it->second.str(); 107 Addr = it->first.Addr; 108 Size = it->first.Size; 109 return true; 110 } 111 112 DILineInfo ModuleInfo::symbolizeCode( 113 uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { 114 DILineInfo LineInfo; 115 if (DebugInfoContext) { 116 LineInfo = DebugInfoContext->getLineInfoForAddress( 117 ModuleOffset, getDILineInfoSpecifier(Opts)); 118 } 119 // Override function name from symbol table if necessary. 120 if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) { 121 std::string FunctionName; 122 uint64_t Start, Size; 123 if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, 124 FunctionName, Start, Size)) { 125 LineInfo.FunctionName = FunctionName; 126 } 127 } 128 return LineInfo; 129 } 130 131 DIInliningInfo ModuleInfo::symbolizeInlinedCode( 132 uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { 133 DIInliningInfo InlinedContext; 134 if (DebugInfoContext) { 135 InlinedContext = DebugInfoContext->getInliningInfoForAddress( 136 ModuleOffset, getDILineInfoSpecifier(Opts)); 137 } 138 // Make sure there is at least one frame in context. 139 if (InlinedContext.getNumberOfFrames() == 0) { 140 InlinedContext.addFrame(DILineInfo()); 141 } 142 // Override the function name in lower frame with name from symbol table. 143 if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) { 144 DIInliningInfo PatchedInlinedContext; 145 for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 146 DILineInfo LineInfo = InlinedContext.getFrame(i); 147 if (i == n - 1) { 148 std::string FunctionName; 149 uint64_t Start, Size; 150 if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, 151 FunctionName, Start, Size)) { 152 LineInfo.FunctionName = FunctionName; 153 } 154 } 155 PatchedInlinedContext.addFrame(LineInfo); 156 } 157 InlinedContext = PatchedInlinedContext; 158 } 159 return InlinedContext; 160 } 161 162 bool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name, 163 uint64_t &Start, uint64_t &Size) const { 164 return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start, 165 Size); 166 } 167 168 const char LLVMSymbolizer::kBadString[] = "??"; 169 170 std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, 171 uint64_t ModuleOffset) { 172 ModuleInfo *Info = getOrCreateModuleInfo(ModuleName); 173 if (!Info) 174 return printDILineInfo(DILineInfo()); 175 if (Opts.PrintInlining) { 176 DIInliningInfo InlinedContext = 177 Info->symbolizeInlinedCode(ModuleOffset, Opts); 178 uint32_t FramesNum = InlinedContext.getNumberOfFrames(); 179 assert(FramesNum > 0); 180 std::string Result; 181 for (uint32_t i = 0; i < FramesNum; i++) { 182 DILineInfo LineInfo = InlinedContext.getFrame(i); 183 Result += printDILineInfo(LineInfo); 184 } 185 return Result; 186 } 187 DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts); 188 return printDILineInfo(LineInfo); 189 } 190 191 std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, 192 uint64_t ModuleOffset) { 193 std::string Name = kBadString; 194 uint64_t Start = 0; 195 uint64_t Size = 0; 196 if (Opts.UseSymbolTable) { 197 if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) { 198 if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle) 199 Name = DemangleName(Name); 200 } 201 } 202 std::stringstream ss; 203 ss << Name << "\n" << Start << " " << Size << "\n"; 204 return ss.str(); 205 } 206 207 void LLVMSymbolizer::flush() { 208 DeleteContainerSeconds(Modules); 209 BinaryForPath.clear(); 210 ObjectFileForArch.clear(); 211 } 212 213 static std::string getDarwinDWARFResourceForPath(const std::string &Path) { 214 StringRef Basename = sys::path::filename(Path); 215 const std::string &DSymDirectory = Path + ".dSYM"; 216 SmallString<16> ResourceName = StringRef(DSymDirectory); 217 sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 218 sys::path::append(ResourceName, Basename); 219 return ResourceName.str(); 220 } 221 222 static bool checkFileCRC(StringRef Path, uint32_t CRCHash) { 223 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 224 MemoryBuffer::getFileOrSTDIN(Path); 225 if (!MB) 226 return false; 227 return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer()); 228 } 229 230 static bool findDebugBinary(const std::string &OrigPath, 231 const std::string &DebuglinkName, uint32_t CRCHash, 232 std::string &Result) { 233 std::string OrigRealPath = OrigPath; 234 #if defined(HAVE_REALPATH) 235 if (char *RP = realpath(OrigPath.c_str(), nullptr)) { 236 OrigRealPath = RP; 237 free(RP); 238 } 239 #endif 240 SmallString<16> OrigDir(OrigRealPath); 241 llvm::sys::path::remove_filename(OrigDir); 242 SmallString<16> DebugPath = OrigDir; 243 // Try /path/to/original_binary/debuglink_name 244 llvm::sys::path::append(DebugPath, DebuglinkName); 245 if (checkFileCRC(DebugPath, CRCHash)) { 246 Result = DebugPath.str(); 247 return true; 248 } 249 // Try /path/to/original_binary/.debug/debuglink_name 250 DebugPath = OrigRealPath; 251 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); 252 if (checkFileCRC(DebugPath, CRCHash)) { 253 Result = DebugPath.str(); 254 return true; 255 } 256 // Try /usr/lib/debug/path/to/original_binary/debuglink_name 257 DebugPath = "/usr/lib/debug"; 258 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), 259 DebuglinkName); 260 if (checkFileCRC(DebugPath, CRCHash)) { 261 Result = DebugPath.str(); 262 return true; 263 } 264 return false; 265 } 266 267 static bool getGNUDebuglinkContents(const Binary *Bin, std::string &DebugName, 268 uint32_t &CRCHash) { 269 const ObjectFile *Obj = dyn_cast<ObjectFile>(Bin); 270 if (!Obj) 271 return false; 272 for (const SectionRef &Section : Obj->sections()) { 273 StringRef Name; 274 Section.getName(Name); 275 Name = Name.substr(Name.find_first_not_of("._")); 276 if (Name == "gnu_debuglink") { 277 StringRef Data; 278 Section.getContents(Data); 279 DataExtractor DE(Data, Obj->isLittleEndian(), 0); 280 uint32_t Offset = 0; 281 if (const char *DebugNameStr = DE.getCStr(&Offset)) { 282 // 4-byte align the offset. 283 Offset = (Offset + 3) & ~0x3; 284 if (DE.isValidOffsetForDataOfSize(Offset, 4)) { 285 DebugName = DebugNameStr; 286 CRCHash = DE.getU32(&Offset); 287 return true; 288 } 289 } 290 break; 291 } 292 } 293 return false; 294 } 295 296 LLVMSymbolizer::BinaryPair 297 LLVMSymbolizer::getOrCreateBinary(const std::string &Path) { 298 BinaryMapTy::iterator I = BinaryForPath.find(Path); 299 if (I != BinaryForPath.end()) 300 return I->second; 301 Binary *Bin = nullptr; 302 Binary *DbgBin = nullptr; 303 ErrorOr<Binary *> BinaryOrErr = createBinary(Path); 304 if (!error(BinaryOrErr.getError())) { 305 std::unique_ptr<Binary> ParsedBinary(BinaryOrErr.get()); 306 // Check if it's a universal binary. 307 Bin = ParsedBinary.get(); 308 ParsedBinariesAndObjects.push_back(std::move(ParsedBinary)); 309 if (Bin->isMachO() || Bin->isMachOUniversalBinary()) { 310 // On Darwin we may find DWARF in separate object file in 311 // resource directory. 312 const std::string &ResourcePath = 313 getDarwinDWARFResourceForPath(Path); 314 BinaryOrErr = createBinary(ResourcePath); 315 std::error_code EC = BinaryOrErr.getError(); 316 if (EC != errc::no_such_file_or_directory && !error(EC)) { 317 DbgBin = BinaryOrErr.get(); 318 ParsedBinariesAndObjects.push_back(std::unique_ptr<Binary>(DbgBin)); 319 } 320 } 321 // Try to locate the debug binary using .gnu_debuglink section. 322 if (!DbgBin) { 323 std::string DebuglinkName; 324 uint32_t CRCHash; 325 std::string DebugBinaryPath; 326 if (getGNUDebuglinkContents(Bin, DebuglinkName, CRCHash) && 327 findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) { 328 BinaryOrErr = createBinary(DebugBinaryPath); 329 if (!error(BinaryOrErr.getError())) { 330 DbgBin = BinaryOrErr.get(); 331 ParsedBinariesAndObjects.push_back(std::unique_ptr<Binary>(DbgBin)); 332 } 333 } 334 } 335 } 336 if (!DbgBin) 337 DbgBin = Bin; 338 BinaryPair Res = std::make_pair(Bin, DbgBin); 339 BinaryForPath[Path] = Res; 340 return Res; 341 } 342 343 ObjectFile * 344 LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, const std::string &ArchName) { 345 if (!Bin) 346 return nullptr; 347 ObjectFile *Res = nullptr; 348 if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) { 349 ObjectFileForArchMapTy::iterator I = ObjectFileForArch.find( 350 std::make_pair(UB, ArchName)); 351 if (I != ObjectFileForArch.end()) 352 return I->second; 353 ErrorOr<std::unique_ptr<ObjectFile>> ParsedObj = 354 UB->getObjectForArch(Triple(ArchName).getArch()); 355 if (ParsedObj) { 356 Res = ParsedObj.get().get(); 357 ParsedBinariesAndObjects.push_back(std::move(ParsedObj.get())); 358 } 359 ObjectFileForArch[std::make_pair(UB, ArchName)] = Res; 360 } else if (Bin->isObject()) { 361 Res = cast<ObjectFile>(Bin); 362 } 363 return Res; 364 } 365 366 ModuleInfo * 367 LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { 368 ModuleMapTy::iterator I = Modules.find(ModuleName); 369 if (I != Modules.end()) 370 return I->second; 371 std::string BinaryName = ModuleName; 372 std::string ArchName = Opts.DefaultArch; 373 size_t ColonPos = ModuleName.find_last_of(':'); 374 // Verify that substring after colon form a valid arch name. 375 if (ColonPos != std::string::npos) { 376 std::string ArchStr = ModuleName.substr(ColonPos + 1); 377 if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 378 BinaryName = ModuleName.substr(0, ColonPos); 379 ArchName = ArchStr; 380 } 381 } 382 BinaryPair Binaries = getOrCreateBinary(BinaryName); 383 ObjectFile *Obj = getObjectFileFromBinary(Binaries.first, ArchName); 384 ObjectFile *DbgObj = getObjectFileFromBinary(Binaries.second, ArchName); 385 386 if (!Obj) { 387 // Failed to find valid object file. 388 Modules.insert(make_pair(ModuleName, (ModuleInfo *)nullptr)); 389 return nullptr; 390 } 391 DIContext *Context = DIContext::getDWARFContext(DbgObj); 392 assert(Context); 393 ModuleInfo *Info = new ModuleInfo(Obj, Context); 394 Modules.insert(make_pair(ModuleName, Info)); 395 return Info; 396 } 397 398 std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const { 399 // By default, DILineInfo contains "<invalid>" for function/filename it 400 // cannot fetch. We replace it to "??" to make our output closer to addr2line. 401 static const std::string kDILineInfoBadString = "<invalid>"; 402 std::stringstream Result; 403 if (Opts.PrintFunctions != FunctionNameKind::None) { 404 std::string FunctionName = LineInfo.FunctionName; 405 if (FunctionName == kDILineInfoBadString) 406 FunctionName = kBadString; 407 else if (Opts.Demangle) 408 FunctionName = DemangleName(FunctionName); 409 Result << FunctionName << "\n"; 410 } 411 std::string Filename = LineInfo.FileName; 412 if (Filename == kDILineInfoBadString) 413 Filename = kBadString; 414 Result << Filename << ":" << LineInfo.Line << ":" << LineInfo.Column << "\n"; 415 return Result.str(); 416 } 417 418 #if !defined(_MSC_VER) 419 // Assume that __cxa_demangle is provided by libcxxabi (except for Windows). 420 extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, 421 size_t *length, int *status); 422 #endif 423 424 std::string LLVMSymbolizer::DemangleName(const std::string &Name) { 425 #if !defined(_MSC_VER) 426 // We can spoil names of symbols with C linkage, so use an heuristic 427 // approach to check if the name should be demangled. 428 if (Name.substr(0, 2) != "_Z") 429 return Name; 430 int status = 0; 431 char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status); 432 if (status != 0) 433 return Name; 434 std::string Result = DemangledName; 435 free(DemangledName); 436 return Result; 437 #else 438 return Name; 439 #endif 440 } 441 442 } // namespace symbolize 443 } // namespace llvm 444