1 //===-- LLVMSymbolize.cpp -------------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Implementation for LLVM symbolization library. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "LLVMSymbolize.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/Object/MachO.h" 17 #include "llvm/Support/Casting.h" 18 #include "llvm/Support/FileSystem.h" 19 #include "llvm/Support/Path.h" 20 21 #include <sstream> 22 23 namespace llvm { 24 namespace symbolize { 25 26 static bool error(error_code ec) { 27 if (!ec) 28 return false; 29 errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n"; 30 return true; 31 } 32 33 static uint32_t 34 getDILineInfoSpecifierFlags(const LLVMSymbolizer::Options &Opts) { 35 uint32_t Flags = llvm::DILineInfoSpecifier::FileLineInfo | 36 llvm::DILineInfoSpecifier::AbsoluteFilePath; 37 if (Opts.PrintFunctions) 38 Flags |= llvm::DILineInfoSpecifier::FunctionName; 39 return Flags; 40 } 41 42 static void patchFunctionNameInDILineInfo(const std::string &NewFunctionName, 43 DILineInfo &LineInfo) { 44 std::string FileName = LineInfo.getFileName(); 45 LineInfo = DILineInfo(StringRef(FileName), StringRef(NewFunctionName), 46 LineInfo.getLine(), LineInfo.getColumn()); 47 } 48 49 ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx) 50 : Module(Obj), DebugInfoContext(DICtx) { 51 error_code ec; 52 for (symbol_iterator si = Module->begin_symbols(), se = Module->end_symbols(); 53 si != se; si.increment(ec)) { 54 if (error(ec)) 55 return; 56 SymbolRef::Type SymbolType; 57 if (error(si->getType(SymbolType))) 58 continue; 59 if (SymbolType != SymbolRef::ST_Function && 60 SymbolType != SymbolRef::ST_Data) 61 continue; 62 uint64_t SymbolAddress; 63 if (error(si->getAddress(SymbolAddress)) || 64 SymbolAddress == UnknownAddressOrSize) 65 continue; 66 uint64_t SymbolSize; 67 // Getting symbol size is linear for Mach-O files, so assume that symbol 68 // occupies the memory range up to the following symbol. 69 if (isa<MachOObjectFile>(Obj)) 70 SymbolSize = 0; 71 else if (error(si->getSize(SymbolSize)) || 72 SymbolSize == UnknownAddressOrSize) 73 continue; 74 StringRef SymbolName; 75 if (error(si->getName(SymbolName))) 76 continue; 77 // Mach-O symbol table names have leading underscore, skip it. 78 if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_') 79 SymbolName = SymbolName.drop_front(); 80 // FIXME: If a function has alias, there are two entries in symbol table 81 // with same address size. Make sure we choose the correct one. 82 SymbolMapTy &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; 83 SymbolDesc SD = { SymbolAddress, SymbolSize }; 84 M.insert(std::make_pair(SD, SymbolName)); 85 } 86 } 87 88 bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, 89 std::string &Name, uint64_t &Addr, 90 uint64_t &Size) const { 91 const SymbolMapTy &M = Type == SymbolRef::ST_Function ? Functions : Objects; 92 if (M.empty()) 93 return false; 94 SymbolDesc SD = { Address, Address }; 95 SymbolMapTy::const_iterator it = M.upper_bound(SD); 96 if (it == M.begin()) 97 return false; 98 --it; 99 if (it->first.Size != 0 && it->first.Addr + it->first.Size <= Address) 100 return false; 101 Name = it->second.str(); 102 Addr = it->first.Addr; 103 Size = it->first.Size; 104 return true; 105 } 106 107 DILineInfo ModuleInfo::symbolizeCode( 108 uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { 109 DILineInfo LineInfo; 110 if (DebugInfoContext) { 111 LineInfo = DebugInfoContext->getLineInfoForAddress( 112 ModuleOffset, getDILineInfoSpecifierFlags(Opts)); 113 } 114 // Override function name from symbol table if necessary. 115 if (Opts.PrintFunctions && Opts.UseSymbolTable) { 116 std::string FunctionName; 117 uint64_t Start, Size; 118 if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, 119 FunctionName, Start, Size)) { 120 patchFunctionNameInDILineInfo(FunctionName, LineInfo); 121 } 122 } 123 return LineInfo; 124 } 125 126 DIInliningInfo ModuleInfo::symbolizeInlinedCode( 127 uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { 128 DIInliningInfo InlinedContext; 129 if (DebugInfoContext) { 130 InlinedContext = DebugInfoContext->getInliningInfoForAddress( 131 ModuleOffset, getDILineInfoSpecifierFlags(Opts)); 132 } 133 // Make sure there is at least one frame in context. 134 if (InlinedContext.getNumberOfFrames() == 0) { 135 InlinedContext.addFrame(DILineInfo()); 136 } 137 // Override the function name in lower frame with name from symbol table. 138 if (Opts.PrintFunctions && Opts.UseSymbolTable) { 139 DIInliningInfo PatchedInlinedContext; 140 for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 141 DILineInfo LineInfo = InlinedContext.getFrame(i); 142 if (i == n - 1) { 143 std::string FunctionName; 144 uint64_t Start, Size; 145 if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, 146 FunctionName, Start, Size)) { 147 patchFunctionNameInDILineInfo(FunctionName, LineInfo); 148 } 149 } 150 PatchedInlinedContext.addFrame(LineInfo); 151 } 152 InlinedContext = PatchedInlinedContext; 153 } 154 return InlinedContext; 155 } 156 157 bool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name, 158 uint64_t &Start, uint64_t &Size) const { 159 return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start, 160 Size); 161 } 162 163 const char LLVMSymbolizer::kBadString[] = "??"; 164 165 std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, 166 uint64_t ModuleOffset) { 167 ModuleInfo *Info = getOrCreateModuleInfo(ModuleName); 168 if (Info == 0) 169 return printDILineInfo(DILineInfo()); 170 if (Opts.PrintInlining) { 171 DIInliningInfo InlinedContext = 172 Info->symbolizeInlinedCode(ModuleOffset, Opts); 173 uint32_t FramesNum = InlinedContext.getNumberOfFrames(); 174 assert(FramesNum > 0); 175 std::string Result; 176 for (uint32_t i = 0; i < FramesNum; i++) { 177 DILineInfo LineInfo = InlinedContext.getFrame(i); 178 Result += printDILineInfo(LineInfo); 179 } 180 return Result; 181 } 182 DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts); 183 return printDILineInfo(LineInfo); 184 } 185 186 std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, 187 uint64_t ModuleOffset) { 188 std::string Name = kBadString; 189 uint64_t Start = 0; 190 uint64_t Size = 0; 191 if (Opts.UseSymbolTable) { 192 if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) { 193 if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle) 194 Name = DemangleName(Name); 195 } 196 } 197 std::stringstream ss; 198 ss << Name << "\n" << Start << " " << Size << "\n"; 199 return ss.str(); 200 } 201 202 void LLVMSymbolizer::flush() { 203 DeleteContainerSeconds(Modules); 204 DeleteContainerPointers(ParsedBinariesAndObjects); 205 BinaryForPath.clear(); 206 ObjectFileForArch.clear(); 207 } 208 209 static std::string getDarwinDWARFResourceForPath(const std::string &Path) { 210 StringRef Basename = sys::path::filename(Path); 211 const std::string &DSymDirectory = Path + ".dSYM"; 212 SmallString<16> ResourceName = StringRef(DSymDirectory); 213 sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 214 sys::path::append(ResourceName, Basename); 215 return ResourceName.str(); 216 } 217 218 LLVMSymbolizer::BinaryPair 219 LLVMSymbolizer::getOrCreateBinary(const std::string &Path) { 220 BinaryMapTy::iterator I = BinaryForPath.find(Path); 221 if (I != BinaryForPath.end()) 222 return I->second; 223 Binary *Bin = 0; 224 Binary *DbgBin = 0; 225 OwningPtr<Binary> ParsedBinary; 226 OwningPtr<Binary> ParsedDbgBinary; 227 if (!error(createBinary(Path, ParsedBinary))) { 228 // Check if it's a universal binary. 229 Bin = ParsedBinary.take(); 230 ParsedBinariesAndObjects.push_back(Bin); 231 if (Bin->isMachO() || Bin->isMachOUniversalBinary()) { 232 // On Darwin we may find DWARF in separate object file in 233 // resource directory. 234 const std::string &ResourcePath = 235 getDarwinDWARFResourceForPath(Path); 236 bool ResourceFileExists = false; 237 if (!sys::fs::exists(ResourcePath, ResourceFileExists) && 238 ResourceFileExists && 239 !error(createBinary(ResourcePath, ParsedDbgBinary))) { 240 DbgBin = ParsedDbgBinary.take(); 241 ParsedBinariesAndObjects.push_back(DbgBin); 242 } 243 } 244 } 245 if (DbgBin == 0) 246 DbgBin = Bin; 247 BinaryPair Res = std::make_pair(Bin, DbgBin); 248 BinaryForPath[Path] = Res; 249 return Res; 250 } 251 252 ObjectFile * 253 LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, const std::string &ArchName) { 254 if (Bin == 0) 255 return 0; 256 ObjectFile *Res = 0; 257 if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) { 258 ObjectFileForArchMapTy::iterator I = ObjectFileForArch.find( 259 std::make_pair(UB, ArchName)); 260 if (I != ObjectFileForArch.end()) 261 return I->second; 262 OwningPtr<ObjectFile> ParsedObj; 263 if (!UB->getObjectForArch(Triple(ArchName).getArch(), ParsedObj)) { 264 Res = ParsedObj.take(); 265 ParsedBinariesAndObjects.push_back(Res); 266 } 267 ObjectFileForArch[std::make_pair(UB, ArchName)] = Res; 268 } else if (Bin->isObject()) { 269 Res = cast<ObjectFile>(Bin); 270 } 271 return Res; 272 } 273 274 ModuleInfo * 275 LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { 276 ModuleMapTy::iterator I = Modules.find(ModuleName); 277 if (I != Modules.end()) 278 return I->second; 279 std::string BinaryName = ModuleName; 280 std::string ArchName = Opts.DefaultArch; 281 size_t ColonPos = ModuleName.find_last_of(':'); 282 // Verify that substring after colon form a valid arch name. 283 if (ColonPos != std::string::npos) { 284 std::string ArchStr = ModuleName.substr(ColonPos + 1); 285 if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 286 BinaryName = ModuleName.substr(0, ColonPos); 287 ArchName = ArchStr; 288 } 289 } 290 BinaryPair Binaries = getOrCreateBinary(BinaryName); 291 ObjectFile *Obj = getObjectFileFromBinary(Binaries.first, ArchName); 292 ObjectFile *DbgObj = getObjectFileFromBinary(Binaries.second, ArchName); 293 294 if (Obj == 0) { 295 // Failed to find valid object file. 296 Modules.insert(make_pair(ModuleName, (ModuleInfo *)0)); 297 return 0; 298 } 299 DIContext *Context = DIContext::getDWARFContext(DbgObj); 300 assert(Context); 301 ModuleInfo *Info = new ModuleInfo(Obj, Context); 302 Modules.insert(make_pair(ModuleName, Info)); 303 return Info; 304 } 305 306 std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const { 307 // By default, DILineInfo contains "<invalid>" for function/filename it 308 // cannot fetch. We replace it to "??" to make our output closer to addr2line. 309 static const std::string kDILineInfoBadString = "<invalid>"; 310 std::stringstream Result; 311 if (Opts.PrintFunctions) { 312 std::string FunctionName = LineInfo.getFunctionName(); 313 if (FunctionName == kDILineInfoBadString) 314 FunctionName = kBadString; 315 else if (Opts.Demangle) 316 FunctionName = DemangleName(FunctionName); 317 Result << FunctionName << "\n"; 318 } 319 std::string Filename = LineInfo.getFileName(); 320 if (Filename == kDILineInfoBadString) 321 Filename = kBadString; 322 Result << Filename << ":" << LineInfo.getLine() << ":" << LineInfo.getColumn() 323 << "\n"; 324 return Result.str(); 325 } 326 327 #if !defined(_MSC_VER) 328 // Assume that __cxa_demangle is provided by libcxxabi (except for Windows). 329 extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, 330 size_t *length, int *status); 331 #endif 332 333 std::string LLVMSymbolizer::DemangleName(const std::string &Name) { 334 #if !defined(_MSC_VER) 335 int status = 0; 336 char *DemangledName = __cxa_demangle(Name.c_str(), 0, 0, &status); 337 if (status != 0) 338 return Name; 339 std::string Result = DemangledName; 340 free(DemangledName); 341 return Result; 342 #else 343 return Name; 344 #endif 345 } 346 347 } // namespace symbolize 348 } // namespace llvm 349