Home | History | Annotate | Download | only in llvm-symbolizer
      1 //===-- LLVMSymbolize.cpp -------------------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Implementation for LLVM symbolization library.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "LLVMSymbolize.h"
     15 #include "llvm/ADT/STLExtras.h"
     16 #include "llvm/Config/config.h"
     17 #include "llvm/Object/ELFObjectFile.h"
     18 #include "llvm/Object/MachO.h"
     19 #include "llvm/Support/Casting.h"
     20 #include "llvm/Support/Compression.h"
     21 #include "llvm/Support/DataExtractor.h"
     22 #include "llvm/Support/Errc.h"
     23 #include "llvm/Support/FileSystem.h"
     24 #include "llvm/Support/MemoryBuffer.h"
     25 #include "llvm/Support/Path.h"
     26 #include <sstream>
     27 #include <stdlib.h>
     28 
     29 namespace llvm {
     30 namespace symbolize {
     31 
     32 static bool error(std::error_code ec) {
     33   if (!ec)
     34     return false;
     35   errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n";
     36   return true;
     37 }
     38 
     39 static DILineInfoSpecifier
     40 getDILineInfoSpecifier(const LLVMSymbolizer::Options &Opts) {
     41   return DILineInfoSpecifier(
     42       DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
     43       Opts.PrintFunctions);
     44 }
     45 
     46 ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx)
     47     : Module(Obj), DebugInfoContext(DICtx) {
     48   for (const SymbolRef &Symbol : Module->symbols()) {
     49     addSymbol(Symbol);
     50   }
     51   bool NoSymbolTable = (Module->symbol_begin() == Module->symbol_end());
     52   if (NoSymbolTable && Module->isELF()) {
     53     // Fallback to dynamic symbol table, if regular symbol table is stripped.
     54     std::pair<symbol_iterator, symbol_iterator> IDyn =
     55         getELFDynamicSymbolIterators(Module);
     56     for (symbol_iterator si = IDyn.first, se = IDyn.second; si != se; ++si) {
     57       addSymbol(*si);
     58     }
     59   }
     60 }
     61 
     62 void ModuleInfo::addSymbol(const SymbolRef &Symbol) {
     63   SymbolRef::Type SymbolType;
     64   if (error(Symbol.getType(SymbolType)))
     65     return;
     66   if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
     67     return;
     68   uint64_t SymbolAddress;
     69   if (error(Symbol.getAddress(SymbolAddress)) ||
     70       SymbolAddress == UnknownAddressOrSize)
     71     return;
     72   uint64_t SymbolSize;
     73   // Getting symbol size is linear for Mach-O files, so assume that symbol
     74   // occupies the memory range up to the following symbol.
     75   if (isa<MachOObjectFile>(Module))
     76     SymbolSize = 0;
     77   else if (error(Symbol.getSize(SymbolSize)) ||
     78            SymbolSize == UnknownAddressOrSize)
     79     return;
     80   StringRef SymbolName;
     81   if (error(Symbol.getName(SymbolName)))
     82     return;
     83   // Mach-O symbol table names have leading underscore, skip it.
     84   if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
     85     SymbolName = SymbolName.drop_front();
     86   // FIXME: If a function has alias, there are two entries in symbol table
     87   // with same address size. Make sure we choose the correct one.
     88   SymbolMapTy &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
     89   SymbolDesc SD = { SymbolAddress, SymbolSize };
     90   M.insert(std::make_pair(SD, SymbolName));
     91 }
     92 
     93 bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
     94                                         std::string &Name, uint64_t &Addr,
     95                                         uint64_t &Size) const {
     96   const SymbolMapTy &M = Type == SymbolRef::ST_Function ? Functions : Objects;
     97   if (M.empty())
     98     return false;
     99   SymbolDesc SD = { Address, Address };
    100   SymbolMapTy::const_iterator it = M.upper_bound(SD);
    101   if (it == M.begin())
    102     return false;
    103   --it;
    104   if (it->first.Size != 0 && it->first.Addr + it->first.Size <= Address)
    105     return false;
    106   Name = it->second.str();
    107   Addr = it->first.Addr;
    108   Size = it->first.Size;
    109   return true;
    110 }
    111 
    112 DILineInfo ModuleInfo::symbolizeCode(
    113     uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const {
    114   DILineInfo LineInfo;
    115   if (DebugInfoContext) {
    116     LineInfo = DebugInfoContext->getLineInfoForAddress(
    117         ModuleOffset, getDILineInfoSpecifier(Opts));
    118   }
    119   // Override function name from symbol table if necessary.
    120   if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) {
    121     std::string FunctionName;
    122     uint64_t Start, Size;
    123     if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
    124                                FunctionName, Start, Size)) {
    125       LineInfo.FunctionName = FunctionName;
    126     }
    127   }
    128   return LineInfo;
    129 }
    130 
    131 DIInliningInfo ModuleInfo::symbolizeInlinedCode(
    132     uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const {
    133   DIInliningInfo InlinedContext;
    134   if (DebugInfoContext) {
    135     InlinedContext = DebugInfoContext->getInliningInfoForAddress(
    136         ModuleOffset, getDILineInfoSpecifier(Opts));
    137   }
    138   // Make sure there is at least one frame in context.
    139   if (InlinedContext.getNumberOfFrames() == 0) {
    140     InlinedContext.addFrame(DILineInfo());
    141   }
    142   // Override the function name in lower frame with name from symbol table.
    143   if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) {
    144     DIInliningInfo PatchedInlinedContext;
    145     for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
    146       DILineInfo LineInfo = InlinedContext.getFrame(i);
    147       if (i == n - 1) {
    148         std::string FunctionName;
    149         uint64_t Start, Size;
    150         if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
    151                                    FunctionName, Start, Size)) {
    152           LineInfo.FunctionName = FunctionName;
    153         }
    154       }
    155       PatchedInlinedContext.addFrame(LineInfo);
    156     }
    157     InlinedContext = PatchedInlinedContext;
    158   }
    159   return InlinedContext;
    160 }
    161 
    162 bool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name,
    163                                uint64_t &Start, uint64_t &Size) const {
    164   return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start,
    165                                 Size);
    166 }
    167 
    168 const char LLVMSymbolizer::kBadString[] = "??";
    169 
    170 std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
    171                                           uint64_t ModuleOffset) {
    172   ModuleInfo *Info = getOrCreateModuleInfo(ModuleName);
    173   if (!Info)
    174     return printDILineInfo(DILineInfo());
    175   if (Opts.PrintInlining) {
    176     DIInliningInfo InlinedContext =
    177         Info->symbolizeInlinedCode(ModuleOffset, Opts);
    178     uint32_t FramesNum = InlinedContext.getNumberOfFrames();
    179     assert(FramesNum > 0);
    180     std::string Result;
    181     for (uint32_t i = 0; i < FramesNum; i++) {
    182       DILineInfo LineInfo = InlinedContext.getFrame(i);
    183       Result += printDILineInfo(LineInfo);
    184     }
    185     return Result;
    186   }
    187   DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts);
    188   return printDILineInfo(LineInfo);
    189 }
    190 
    191 std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
    192                                           uint64_t ModuleOffset) {
    193   std::string Name = kBadString;
    194   uint64_t Start = 0;
    195   uint64_t Size = 0;
    196   if (Opts.UseSymbolTable) {
    197     if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) {
    198       if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle)
    199         Name = DemangleName(Name);
    200     }
    201   }
    202   std::stringstream ss;
    203   ss << Name << "\n" << Start << " " << Size << "\n";
    204   return ss.str();
    205 }
    206 
    207 void LLVMSymbolizer::flush() {
    208   DeleteContainerSeconds(Modules);
    209   BinaryForPath.clear();
    210   ObjectFileForArch.clear();
    211 }
    212 
    213 static std::string getDarwinDWARFResourceForPath(const std::string &Path) {
    214   StringRef Basename = sys::path::filename(Path);
    215   const std::string &DSymDirectory = Path + ".dSYM";
    216   SmallString<16> ResourceName = StringRef(DSymDirectory);
    217   sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
    218   sys::path::append(ResourceName, Basename);
    219   return ResourceName.str();
    220 }
    221 
    222 static bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
    223   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
    224       MemoryBuffer::getFileOrSTDIN(Path);
    225   if (!MB)
    226     return false;
    227   return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer());
    228 }
    229 
    230 static bool findDebugBinary(const std::string &OrigPath,
    231                             const std::string &DebuglinkName, uint32_t CRCHash,
    232                             std::string &Result) {
    233   std::string OrigRealPath = OrigPath;
    234 #if defined(HAVE_REALPATH)
    235   if (char *RP = realpath(OrigPath.c_str(), nullptr)) {
    236     OrigRealPath = RP;
    237     free(RP);
    238   }
    239 #endif
    240   SmallString<16> OrigDir(OrigRealPath);
    241   llvm::sys::path::remove_filename(OrigDir);
    242   SmallString<16> DebugPath = OrigDir;
    243   // Try /path/to/original_binary/debuglink_name
    244   llvm::sys::path::append(DebugPath, DebuglinkName);
    245   if (checkFileCRC(DebugPath, CRCHash)) {
    246     Result = DebugPath.str();
    247     return true;
    248   }
    249   // Try /path/to/original_binary/.debug/debuglink_name
    250   DebugPath = OrigRealPath;
    251   llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
    252   if (checkFileCRC(DebugPath, CRCHash)) {
    253     Result = DebugPath.str();
    254     return true;
    255   }
    256   // Try /usr/lib/debug/path/to/original_binary/debuglink_name
    257   DebugPath = "/usr/lib/debug";
    258   llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
    259                           DebuglinkName);
    260   if (checkFileCRC(DebugPath, CRCHash)) {
    261     Result = DebugPath.str();
    262     return true;
    263   }
    264   return false;
    265 }
    266 
    267 static bool getGNUDebuglinkContents(const Binary *Bin, std::string &DebugName,
    268                                     uint32_t &CRCHash) {
    269   const ObjectFile *Obj = dyn_cast<ObjectFile>(Bin);
    270   if (!Obj)
    271     return false;
    272   for (const SectionRef &Section : Obj->sections()) {
    273     StringRef Name;
    274     Section.getName(Name);
    275     Name = Name.substr(Name.find_first_not_of("._"));
    276     if (Name == "gnu_debuglink") {
    277       StringRef Data;
    278       Section.getContents(Data);
    279       DataExtractor DE(Data, Obj->isLittleEndian(), 0);
    280       uint32_t Offset = 0;
    281       if (const char *DebugNameStr = DE.getCStr(&Offset)) {
    282         // 4-byte align the offset.
    283         Offset = (Offset + 3) & ~0x3;
    284         if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
    285           DebugName = DebugNameStr;
    286           CRCHash = DE.getU32(&Offset);
    287           return true;
    288         }
    289       }
    290       break;
    291     }
    292   }
    293   return false;
    294 }
    295 
    296 LLVMSymbolizer::BinaryPair
    297 LLVMSymbolizer::getOrCreateBinary(const std::string &Path) {
    298   BinaryMapTy::iterator I = BinaryForPath.find(Path);
    299   if (I != BinaryForPath.end())
    300     return I->second;
    301   Binary *Bin = nullptr;
    302   Binary *DbgBin = nullptr;
    303   ErrorOr<Binary *> BinaryOrErr = createBinary(Path);
    304   if (!error(BinaryOrErr.getError())) {
    305     std::unique_ptr<Binary> ParsedBinary(BinaryOrErr.get());
    306     // Check if it's a universal binary.
    307     Bin = ParsedBinary.get();
    308     ParsedBinariesAndObjects.push_back(std::move(ParsedBinary));
    309     if (Bin->isMachO() || Bin->isMachOUniversalBinary()) {
    310       // On Darwin we may find DWARF in separate object file in
    311       // resource directory.
    312       const std::string &ResourcePath =
    313           getDarwinDWARFResourceForPath(Path);
    314       BinaryOrErr = createBinary(ResourcePath);
    315       std::error_code EC = BinaryOrErr.getError();
    316       if (EC != errc::no_such_file_or_directory && !error(EC)) {
    317         DbgBin = BinaryOrErr.get();
    318         ParsedBinariesAndObjects.push_back(std::unique_ptr<Binary>(DbgBin));
    319       }
    320     }
    321     // Try to locate the debug binary using .gnu_debuglink section.
    322     if (!DbgBin) {
    323       std::string DebuglinkName;
    324       uint32_t CRCHash;
    325       std::string DebugBinaryPath;
    326       if (getGNUDebuglinkContents(Bin, DebuglinkName, CRCHash) &&
    327           findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) {
    328         BinaryOrErr = createBinary(DebugBinaryPath);
    329         if (!error(BinaryOrErr.getError())) {
    330           DbgBin = BinaryOrErr.get();
    331           ParsedBinariesAndObjects.push_back(std::unique_ptr<Binary>(DbgBin));
    332         }
    333       }
    334     }
    335   }
    336   if (!DbgBin)
    337     DbgBin = Bin;
    338   BinaryPair Res = std::make_pair(Bin, DbgBin);
    339   BinaryForPath[Path] = Res;
    340   return Res;
    341 }
    342 
    343 ObjectFile *
    344 LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, const std::string &ArchName) {
    345   if (!Bin)
    346     return nullptr;
    347   ObjectFile *Res = nullptr;
    348   if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) {
    349     ObjectFileForArchMapTy::iterator I = ObjectFileForArch.find(
    350         std::make_pair(UB, ArchName));
    351     if (I != ObjectFileForArch.end())
    352       return I->second;
    353     ErrorOr<std::unique_ptr<ObjectFile>> ParsedObj =
    354         UB->getObjectForArch(Triple(ArchName).getArch());
    355     if (ParsedObj) {
    356       Res = ParsedObj.get().get();
    357       ParsedBinariesAndObjects.push_back(std::move(ParsedObj.get()));
    358     }
    359     ObjectFileForArch[std::make_pair(UB, ArchName)] = Res;
    360   } else if (Bin->isObject()) {
    361     Res = cast<ObjectFile>(Bin);
    362   }
    363   return Res;
    364 }
    365 
    366 ModuleInfo *
    367 LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
    368   ModuleMapTy::iterator I = Modules.find(ModuleName);
    369   if (I != Modules.end())
    370     return I->second;
    371   std::string BinaryName = ModuleName;
    372   std::string ArchName = Opts.DefaultArch;
    373   size_t ColonPos = ModuleName.find_last_of(':');
    374   // Verify that substring after colon form a valid arch name.
    375   if (ColonPos != std::string::npos) {
    376     std::string ArchStr = ModuleName.substr(ColonPos + 1);
    377     if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
    378       BinaryName = ModuleName.substr(0, ColonPos);
    379       ArchName = ArchStr;
    380     }
    381   }
    382   BinaryPair Binaries = getOrCreateBinary(BinaryName);
    383   ObjectFile *Obj = getObjectFileFromBinary(Binaries.first, ArchName);
    384   ObjectFile *DbgObj = getObjectFileFromBinary(Binaries.second, ArchName);
    385 
    386   if (!Obj) {
    387     // Failed to find valid object file.
    388     Modules.insert(make_pair(ModuleName, (ModuleInfo *)nullptr));
    389     return nullptr;
    390   }
    391   DIContext *Context = DIContext::getDWARFContext(DbgObj);
    392   assert(Context);
    393   ModuleInfo *Info = new ModuleInfo(Obj, Context);
    394   Modules.insert(make_pair(ModuleName, Info));
    395   return Info;
    396 }
    397 
    398 std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
    399   // By default, DILineInfo contains "<invalid>" for function/filename it
    400   // cannot fetch. We replace it to "??" to make our output closer to addr2line.
    401   static const std::string kDILineInfoBadString = "<invalid>";
    402   std::stringstream Result;
    403   if (Opts.PrintFunctions != FunctionNameKind::None) {
    404     std::string FunctionName = LineInfo.FunctionName;
    405     if (FunctionName == kDILineInfoBadString)
    406       FunctionName = kBadString;
    407     else if (Opts.Demangle)
    408       FunctionName = DemangleName(FunctionName);
    409     Result << FunctionName << "\n";
    410   }
    411   std::string Filename = LineInfo.FileName;
    412   if (Filename == kDILineInfoBadString)
    413     Filename = kBadString;
    414   Result << Filename << ":" << LineInfo.Line << ":" << LineInfo.Column << "\n";
    415   return Result.str();
    416 }
    417 
    418 #if !defined(_MSC_VER)
    419 // Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
    420 extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
    421                                 size_t *length, int *status);
    422 #endif
    423 
    424 std::string LLVMSymbolizer::DemangleName(const std::string &Name) {
    425 #if !defined(_MSC_VER)
    426   // We can spoil names of symbols with C linkage, so use an heuristic
    427   // approach to check if the name should be demangled.
    428   if (Name.substr(0, 2) != "_Z")
    429     return Name;
    430   int status = 0;
    431   char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
    432   if (status != 0)
    433     return Name;
    434   std::string Result = DemangledName;
    435   free(DemangledName);
    436   return Result;
    437 #else
    438   return Name;
    439 #endif
    440 }
    441 
    442 } // namespace symbolize
    443 } // namespace llvm
    444