Home | History | Annotate | Download | only in sanitizer_common
      1 //===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is shared between AddressSanitizer and ThreadSanitizer
     11 // run-time libraries.
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "sanitizer_allocator_internal.h"
     15 #include "sanitizer_internal_defs.h"
     16 #include "sanitizer_symbolizer_internal.h"
     17 
     18 namespace __sanitizer {
     19 
     20 const char *ExtractToken(const char *str, const char *delims, char **result) {
     21   uptr prefix_len = internal_strcspn(str, delims);
     22   *result = (char*)InternalAlloc(prefix_len + 1);
     23   internal_memcpy(*result, str, prefix_len);
     24   (*result)[prefix_len] = '\0';
     25   const char *prefix_end = str + prefix_len;
     26   if (*prefix_end != '\0') prefix_end++;
     27   return prefix_end;
     28 }
     29 
     30 const char *ExtractInt(const char *str, const char *delims, int *result) {
     31   char *buff;
     32   const char *ret = ExtractToken(str, delims, &buff);
     33   if (buff != 0) {
     34     *result = (int)internal_atoll(buff);
     35   }
     36   InternalFree(buff);
     37   return ret;
     38 }
     39 
     40 const char *ExtractUptr(const char *str, const char *delims, uptr *result) {
     41   char *buff;
     42   const char *ret = ExtractToken(str, delims, &buff);
     43   if (buff != 0) {
     44     *result = (uptr)internal_atoll(buff);
     45   }
     46   InternalFree(buff);
     47   return ret;
     48 }
     49 
     50 const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
     51                                       char **result) {
     52   const char *found_delimiter = internal_strstr(str, delimiter);
     53   uptr prefix_len =
     54       found_delimiter ? found_delimiter - str : internal_strlen(str);
     55   *result = (char *)InternalAlloc(prefix_len + 1);
     56   internal_memcpy(*result, str, prefix_len);
     57   (*result)[prefix_len] = '\0';
     58   const char *prefix_end = str + prefix_len;
     59   if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter);
     60   return prefix_end;
     61 }
     62 
     63 SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
     64   BlockingMutexLock l(&mu_);
     65   const char *module_name;
     66   uptr module_offset;
     67   SymbolizedStack *res = SymbolizedStack::New(addr);
     68   if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset))
     69     return res;
     70   // Always fill data about module name and offset.
     71   res->info.FillModuleInfo(module_name, module_offset);
     72   for (auto &tool : tools_) {
     73     SymbolizerScope sym_scope(this);
     74     if (tool.SymbolizePC(addr, res)) {
     75       return res;
     76     }
     77   }
     78   return res;
     79 }
     80 
     81 bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
     82   BlockingMutexLock l(&mu_);
     83   const char *module_name;
     84   uptr module_offset;
     85   if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset))
     86     return false;
     87   info->Clear();
     88   info->module = internal_strdup(module_name);
     89   info->module_offset = module_offset;
     90   for (auto &tool : tools_) {
     91     SymbolizerScope sym_scope(this);
     92     if (tool.SymbolizeData(addr, info)) {
     93       return true;
     94     }
     95   }
     96   return true;
     97 }
     98 
     99 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
    100                                              uptr *module_address) {
    101   BlockingMutexLock l(&mu_);
    102   const char *internal_module_name = nullptr;
    103   if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
    104                                          module_address))
    105     return false;
    106 
    107   if (module_name)
    108     *module_name = module_names_.GetOwnedCopy(internal_module_name);
    109   return true;
    110 }
    111 
    112 void Symbolizer::Flush() {
    113   BlockingMutexLock l(&mu_);
    114   for (auto &tool : tools_) {
    115     SymbolizerScope sym_scope(this);
    116     tool.Flush();
    117   }
    118 }
    119 
    120 const char *Symbolizer::Demangle(const char *name) {
    121   BlockingMutexLock l(&mu_);
    122   for (auto &tool : tools_) {
    123     SymbolizerScope sym_scope(this);
    124     if (const char *demangled = tool.Demangle(name))
    125       return demangled;
    126   }
    127   return PlatformDemangle(name);
    128 }
    129 
    130 void Symbolizer::PrepareForSandboxing() {
    131   BlockingMutexLock l(&mu_);
    132   PlatformPrepareForSandboxing();
    133 }
    134 
    135 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
    136                                                    const char **module_name,
    137                                                    uptr *module_offset) {
    138   const LoadedModule *module = FindModuleForAddress(address);
    139   if (module == nullptr)
    140     return false;
    141   *module_name = module->full_name();
    142   *module_offset = address - module->base_address();
    143   return true;
    144 }
    145 
    146 const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
    147   bool modules_were_reloaded = false;
    148   if (!modules_fresh_) {
    149     modules_.init();
    150     RAW_CHECK(modules_.size() > 0);
    151     modules_fresh_ = true;
    152     modules_were_reloaded = true;
    153   }
    154   for (uptr i = 0; i < modules_.size(); i++) {
    155     if (modules_[i].containsAddress(address)) {
    156       return &modules_[i];
    157     }
    158   }
    159   // Reload the modules and look up again, if we haven't tried it yet.
    160   if (!modules_were_reloaded) {
    161     // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
    162     // It's too aggressive to reload the list of modules each time we fail
    163     // to find a module for a given address.
    164     modules_fresh_ = false;
    165     return FindModuleForAddress(address);
    166   }
    167   return 0;
    168 }
    169 
    170 Symbolizer *Symbolizer::GetOrInit() {
    171   SpinMutexLock l(&init_mu_);
    172   if (symbolizer_)
    173     return symbolizer_;
    174   symbolizer_ = PlatformInit();
    175   CHECK(symbolizer_);
    176   return symbolizer_;
    177 }
    178 
    179 // For now we assume the following protocol:
    180 // For each request of the form
    181 //   <module_name> <module_offset>
    182 // passed to STDIN, external symbolizer prints to STDOUT response:
    183 //   <function_name>
    184 //   <file_name>:<line_number>:<column_number>
    185 //   <function_name>
    186 //   <file_name>:<line_number>:<column_number>
    187 //   ...
    188 //   <empty line>
    189 class LLVMSymbolizerProcess : public SymbolizerProcess {
    190  public:
    191   explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {}
    192 
    193  private:
    194   bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
    195     // Empty line marks the end of llvm-symbolizer output.
    196     return length >= 2 && buffer[length - 1] == '\n' &&
    197            buffer[length - 2] == '\n';
    198   }
    199 
    200   void GetArgV(const char *path_to_binary,
    201                const char *(&argv)[kArgVMax]) const override {
    202 #if defined(__x86_64h__)
    203     const char* const kSymbolizerArch = "--default-arch=x86_64h";
    204 #elif defined(__x86_64__)
    205     const char* const kSymbolizerArch = "--default-arch=x86_64";
    206 #elif defined(__i386__)
    207     const char* const kSymbolizerArch = "--default-arch=i386";
    208 #elif defined(__aarch64__)
    209     const char* const kSymbolizerArch = "--default-arch=arm64";
    210 #elif defined(__arm__)
    211     const char* const kSymbolizerArch = "--default-arch=arm";
    212 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    213     const char* const kSymbolizerArch = "--default-arch=powerpc64";
    214 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    215     const char* const kSymbolizerArch = "--default-arch=powerpc64le";
    216 #elif defined(__s390x__)
    217     const char* const kSymbolizerArch = "--default-arch=s390x";
    218 #elif defined(__s390__)
    219     const char* const kSymbolizerArch = "--default-arch=s390";
    220 #else
    221     const char* const kSymbolizerArch = "--default-arch=unknown";
    222 #endif
    223 
    224     const char *const inline_flag = common_flags()->symbolize_inline_frames
    225                                         ? "--inlining=true"
    226                                         : "--inlining=false";
    227     int i = 0;
    228     argv[i++] = path_to_binary;
    229     argv[i++] = inline_flag;
    230     argv[i++] = kSymbolizerArch;
    231     argv[i++] = nullptr;
    232   }
    233 };
    234 
    235 LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
    236     : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {}
    237 
    238 // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
    239 // Windows, so extract tokens from the right hand side first. The column info is
    240 // also optional.
    241 static const char *ParseFileLineInfo(AddressInfo *info, const char *str) {
    242   char *file_line_info = 0;
    243   str = ExtractToken(str, "\n", &file_line_info);
    244   CHECK(file_line_info);
    245   // Parse the last :<int>, which must be there.
    246   char *last_colon = internal_strrchr(file_line_info, ':');
    247   CHECK(last_colon);
    248   int line_or_column = internal_atoll(last_colon + 1);
    249   // Truncate the string at the last colon and find the next-to-last colon.
    250   *last_colon = '\0';
    251   last_colon = internal_strrchr(file_line_info, ':');
    252   if (last_colon && IsDigit(last_colon[1])) {
    253     // If the second-to-last colon is followed by a digit, it must be the line
    254     // number, and the previous parsed number was a column.
    255     info->line = internal_atoll(last_colon + 1);
    256     info->column = line_or_column;
    257     *last_colon = '\0';
    258   } else {
    259     // Otherwise, we have line info but no column info.
    260     info->line = line_or_column;
    261     info->column = 0;
    262   }
    263   ExtractToken(file_line_info, "", &info->file);
    264   InternalFree(file_line_info);
    265   return str;
    266 }
    267 
    268 // Parses one or more two-line strings in the following format:
    269 //   <function_name>
    270 //   <file_name>:<line_number>[:<column_number>]
    271 // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
    272 // them use the same output format.
    273 void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
    274   bool top_frame = true;
    275   SymbolizedStack *last = res;
    276   while (true) {
    277     char *function_name = 0;
    278     str = ExtractToken(str, "\n", &function_name);
    279     CHECK(function_name);
    280     if (function_name[0] == '\0') {
    281       // There are no more frames.
    282       InternalFree(function_name);
    283       break;
    284     }
    285     SymbolizedStack *cur;
    286     if (top_frame) {
    287       cur = res;
    288       top_frame = false;
    289     } else {
    290       cur = SymbolizedStack::New(res->info.address);
    291       cur->info.FillModuleInfo(res->info.module, res->info.module_offset);
    292       last->next = cur;
    293       last = cur;
    294     }
    295 
    296     AddressInfo *info = &cur->info;
    297     info->function = function_name;
    298     str = ParseFileLineInfo(info, str);
    299 
    300     // Functions and filenames can be "??", in which case we write 0
    301     // to address info to mark that names are unknown.
    302     if (0 == internal_strcmp(info->function, "??")) {
    303       InternalFree(info->function);
    304       info->function = 0;
    305     }
    306     if (0 == internal_strcmp(info->file, "??")) {
    307       InternalFree(info->file);
    308       info->file = 0;
    309     }
    310   }
    311 }
    312 
    313 // Parses a two-line string in the following format:
    314 //   <symbol_name>
    315 //   <start_address> <size>
    316 // Used by LLVMSymbolizer and InternalSymbolizer.
    317 void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
    318   str = ExtractToken(str, "\n", &info->name);
    319   str = ExtractUptr(str, " ", &info->start);
    320   str = ExtractUptr(str, "\n", &info->size);
    321 }
    322 
    323 bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
    324   if (const char *buf = SendCommand(/*is_data*/ false, stack->info.module,
    325                                     stack->info.module_offset)) {
    326     ParseSymbolizePCOutput(buf, stack);
    327     return true;
    328   }
    329   return false;
    330 }
    331 
    332 bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
    333   if (const char *buf =
    334           SendCommand(/*is_data*/ true, info->module, info->module_offset)) {
    335     ParseSymbolizeDataOutput(buf, info);
    336     info->start += (addr - info->module_offset); // Add the base address.
    337     return true;
    338   }
    339   return false;
    340 }
    341 
    342 const char *LLVMSymbolizer::SendCommand(bool is_data, const char *module_name,
    343                                         uptr module_offset) {
    344   CHECK(module_name);
    345   internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
    346                     is_data ? "DATA " : "", module_name, module_offset);
    347   return symbolizer_process_->SendCommand(buffer_);
    348 }
    349 
    350 SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty)
    351     : path_(path),
    352       input_fd_(kInvalidFd),
    353       output_fd_(kInvalidFd),
    354       times_restarted_(0),
    355       failed_to_start_(false),
    356       reported_invalid_path_(false),
    357       use_forkpty_(use_forkpty) {
    358   CHECK(path_);
    359   CHECK_NE(path_[0], '\0');
    360 }
    361 
    362 const char *SymbolizerProcess::SendCommand(const char *command) {
    363   for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
    364     // Start or restart symbolizer if we failed to send command to it.
    365     if (const char *res = SendCommandImpl(command))
    366       return res;
    367     Restart();
    368   }
    369   if (!failed_to_start_) {
    370     Report("WARNING: Failed to use and restart external symbolizer!\n");
    371     failed_to_start_ = true;
    372   }
    373   return 0;
    374 }
    375 
    376 const char *SymbolizerProcess::SendCommandImpl(const char *command) {
    377   if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
    378       return 0;
    379   if (!WriteToSymbolizer(command, internal_strlen(command)))
    380       return 0;
    381   if (!ReadFromSymbolizer(buffer_, kBufferSize))
    382       return 0;
    383   return buffer_;
    384 }
    385 
    386 bool SymbolizerProcess::Restart() {
    387   if (input_fd_ != kInvalidFd)
    388     CloseFile(input_fd_);
    389   if (output_fd_ != kInvalidFd)
    390     CloseFile(output_fd_);
    391   return StartSymbolizerSubprocess();
    392 }
    393 
    394 bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) {
    395   if (max_length == 0)
    396     return true;
    397   uptr read_len = 0;
    398   while (true) {
    399     uptr just_read = 0;
    400     bool success = ReadFromFile(input_fd_, buffer + read_len,
    401                                 max_length - read_len - 1, &just_read);
    402     // We can't read 0 bytes, as we don't expect external symbolizer to close
    403     // its stdout.
    404     if (!success || just_read == 0) {
    405       Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
    406       return false;
    407     }
    408     read_len += just_read;
    409     if (ReachedEndOfOutput(buffer, read_len))
    410       break;
    411   }
    412   buffer[read_len] = '\0';
    413   return true;
    414 }
    415 
    416 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
    417   if (length == 0)
    418     return true;
    419   uptr write_len = 0;
    420   bool success = WriteToFile(output_fd_, buffer, length, &write_len);
    421   if (!success || write_len != length) {
    422     Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
    423     return false;
    424   }
    425   return true;
    426 }
    427 
    428 }  // namespace __sanitizer
    429