Home | History | Annotate | Download | only in sanitizer_common
      1 //===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is shared between AddressSanitizer and ThreadSanitizer
     11 // run-time libraries. See sanitizer_symbolizer.h for details.
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "sanitizer_allocator_internal.h"
     15 #include "sanitizer_common.h"
     16 #include "sanitizer_placement_new.h"
     17 #include "sanitizer_procmaps.h"
     18 #include "sanitizer_symbolizer.h"
     19 
     20 namespace __sanitizer {
     21 
     22 void AddressInfo::Clear() {
     23   InternalFree(module);
     24   InternalFree(function);
     25   InternalFree(file);
     26   internal_memset(this, 0, sizeof(AddressInfo));
     27 }
     28 
     29 LoadedModule::LoadedModule(const char *module_name, uptr base_address) {
     30   full_name_ = internal_strdup(module_name);
     31   base_address_ = base_address;
     32   n_ranges_ = 0;
     33 }
     34 
     35 void LoadedModule::addAddressRange(uptr beg, uptr end) {
     36   CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
     37   ranges_[n_ranges_].beg = beg;
     38   ranges_[n_ranges_].end = end;
     39   n_ranges_++;
     40 }
     41 
     42 bool LoadedModule::containsAddress(uptr address) const {
     43   for (uptr i = 0; i < n_ranges_; i++) {
     44     if (ranges_[i].beg <= address && address < ranges_[i].end)
     45       return true;
     46   }
     47   return false;
     48 }
     49 
     50 // Extracts the prefix of "str" that consists of any characters not
     51 // present in "delims" string, and copies this prefix to "result", allocating
     52 // space for it.
     53 // Returns a pointer to "str" after skipping extracted prefix and first
     54 // delimiter char.
     55 static const char *ExtractToken(const char *str, const char *delims,
     56                                 char **result) {
     57   uptr prefix_len = internal_strcspn(str, delims);
     58   *result = (char*)InternalAlloc(prefix_len + 1);
     59   internal_memcpy(*result, str, prefix_len);
     60   (*result)[prefix_len] = '\0';
     61   const char *prefix_end = str + prefix_len;
     62   if (*prefix_end != '\0') prefix_end++;
     63   return prefix_end;
     64 }
     65 
     66 // Same as ExtractToken, but converts extracted token to integer.
     67 static const char *ExtractInt(const char *str, const char *delims,
     68                               int *result) {
     69   char *buff;
     70   const char *ret = ExtractToken(str, delims, &buff);
     71   if (buff != 0) {
     72     *result = (int)internal_atoll(buff);
     73   }
     74   InternalFree(buff);
     75   return ret;
     76 }
     77 
     78 static const char *ExtractUptr(const char *str, const char *delims,
     79                                uptr *result) {
     80   char *buff;
     81   const char *ret = ExtractToken(str, delims, &buff);
     82   if (buff != 0) {
     83     *result = (uptr)internal_atoll(buff);
     84   }
     85   InternalFree(buff);
     86   return ret;
     87 }
     88 
     89 // ExternalSymbolizer encapsulates communication between the tool and
     90 // external symbolizer program, running in a different subprocess,
     91 // For now we assume the following protocol:
     92 // For each request of the form
     93 //   <module_name> <module_offset>
     94 // passed to STDIN, external symbolizer prints to STDOUT response:
     95 //   <function_name>
     96 //   <file_name>:<line_number>:<column_number>
     97 //   <function_name>
     98 //   <file_name>:<line_number>:<column_number>
     99 //   ...
    100 //   <empty line>
    101 class ExternalSymbolizer {
    102  public:
    103   ExternalSymbolizer(const char *path, int input_fd, int output_fd)
    104       : path_(path),
    105         input_fd_(input_fd),
    106         output_fd_(output_fd),
    107         times_restarted_(0) {
    108     CHECK(path_);
    109     CHECK_NE(input_fd_, kInvalidFd);
    110     CHECK_NE(output_fd_, kInvalidFd);
    111   }
    112 
    113   char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
    114     CHECK(module_name);
    115     internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
    116                       is_data ? "DATA " : "", module_name, module_offset);
    117     if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
    118       return 0;
    119     if (!readFromSymbolizer(buffer_, kBufferSize))
    120       return 0;
    121     return buffer_;
    122   }
    123 
    124   bool Restart() {
    125     if (times_restarted_ >= kMaxTimesRestarted) return false;
    126     times_restarted_++;
    127     internal_close(input_fd_);
    128     internal_close(output_fd_);
    129     return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
    130   }
    131 
    132   void Flush() {
    133   }
    134 
    135  private:
    136   bool readFromSymbolizer(char *buffer, uptr max_length) {
    137     if (max_length == 0)
    138       return true;
    139     uptr read_len = 0;
    140     while (true) {
    141       uptr just_read = internal_read(input_fd_, buffer + read_len,
    142                                      max_length - read_len);
    143       // We can't read 0 bytes, as we don't expect external symbolizer to close
    144       // its stdout.
    145       if (just_read == 0 || just_read == (uptr)-1) {
    146         Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
    147         return false;
    148       }
    149       read_len += just_read;
    150       // Empty line marks the end of symbolizer output.
    151       if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
    152                            buffer[read_len - 2] == '\n') {
    153         break;
    154       }
    155     }
    156     return true;
    157   }
    158 
    159   bool writeToSymbolizer(const char *buffer, uptr length) {
    160     if (length == 0)
    161       return true;
    162     uptr write_len = internal_write(output_fd_, buffer, length);
    163     if (write_len == 0 || write_len == (uptr)-1) {
    164       Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
    165       return false;
    166     }
    167     return true;
    168   }
    169 
    170   const char *path_;
    171   int input_fd_;
    172   int output_fd_;
    173 
    174   static const uptr kBufferSize = 16 * 1024;
    175   char buffer_[kBufferSize];
    176 
    177   static const uptr kMaxTimesRestarted = 5;
    178   uptr times_restarted_;
    179 };
    180 
// Allocator backing the symbolizer objects and the module table created in
// this file; nothing allocated from it here is ever released.
static LowLevelAllocator symbolizer_allocator;  // Linker initialized.
    182 
    183 #if SANITIZER_SUPPORTS_WEAK_HOOKS
    184 extern "C" {
    185 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
    186 bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
    187                                 char *Buffer, int MaxLength);
    188 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
    189 bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
    190                                 char *Buffer, int MaxLength);
    191 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
    192 void __sanitizer_symbolize_flush();
    193 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
    194 int __sanitizer_symbolize_demangle(const char *Name, char *Buffer,
    195                                    int MaxLength);
    196 }  // extern "C"
    197 
    198 class InternalSymbolizer {
    199  public:
    200   typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
    201 
    202   static InternalSymbolizer *get() {
    203     if (__sanitizer_symbolize_code != 0 &&
    204         __sanitizer_symbolize_data != 0) {
    205       void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer));
    206       return new(mem) InternalSymbolizer();
    207     }
    208     return 0;
    209   }
    210 
    211   char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
    212     SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
    213                                                 : __sanitizer_symbolize_code;
    214     if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
    215       return buffer_;
    216     return 0;
    217   }
    218 
    219   void Flush() {
    220     if (__sanitizer_symbolize_flush)
    221       __sanitizer_symbolize_flush();
    222   }
    223 
    224   const char *Demangle(const char *name) {
    225     if (__sanitizer_symbolize_demangle) {
    226       for (uptr res_length = 1024;
    227            res_length <= InternalSizeClassMap::kMaxSize;) {
    228         char *res_buff = static_cast<char*>(InternalAlloc(res_length));
    229         uptr req_length =
    230             __sanitizer_symbolize_demangle(name, res_buff, res_length);
    231         if (req_length > res_length) {
    232           res_length = req_length + 1;
    233           InternalFree(res_buff);
    234           continue;
    235         }
    236         return res_buff;
    237       }
    238     }
    239     return name;
    240   }
    241 
    242  private:
    243   InternalSymbolizer() { }
    244 
    245   static const int kBufferSize = 16 * 1024;
    246   static const int kMaxDemangledNameSize = 1024;
    247   char buffer_[kBufferSize];
    248 };
    249 #else  // SANITIZER_SUPPORTS_WEAK_HOOKS
    250 
    251 class InternalSymbolizer {
    252  public:
    253   static InternalSymbolizer *get() { return 0; }
    254   char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
    255     return 0;
    256   }
    257   void Flush() { }
    258   const char *Demangle(const char *name) { return name; }
    259 };
    260 
    261 #endif  // SANITIZER_SUPPORTS_WEAK_HOOKS
    262 
    263 class Symbolizer {
    264   // This class has no constructor, as global constructors are forbidden in
    265   // sanitizer_common. It should be linker initialized instead.
    266  public:
    267   uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
    268     if (max_frames == 0)
    269       return 0;
    270     LoadedModule *module = FindModuleForAddress(addr);
    271     if (module == 0)
    272       return 0;
    273     const char *module_name = module->full_name();
    274     uptr module_offset = addr - module->base_address();
    275     const char *str = SendCommand(false, module_name, module_offset);
    276     if (str == 0) {
    277       // External symbolizer was not initialized or failed. Fill only data
    278       // about module name and offset.
    279       AddressInfo *info = &frames[0];
    280       info->Clear();
    281       info->FillAddressAndModuleInfo(addr, module_name, module_offset);
    282       return 1;
    283     }
    284     uptr frame_id = 0;
    285     for (frame_id = 0; frame_id < max_frames; frame_id++) {
    286       AddressInfo *info = &frames[frame_id];
    287       char *function_name = 0;
    288       str = ExtractToken(str, "\n", &function_name);
    289       CHECK(function_name);
    290       if (function_name[0] == '\0') {
    291         // There are no more frames.
    292         break;
    293       }
    294       info->Clear();
    295       info->FillAddressAndModuleInfo(addr, module_name, module_offset);
    296       info->function = function_name;
    297       // Parse <file>:<line>:<column> buffer.
    298       char *file_line_info = 0;
    299       str = ExtractToken(str, "\n", &file_line_info);
    300       CHECK(file_line_info);
    301       const char *line_info = ExtractToken(file_line_info, ":", &info->file);
    302       line_info = ExtractInt(line_info, ":", &info->line);
    303       line_info = ExtractInt(line_info, "", &info->column);
    304       InternalFree(file_line_info);
    305 
    306       // Functions and filenames can be "??", in which case we write 0
    307       // to address info to mark that names are unknown.
    308       if (0 == internal_strcmp(info->function, "??")) {
    309         InternalFree(info->function);
    310         info->function = 0;
    311       }
    312       if (0 == internal_strcmp(info->file, "??")) {
    313         InternalFree(info->file);
    314         info->file = 0;
    315       }
    316     }
    317     if (frame_id == 0) {
    318       // Make sure we return at least one frame.
    319       AddressInfo *info = &frames[0];
    320       info->Clear();
    321       info->FillAddressAndModuleInfo(addr, module_name, module_offset);
    322       frame_id = 1;
    323     }
    324     return frame_id;
    325   }
    326 
    327   bool SymbolizeData(uptr addr, DataInfo *info) {
    328     LoadedModule *module = FindModuleForAddress(addr);
    329     if (module == 0)
    330       return false;
    331     const char *module_name = module->full_name();
    332     uptr module_offset = addr - module->base_address();
    333     internal_memset(info, 0, sizeof(*info));
    334     info->address = addr;
    335     info->module = internal_strdup(module_name);
    336     info->module_offset = module_offset;
    337     const char *str = SendCommand(true, module_name, module_offset);
    338     if (str == 0)
    339       return true;
    340     str = ExtractToken(str, "\n", &info->name);
    341     str = ExtractUptr(str, " ", &info->start);
    342     str = ExtractUptr(str, "\n", &info->size);
    343     info->start += module->base_address();
    344     return true;
    345   }
    346 
    347   bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
    348     int input_fd, output_fd;
    349     if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
    350       return false;
    351     void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer));
    352     external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer,
    353                                                        input_fd, output_fd);
    354     return true;
    355   }
    356 
    357   bool IsSymbolizerAvailable() {
    358     if (internal_symbolizer_ == 0)
    359       internal_symbolizer_ = InternalSymbolizer::get();
    360     return internal_symbolizer_ || external_symbolizer_;
    361   }
    362 
    363   void Flush() {
    364     if (internal_symbolizer_)
    365       internal_symbolizer_->Flush();
    366     if (external_symbolizer_)
    367       external_symbolizer_->Flush();
    368   }
    369 
    370   const char *Demangle(const char *name) {
    371     if (IsSymbolizerAvailable() && internal_symbolizer_ != 0)
    372       return internal_symbolizer_->Demangle(name);
    373     return DemangleCXXABI(name);
    374   }
    375 
    376  private:
    377   char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
    378     // First, try to use internal symbolizer.
    379     if (!IsSymbolizerAvailable()) {
    380       return 0;
    381     }
    382     if (internal_symbolizer_) {
    383       return internal_symbolizer_->SendCommand(is_data, module_name,
    384                                                module_offset);
    385     }
    386     // Otherwise, fall back to external symbolizer.
    387     if (external_symbolizer_ == 0) {
    388       ReportExternalSymbolizerError(
    389           "WARNING: Trying to symbolize code, but external "
    390           "symbolizer is not initialized!\n");
    391       return 0;
    392     }
    393     for (;;) {
    394       char *reply = external_symbolizer_->SendCommand(is_data, module_name,
    395           module_offset);
    396       if (reply)
    397         return reply;
    398       // Try to restart symbolizer subprocess. If we don't succeed, forget
    399       // about it and don't try to use it later.
    400       if (!external_symbolizer_->Restart()) {
    401         ReportExternalSymbolizerError(
    402             "WARNING: Failed to use and restart external symbolizer!\n");
    403         external_symbolizer_ = 0;
    404         return 0;
    405       }
    406     }
    407   }
    408 
    409   LoadedModule *FindModuleForAddress(uptr address) {
    410     bool modules_were_reloaded = false;
    411     if (modules_ == 0 || !modules_fresh_) {
    412       modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
    413           kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
    414       CHECK(modules_);
    415       n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts,
    416                                     /* filter */ 0);
    417       // FIXME: Return this check when GetListOfModules is implemented on Mac.
    418       // CHECK_GT(n_modules_, 0);
    419       CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
    420       modules_fresh_ = true;
    421       modules_were_reloaded = true;
    422     }
    423     for (uptr i = 0; i < n_modules_; i++) {
    424       if (modules_[i].containsAddress(address)) {
    425         return &modules_[i];
    426       }
    427     }
    428     // Reload the modules and look up again, if we haven't tried it yet.
    429     if (!modules_were_reloaded) {
    430       // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
    431       // It's too aggressive to reload the list of modules each time we fail
    432       // to find a module for a given address.
    433       modules_fresh_ = false;
    434       return FindModuleForAddress(address);
    435     }
    436     return 0;
    437   }
    438 
    439   void ReportExternalSymbolizerError(const char *msg) {
    440     // Don't use atomics here for now, as SymbolizeCode can't be called
    441     // from multiple threads anyway.
    442     static bool reported;
    443     if (!reported) {
    444       Report(msg);
    445       reported = true;
    446     }
    447   }
    448 
    449   // 16K loaded modules should be enough for everyone.
    450   static const uptr kMaxNumberOfModuleContexts = 1 << 14;
    451   LoadedModule *modules_;  // Array of module descriptions is leaked.
    452   uptr n_modules_;
    453   // If stale, need to reload the modules before looking up addresses.
    454   bool modules_fresh_;
    455 
    456   ExternalSymbolizer *external_symbolizer_;  // Leaked.
    457   InternalSymbolizer *internal_symbolizer_;  // Leaked.
    458 };
    459 
// The single Symbolizer instance behind the free functions below. Symbolizer
// declares no constructor, so this object relies on static zero
// initialization of its members.
static Symbolizer symbolizer;  // Linker initialized.
    461 
    462 uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) {
    463   return symbolizer.SymbolizeCode(address, frames, max_frames);
    464 }
    465 
    466 bool SymbolizeData(uptr address, DataInfo *info) {
    467   return symbolizer.SymbolizeData(address, info);
    468 }
    469 
    470 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
    471   return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
    472 }
    473 
    474 bool IsSymbolizerAvailable() {
    475   return symbolizer.IsSymbolizerAvailable();
    476 }
    477 
    478 void FlushSymbolizer() {
    479   symbolizer.Flush();
    480 }
    481 
    482 const char *Demangle(const char *name) {
    483   return symbolizer.Demangle(name);
    484 }
    485 
    486 }  // namespace __sanitizer
    487