Home | History | Annotate | Download | only in sanitizer_common
      1 //===-- sanitizer_symbolizer.cc -------------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is shared between AddressSanitizer and ThreadSanitizer
     11 // run-time libraries. See sanitizer_symbolizer.h for details.
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "sanitizer_common.h"
     15 #include "sanitizer_placement_new.h"
     16 #include "sanitizer_procmaps.h"
     17 #include "sanitizer_symbolizer.h"
     18 
     19 namespace __sanitizer {
     20 
     21 void AddressInfo::Clear() {
     22   InternalFree(module);
     23   InternalFree(function);
     24   InternalFree(file);
     25   internal_memset(this, 0, sizeof(AddressInfo));
     26 }
     27 
     28 LoadedModule::LoadedModule(const char *module_name, uptr base_address) {
     29   full_name_ = internal_strdup(module_name);
     30   base_address_ = base_address;
     31   n_ranges_ = 0;
     32 }
     33 
     34 void LoadedModule::addAddressRange(uptr beg, uptr end) {
     35   CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
     36   ranges_[n_ranges_].beg = beg;
     37   ranges_[n_ranges_].end = end;
     38   n_ranges_++;
     39 }
     40 
     41 bool LoadedModule::containsAddress(uptr address) const {
     42   for (uptr i = 0; i < n_ranges_; i++) {
     43     if (ranges_[i].beg <= address && address < ranges_[i].end)
     44       return true;
     45   }
     46   return false;
     47 }
     48 
     49 // Extracts the prefix of "str" that consists of any characters not
     50 // present in "delims" string, and copies this prefix to "result", allocating
     51 // space for it.
     52 // Returns a pointer to "str" after skipping extracted prefix and first
     53 // delimiter char.
     54 static const char *ExtractToken(const char *str, const char *delims,
     55                                 char **result) {
     56   uptr prefix_len = internal_strcspn(str, delims);
     57   *result = (char*)InternalAlloc(prefix_len + 1);
     58   internal_memcpy(*result, str, prefix_len);
     59   (*result)[prefix_len] = '\0';
     60   const char *prefix_end = str + prefix_len;
     61   if (*prefix_end != '\0') prefix_end++;
     62   return prefix_end;
     63 }
     64 
     65 // Same as ExtractToken, but converts extracted token to integer.
     66 static const char *ExtractInt(const char *str, const char *delims,
     67                               int *result) {
     68   char *buff;
     69   const char *ret = ExtractToken(str, delims, &buff);
     70   if (buff != 0) {
     71     *result = internal_atoll(buff);
     72   }
     73   InternalFree(buff);
     74   return ret;
     75 }
     76 
     77 // ExternalSymbolizer encapsulates communication between the tool and
     78 // external symbolizer program, running in a different subprocess,
     79 // For now we assume the following protocol:
     80 // For each request of the form
     81 //   <module_name> <module_offset>
     82 // passed to STDIN, external symbolizer prints to STDOUT response:
     83 //   <function_name>
     84 //   <file_name>:<line_number>:<column_number>
     85 //   <function_name>
     86 //   <file_name>:<line_number>:<column_number>
     87 //   ...
     88 //   <empty line>
     89 class ExternalSymbolizer {
     90  public:
     91   ExternalSymbolizer(const char *path, int input_fd, int output_fd)
     92       : path_(path),
     93         input_fd_(input_fd),
     94         output_fd_(output_fd),
     95         times_restarted_(0) {
     96     CHECK(path_);
     97     CHECK_NE(input_fd_, kInvalidFd);
     98     CHECK_NE(output_fd_, kInvalidFd);
     99   }
    100 
    101   // Returns the number of frames for a given address, or zero if
    102   // symbolization failed.
    103   uptr SymbolizeCode(uptr addr, const char *module_name, uptr module_offset,
    104                      AddressInfo *frames, uptr max_frames) {
    105     CHECK(module_name);
    106     // FIXME: Make sure this buffer always has sufficient size to hold
    107     // large debug info.
    108     static const int kMaxBufferSize = 4096;
    109     InternalScopedBuffer<char> buffer(kMaxBufferSize);
    110     char *buffer_data = buffer.data();
    111     internal_snprintf(buffer_data, kMaxBufferSize, "%s 0x%zx\n",
    112                       module_name, module_offset);
    113     if (!writeToSymbolizer(buffer_data, internal_strlen(buffer_data)))
    114       return 0;
    115 
    116     if (!readFromSymbolizer(buffer_data, kMaxBufferSize))
    117       return 0;
    118     const char *str = buffer_data;
    119     uptr frame_id;
    120     CHECK_GT(max_frames, 0);
    121     for (frame_id = 0; frame_id < max_frames; frame_id++) {
    122       AddressInfo *info = &frames[frame_id];
    123       char *function_name = 0;
    124       str = ExtractToken(str, "\n", &function_name);
    125       CHECK(function_name);
    126       if (function_name[0] == '\0') {
    127         // There are no more frames.
    128         break;
    129       }
    130       info->Clear();
    131       info->FillAddressAndModuleInfo(addr, module_name, module_offset);
    132       info->function = function_name;
    133       // Parse <file>:<line>:<column> buffer.
    134       char *file_line_info = 0;
    135       str = ExtractToken(str, "\n", &file_line_info);
    136       CHECK(file_line_info);
    137       const char *line_info = ExtractToken(file_line_info, ":", &info->file);
    138       line_info = ExtractInt(line_info, ":", &info->line);
    139       line_info = ExtractInt(line_info, "", &info->column);
    140       InternalFree(file_line_info);
    141 
    142       // Functions and filenames can be "??", in which case we write 0
    143       // to address info to mark that names are unknown.
    144       if (0 == internal_strcmp(info->function, "??")) {
    145         InternalFree(info->function);
    146         info->function = 0;
    147       }
    148       if (0 == internal_strcmp(info->file, "??")) {
    149         InternalFree(info->file);
    150         info->file = 0;
    151       }
    152     }
    153     if (frame_id == 0) {
    154       // Make sure we return at least one frame.
    155       AddressInfo *info = &frames[0];
    156       info->Clear();
    157       info->FillAddressAndModuleInfo(addr, module_name, module_offset);
    158       frame_id = 1;
    159     }
    160     return frame_id;
    161   }
    162 
    163   bool Restart() {
    164     if (times_restarted_ >= kMaxTimesRestarted) return false;
    165     times_restarted_++;
    166     internal_close(input_fd_);
    167     internal_close(output_fd_);
    168     return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
    169   }
    170 
    171  private:
    172   bool readFromSymbolizer(char *buffer, uptr max_length) {
    173     if (max_length == 0)
    174       return true;
    175     uptr read_len = 0;
    176     while (true) {
    177       uptr just_read = internal_read(input_fd_, buffer + read_len,
    178                                      max_length - read_len);
    179       // We can't read 0 bytes, as we don't expect external symbolizer to close
    180       // its stdout.
    181       if (just_read == 0 || just_read == (uptr)-1) {
    182         Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
    183         return false;
    184       }
    185       read_len += just_read;
    186       // Empty line marks the end of symbolizer output.
    187       if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
    188                            buffer[read_len - 2] == '\n') {
    189         break;
    190       }
    191     }
    192     return true;
    193   }
    194   bool writeToSymbolizer(const char *buffer, uptr length) {
    195     if (length == 0)
    196       return true;
    197     uptr write_len = internal_write(output_fd_, buffer, length);
    198     if (write_len == 0 || write_len == (uptr)-1) {
    199       Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
    200       return false;
    201     }
    202     return true;
    203   }
    204 
    205   const char *path_;
    206   int input_fd_;
    207   int output_fd_;
    208 
    209   static const uptr kMaxTimesRestarted = 5;
    210   uptr times_restarted_;
    211 };
    212 
    213 static LowLevelAllocator symbolizer_allocator;  // Linker initialized.
    214 
    215 class Symbolizer {
    216  public:
    217   uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
    218     if (max_frames == 0)
    219       return 0;
    220     LoadedModule *module = FindModuleForAddress(addr);
    221     if (module == 0)
    222       return 0;
    223     const char *module_name = module->full_name();
    224     uptr module_offset = addr - module->base_address();
    225     uptr actual_frames = 0;
    226     if (external_symbolizer_ == 0) {
    227       ReportExternalSymbolizerError(
    228           "WARNING: Trying to symbolize code, but external "
    229           "symbolizer is not initialized!\n");
    230     } else {
    231       while (true) {
    232         actual_frames = external_symbolizer_->SymbolizeCode(
    233             addr, module_name, module_offset, frames, max_frames);
    234         if (actual_frames > 0) {
    235           // Symbolization was successful.
    236           break;
    237         }
    238         // Try to restart symbolizer subprocess. If we don't succeed, forget
    239         // about it and don't try to use it later.
    240         if (!external_symbolizer_->Restart()) {
    241           ReportExternalSymbolizerError(
    242               "WARNING: Failed to use and restart external symbolizer!\n");
    243           external_symbolizer_ = 0;
    244           break;
    245         }
    246       }
    247     }
    248     if (external_symbolizer_ == 0) {
    249       // External symbolizer was not initialized or failed. Fill only data
    250       // about module name and offset.
    251       AddressInfo *info = &frames[0];
    252       info->Clear();
    253       info->FillAddressAndModuleInfo(addr, module_name, module_offset);
    254       return 1;
    255     }
    256     // Otherwise, the data was filled by external symbolizer.
    257     return actual_frames;
    258   }
    259   bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
    260     int input_fd, output_fd;
    261     if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
    262       return false;
    263     void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer));
    264     external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer,
    265                                                        input_fd, output_fd);
    266     return true;
    267   }
    268 
    269  private:
    270   LoadedModule *FindModuleForAddress(uptr address) {
    271     if (modules_ == 0) {
    272       modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
    273           kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
    274       CHECK(modules_);
    275       n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts);
    276       CHECK_GT(n_modules_, 0);
    277       CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
    278     }
    279     for (uptr i = 0; i < n_modules_; i++) {
    280       if (modules_[i].containsAddress(address)) {
    281         return &modules_[i];
    282       }
    283     }
    284     return 0;
    285   }
    286   void ReportExternalSymbolizerError(const char *msg) {
    287     // Don't use atomics here for now, as SymbolizeCode can't be called
    288     // from multiple threads anyway.
    289     static bool reported;
    290     if (!reported) {
    291       Report(msg);
    292       reported = true;
    293     }
    294   }
    295 
    296   static const uptr kMaxNumberOfModuleContexts = 4096;
    297   LoadedModule *modules_;  // Array of module descriptions is leaked.
    298   uptr n_modules_;
    299 
    300   ExternalSymbolizer *external_symbolizer_;  // Leaked.
    301 };
    302 
    303 static Symbolizer symbolizer;  // Linker initialized.
    304 
    305 uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) {
    306   return symbolizer.SymbolizeCode(address, frames, max_frames);
    307 }
    308 
    309 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
    310   return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
    311 }
    312 
    313 }  // namespace __sanitizer
    314