Home | History | Annotate | Download | only in sanitizer_common
      1 //===-- sanitizer_symbolizer.cc -------------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is shared between AddressSanitizer and ThreadSanitizer
     11 // run-time libraries. See sanitizer_symbolizer.h for details.
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "sanitizer_common.h"
     15 #include "sanitizer_placement_new.h"
     16 #include "sanitizer_procmaps.h"
     17 #include "sanitizer_symbolizer.h"
     18 
     19 namespace __sanitizer {
     20 
     21 void AddressInfo::Clear() {
     22   InternalFree(module);
     23   InternalFree(function);
     24   InternalFree(file);
     25   internal_memset(this, 0, sizeof(AddressInfo));
     26 }
     27 
     28 LoadedModule::LoadedModule(const char *module_name, uptr base_address) {
     29   full_name_ = internal_strdup(module_name);
     30   base_address_ = base_address;
     31   n_ranges_ = 0;
     32 }
     33 
     34 void LoadedModule::addAddressRange(uptr beg, uptr end) {
     35   CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
     36   ranges_[n_ranges_].beg = beg;
     37   ranges_[n_ranges_].end = end;
     38   n_ranges_++;
     39 }
     40 
     41 bool LoadedModule::containsAddress(uptr address) const {
     42   for (uptr i = 0; i < n_ranges_; i++) {
     43     if (ranges_[i].beg <= address && address < ranges_[i].end)
     44       return true;
     45   }
     46   return false;
     47 }
     48 
     49 // Extracts the prefix of "str" that consists of any characters not
     50 // present in "delims" string, and copies this prefix to "result", allocating
     51 // space for it.
     52 // Returns a pointer to "str" after skipping extracted prefix and first
     53 // delimiter char.
     54 static const char *ExtractToken(const char *str, const char *delims,
     55                                 char **result) {
     56   uptr prefix_len = internal_strcspn(str, delims);
     57   *result = (char*)InternalAlloc(prefix_len + 1);
     58   internal_memcpy(*result, str, prefix_len);
     59   (*result)[prefix_len] = '\0';
     60   const char *prefix_end = str + prefix_len;
     61   if (*prefix_end != '\0') prefix_end++;
     62   return prefix_end;
     63 }
     64 
     65 // Same as ExtractToken, but converts extracted token to integer.
     66 static const char *ExtractInt(const char *str, const char *delims,
     67                               int *result) {
     68   char *buff;
     69   const char *ret = ExtractToken(str, delims, &buff);
     70   if (buff != 0) {
     71     *result = (int)internal_atoll(buff);
     72   }
     73   InternalFree(buff);
     74   return ret;
     75 }
     76 
     77 static const char *ExtractUptr(const char *str, const char *delims,
     78                                uptr *result) {
     79   char *buff;
     80   const char *ret = ExtractToken(str, delims, &buff);
     81   if (buff != 0) {
     82     *result = (uptr)internal_atoll(buff);
     83   }
     84   InternalFree(buff);
     85   return ret;
     86 }
     87 
     88 // ExternalSymbolizer encapsulates communication between the tool and
     89 // external symbolizer program, running in a different subprocess,
     90 // For now we assume the following protocol:
     91 // For each request of the form
     92 //   <module_name> <module_offset>
     93 // passed to STDIN, external symbolizer prints to STDOUT response:
     94 //   <function_name>
     95 //   <file_name>:<line_number>:<column_number>
     96 //   <function_name>
     97 //   <file_name>:<line_number>:<column_number>
     98 //   ...
     99 //   <empty line>
    100 class ExternalSymbolizer {
    101  public:
    102   ExternalSymbolizer(const char *path, int input_fd, int output_fd)
    103       : path_(path),
    104         input_fd_(input_fd),
    105         output_fd_(output_fd),
    106         times_restarted_(0) {
    107     CHECK(path_);
    108     CHECK_NE(input_fd_, kInvalidFd);
    109     CHECK_NE(output_fd_, kInvalidFd);
    110   }
    111 
    112   char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
    113     CHECK(module_name);
    114     internal_snprintf(buffer_, kBufferSize, "%s%s 0x%zx\n",
    115                       is_data ? "DATA " : "", module_name, module_offset);
    116     if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
    117       return 0;
    118     if (!readFromSymbolizer(buffer_, kBufferSize))
    119       return 0;
    120     return buffer_;
    121   }
    122 
    123   bool Restart() {
    124     if (times_restarted_ >= kMaxTimesRestarted) return false;
    125     times_restarted_++;
    126     internal_close(input_fd_);
    127     internal_close(output_fd_);
    128     return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
    129   }
    130 
    131  private:
    132   bool readFromSymbolizer(char *buffer, uptr max_length) {
    133     if (max_length == 0)
    134       return true;
    135     uptr read_len = 0;
    136     while (true) {
    137       uptr just_read = internal_read(input_fd_, buffer + read_len,
    138                                      max_length - read_len);
    139       // We can't read 0 bytes, as we don't expect external symbolizer to close
    140       // its stdout.
    141       if (just_read == 0 || just_read == (uptr)-1) {
    142         Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
    143         return false;
    144       }
    145       read_len += just_read;
    146       // Empty line marks the end of symbolizer output.
    147       if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
    148                            buffer[read_len - 2] == '\n') {
    149         break;
    150       }
    151     }
    152     return true;
    153   }
    154 
    155   bool writeToSymbolizer(const char *buffer, uptr length) {
    156     if (length == 0)
    157       return true;
    158     uptr write_len = internal_write(output_fd_, buffer, length);
    159     if (write_len == 0 || write_len == (uptr)-1) {
    160       Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
    161       return false;
    162     }
    163     return true;
    164   }
    165 
    166   const char *path_;
    167   int input_fd_;
    168   int output_fd_;
    169 
    170   static const uptr kBufferSize = 16 * 1024;
    171   char buffer_[kBufferSize];
    172 
    173   static const uptr kMaxTimesRestarted = 5;
    174   uptr times_restarted_;
    175 };
    176 
// Allocator backing the symbolizer objects created in this file: the
// InternalSymbolizer, the ExternalSymbolizer and the array of loaded
// modules. Nothing allocated from it is freed in this file.
static LowLevelAllocator symbolizer_allocator;  // Linker initialized.
    178 
    179 #if SANITIZER_SUPPORTS_WEAK_HOOKS
    180 extern "C" {
    181 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
    182 bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
    183                                 char *Buffer, int MaxLength);
    184 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
    185 bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
    186                                 char *Buffer, int MaxLength);
    187 }  // extern "C"
    188 
    189 class InternalSymbolizer {
    190  public:
    191   typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
    192   static InternalSymbolizer *get() {
    193     if (__sanitizer_symbolize_code != 0 &&
    194         __sanitizer_symbolize_data != 0) {
    195       void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer));
    196       return new(mem) InternalSymbolizer();
    197     }
    198     return 0;
    199   }
    200   char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
    201     SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
    202                                                 : __sanitizer_symbolize_code;
    203     if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
    204       return buffer_;
    205     return 0;
    206   }
    207 
    208  private:
    209   InternalSymbolizer() { }
    210 
    211   static const int kBufferSize = 16 * 1024;
    212   char buffer_[kBufferSize];
    213 };
    214 #else  // SANITIZER_SUPPORTS_WEAK_HOOKS
    215 
    216 class InternalSymbolizer {
    217  public:
    218   static InternalSymbolizer *get() { return 0; }
    219   char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
    220     return 0;
    221   }
    222 };
    223 
    224 #endif  // SANITIZER_SUPPORTS_WEAK_HOOKS
    225 
    226 class Symbolizer {
    227  public:
    228   uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
    229     if (max_frames == 0)
    230       return 0;
    231     LoadedModule *module = FindModuleForAddress(addr);
    232     if (module == 0)
    233       return 0;
    234     const char *module_name = module->full_name();
    235     uptr module_offset = addr - module->base_address();
    236     const char *str = SendCommand(false, module_name, module_offset);
    237     if (str == 0) {
    238       // External symbolizer was not initialized or failed. Fill only data
    239       // about module name and offset.
    240       AddressInfo *info = &frames[0];
    241       info->Clear();
    242       info->FillAddressAndModuleInfo(addr, module_name, module_offset);
    243       return 1;
    244     }
    245     uptr frame_id = 0;
    246     for (frame_id = 0; frame_id < max_frames; frame_id++) {
    247       AddressInfo *info = &frames[frame_id];
    248       char *function_name = 0;
    249       str = ExtractToken(str, "\n", &function_name);
    250       CHECK(function_name);
    251       if (function_name[0] == '\0') {
    252         // There are no more frames.
    253         break;
    254       }
    255       info->Clear();
    256       info->FillAddressAndModuleInfo(addr, module_name, module_offset);
    257       info->function = function_name;
    258       // Parse <file>:<line>:<column> buffer.
    259       char *file_line_info = 0;
    260       str = ExtractToken(str, "\n", &file_line_info);
    261       CHECK(file_line_info);
    262       const char *line_info = ExtractToken(file_line_info, ":", &info->file);
    263       line_info = ExtractInt(line_info, ":", &info->line);
    264       line_info = ExtractInt(line_info, "", &info->column);
    265       InternalFree(file_line_info);
    266 
    267       // Functions and filenames can be "??", in which case we write 0
    268       // to address info to mark that names are unknown.
    269       if (0 == internal_strcmp(info->function, "??")) {
    270         InternalFree(info->function);
    271         info->function = 0;
    272       }
    273       if (0 == internal_strcmp(info->file, "??")) {
    274         InternalFree(info->file);
    275         info->file = 0;
    276       }
    277     }
    278     if (frame_id == 0) {
    279       // Make sure we return at least one frame.
    280       AddressInfo *info = &frames[0];
    281       info->Clear();
    282       info->FillAddressAndModuleInfo(addr, module_name, module_offset);
    283       frame_id = 1;
    284     }
    285     return frame_id;
    286   }
    287 
    288   bool SymbolizeData(uptr addr, DataInfo *info) {
    289     LoadedModule *module = FindModuleForAddress(addr);
    290     if (module == 0)
    291       return false;
    292     const char *module_name = module->full_name();
    293     uptr module_offset = addr - module->base_address();
    294     internal_memset(info, 0, sizeof(*info));
    295     info->address = addr;
    296     info->module = internal_strdup(module_name);
    297     info->module_offset = module_offset;
    298     const char *str = SendCommand(true, module_name, module_offset);
    299     if (str == 0)
    300       return true;
    301     str = ExtractToken(str, "\n", &info->name);
    302     str = ExtractUptr(str, " ", &info->start);
    303     str = ExtractUptr(str, "\n", &info->size);
    304     info->start += module->base_address();
    305     return true;
    306   }
    307 
    308   bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
    309     int input_fd, output_fd;
    310     if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
    311       return false;
    312     void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer));
    313     external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer,
    314                                                        input_fd, output_fd);
    315     return true;
    316   }
    317 
    318   bool IsSymbolizerAvailable() {
    319     if (internal_symbolizer_ == 0)
    320       internal_symbolizer_ = InternalSymbolizer::get();
    321     return internal_symbolizer_ || external_symbolizer_;
    322   }
    323 
    324  private:
    325   char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
    326     // First, try to use internal symbolizer.
    327     if (!IsSymbolizerAvailable()) {
    328       return 0;
    329     }
    330     if (internal_symbolizer_) {
    331       return internal_symbolizer_->SendCommand(is_data, module_name,
    332                                                module_offset);
    333     }
    334     // Otherwise, fall back to external symbolizer.
    335     if (external_symbolizer_ == 0) {
    336       ReportExternalSymbolizerError(
    337           "WARNING: Trying to symbolize code, but external "
    338           "symbolizer is not initialized!\n");
    339       return 0;
    340     }
    341     for (;;) {
    342       char *reply = external_symbolizer_->SendCommand(is_data, module_name,
    343           module_offset);
    344       if (reply)
    345         return reply;
    346       // Try to restart symbolizer subprocess. If we don't succeed, forget
    347       // about it and don't try to use it later.
    348       if (!external_symbolizer_->Restart()) {
    349         ReportExternalSymbolizerError(
    350             "WARNING: Failed to use and restart external symbolizer!\n");
    351         external_symbolizer_ = 0;
    352         return 0;
    353       }
    354     }
    355   }
    356 
    357   LoadedModule *FindModuleForAddress(uptr address) {
    358     if (modules_ == 0) {
    359       modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
    360           kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
    361       CHECK(modules_);
    362       n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts);
    363       // FIXME: Return this check when GetListOfModules is implemented on Mac.
    364       // CHECK_GT(n_modules_, 0);
    365       CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
    366     }
    367     for (uptr i = 0; i < n_modules_; i++) {
    368       if (modules_[i].containsAddress(address)) {
    369         return &modules_[i];
    370       }
    371     }
    372     return 0;
    373   }
    374   void ReportExternalSymbolizerError(const char *msg) {
    375     // Don't use atomics here for now, as SymbolizeCode can't be called
    376     // from multiple threads anyway.
    377     static bool reported;
    378     if (!reported) {
    379       Report(msg);
    380       reported = true;
    381     }
    382   }
    383 
    384   // 16K loaded modules should be enough for everyone.
    385   static const uptr kMaxNumberOfModuleContexts = 1 << 14;
    386   LoadedModule *modules_;  // Array of module descriptions is leaked.
    387   uptr n_modules_;
    388 
    389   ExternalSymbolizer *external_symbolizer_;  // Leaked.
    390   InternalSymbolizer *internal_symbolizer_;  // Leaked.
    391 };
    392 
// The single Symbolizer instance behind the free functions below. It has no
// constructor, so its pointer members start out as 0 through static
// zero-initialization.
static Symbolizer symbolizer;  // Linker initialized.
    394 
// Forwards to Symbolizer::SymbolizeCode on the file-level instance and
// returns the number of entries of "frames" that were filled.
uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) {
  return symbolizer.SymbolizeCode(address, frames, max_frames);
}
    398 
// Forwards to Symbolizer::SymbolizeData on the file-level instance.
// Returns false if no loaded module contains "address".
bool SymbolizeData(uptr address, DataInfo *info) {
  return symbolizer.SymbolizeData(address, info);
}
    402 
// Forwards to Symbolizer::InitializeExternalSymbolizer on the file-level
// instance. Returns false if the symbolizer subprocess could not be started.
bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
  return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
}
    406 
// Forwards to Symbolizer::IsSymbolizerAvailable on the file-level instance.
// Returns true if an internal or an external symbolizer can be used.
bool IsSymbolizerAvailable() {
  return symbolizer.IsSymbolizerAvailable();
}
    410 
    411 }  // namespace __sanitizer
    412