1 //===-- sanitizer_symbolizer.cc -------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is shared between AddressSanitizer and ThreadSanitizer 11 // run-time libraries. See sanitizer_symbolizer.h for details. 12 //===----------------------------------------------------------------------===// 13 14 #include "sanitizer_common.h" 15 #include "sanitizer_placement_new.h" 16 #include "sanitizer_procmaps.h" 17 #include "sanitizer_symbolizer.h" 18 19 namespace __sanitizer { 20 21 void AddressInfo::Clear() { 22 InternalFree(module); 23 InternalFree(function); 24 InternalFree(file); 25 internal_memset(this, 0, sizeof(AddressInfo)); 26 } 27 28 LoadedModule::LoadedModule(const char *module_name, uptr base_address) { 29 full_name_ = internal_strdup(module_name); 30 base_address_ = base_address; 31 n_ranges_ = 0; 32 } 33 34 void LoadedModule::addAddressRange(uptr beg, uptr end) { 35 CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges); 36 ranges_[n_ranges_].beg = beg; 37 ranges_[n_ranges_].end = end; 38 n_ranges_++; 39 } 40 41 bool LoadedModule::containsAddress(uptr address) const { 42 for (uptr i = 0; i < n_ranges_; i++) { 43 if (ranges_[i].beg <= address && address < ranges_[i].end) 44 return true; 45 } 46 return false; 47 } 48 49 // Extracts the prefix of "str" that consists of any characters not 50 // present in "delims" string, and copies this prefix to "result", allocating 51 // space for it. 52 // Returns a pointer to "str" after skipping extracted prefix and first 53 // delimiter char. 54 static const char *ExtractToken(const char *str, const char *delims, 55 char **result) { 56 uptr prefix_len = internal_strcspn(str, delims); 57 *result = (char*)InternalAlloc(prefix_len + 1); 58 internal_memcpy(*result, str, prefix_len); 59 (*result)[prefix_len] = '\0'; 60 const char *prefix_end = str + prefix_len; 61 if (*prefix_end != '\0') prefix_end++; 62 return prefix_end; 63 } 64 65 // Same as ExtractToken, but converts extracted token to integer. 66 static const char *ExtractInt(const char *str, const char *delims, 67 int *result) { 68 char *buff; 69 const char *ret = ExtractToken(str, delims, &buff); 70 if (buff != 0) { 71 *result = (int)internal_atoll(buff); 72 } 73 InternalFree(buff); 74 return ret; 75 } 76 77 static const char *ExtractUptr(const char *str, const char *delims, 78 uptr *result) { 79 char *buff; 80 const char *ret = ExtractToken(str, delims, &buff); 81 if (buff != 0) { 82 *result = (uptr)internal_atoll(buff); 83 } 84 InternalFree(buff); 85 return ret; 86 } 87 88 // ExternalSymbolizer encapsulates communication between the tool and 89 // external symbolizer program, running in a different subprocess, 90 // For now we assume the following protocol: 91 // For each request of the form 92 // <module_name> <module_offset> 93 // passed to STDIN, external symbolizer prints to STDOUT response: 94 // <function_name> 95 // <file_name>:<line_number>:<column_number> 96 // <function_name> 97 // <file_name>:<line_number>:<column_number> 98 // ... 99 // <empty line> 100 class ExternalSymbolizer { 101 public: 102 ExternalSymbolizer(const char *path, int input_fd, int output_fd) 103 : path_(path), 104 input_fd_(input_fd), 105 output_fd_(output_fd), 106 times_restarted_(0) { 107 CHECK(path_); 108 CHECK_NE(input_fd_, kInvalidFd); 109 CHECK_NE(output_fd_, kInvalidFd); 110 } 111 112 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 113 CHECK(module_name); 114 internal_snprintf(buffer_, kBufferSize, "%s%s 0x%zx\n", 115 is_data ? "DATA " : "", module_name, module_offset); 116 if (!writeToSymbolizer(buffer_, internal_strlen(buffer_))) 117 return 0; 118 if (!readFromSymbolizer(buffer_, kBufferSize)) 119 return 0; 120 return buffer_; 121 } 122 123 bool Restart() { 124 if (times_restarted_ >= kMaxTimesRestarted) return false; 125 times_restarted_++; 126 internal_close(input_fd_); 127 internal_close(output_fd_); 128 return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_); 129 } 130 131 private: 132 bool readFromSymbolizer(char *buffer, uptr max_length) { 133 if (max_length == 0) 134 return true; 135 uptr read_len = 0; 136 while (true) { 137 uptr just_read = internal_read(input_fd_, buffer + read_len, 138 max_length - read_len); 139 // We can't read 0 bytes, as we don't expect external symbolizer to close 140 // its stdout. 141 if (just_read == 0 || just_read == (uptr)-1) { 142 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); 143 return false; 144 } 145 read_len += just_read; 146 // Empty line marks the end of symbolizer output. 147 if (read_len >= 2 && buffer[read_len - 1] == '\n' && 148 buffer[read_len - 2] == '\n') { 149 break; 150 } 151 } 152 return true; 153 } 154 155 bool writeToSymbolizer(const char *buffer, uptr length) { 156 if (length == 0) 157 return true; 158 uptr write_len = internal_write(output_fd_, buffer, length); 159 if (write_len == 0 || write_len == (uptr)-1) { 160 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); 161 return false; 162 } 163 return true; 164 } 165 166 const char *path_; 167 int input_fd_; 168 int output_fd_; 169 170 static const uptr kBufferSize = 16 * 1024; 171 char buffer_[kBufferSize]; 172 173 static const uptr kMaxTimesRestarted = 5; 174 uptr times_restarted_; 175 }; 176 177 static LowLevelAllocator symbolizer_allocator; // Linker initialized. 178 179 #if SANITIZER_SUPPORTS_WEAK_HOOKS 180 extern "C" { 181 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 182 bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset, 183 char *Buffer, int MaxLength); 184 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 185 bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset, 186 char *Buffer, int MaxLength); 187 } // extern "C" 188 189 class InternalSymbolizer { 190 public: 191 typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int); 192 static InternalSymbolizer *get() { 193 if (__sanitizer_symbolize_code != 0 && 194 __sanitizer_symbolize_data != 0) { 195 void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer)); 196 return new(mem) InternalSymbolizer(); 197 } 198 return 0; 199 } 200 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 201 SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data 202 : __sanitizer_symbolize_code; 203 if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize)) 204 return buffer_; 205 return 0; 206 } 207 208 private: 209 InternalSymbolizer() { } 210 211 static const int kBufferSize = 16 * 1024; 212 char buffer_[kBufferSize]; 213 }; 214 #else // SANITIZER_SUPPORTS_WEAK_HOOKS 215 216 class InternalSymbolizer { 217 public: 218 static InternalSymbolizer *get() { return 0; } 219 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 220 return 0; 221 } 222 }; 223 224 #endif // SANITIZER_SUPPORTS_WEAK_HOOKS 225 226 class Symbolizer { 227 public: 228 uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) { 229 if (max_frames == 0) 230 return 0; 231 LoadedModule *module = FindModuleForAddress(addr); 232 if (module == 0) 233 return 0; 234 const char *module_name = module->full_name(); 235 uptr module_offset = addr - module->base_address(); 236 const char *str = SendCommand(false, module_name, module_offset); 237 if (str == 0) { 238 // External symbolizer was not initialized or failed. Fill only data 239 // about module name and offset. 240 AddressInfo *info = &frames[0]; 241 info->Clear(); 242 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 243 return 1; 244 } 245 uptr frame_id = 0; 246 for (frame_id = 0; frame_id < max_frames; frame_id++) { 247 AddressInfo *info = &frames[frame_id]; 248 char *function_name = 0; 249 str = ExtractToken(str, "\n", &function_name); 250 CHECK(function_name); 251 if (function_name[0] == '\0') { 252 // There are no more frames. 253 break; 254 } 255 info->Clear(); 256 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 257 info->function = function_name; 258 // Parse <file>:<line>:<column> buffer. 259 char *file_line_info = 0; 260 str = ExtractToken(str, "\n", &file_line_info); 261 CHECK(file_line_info); 262 const char *line_info = ExtractToken(file_line_info, ":", &info->file); 263 line_info = ExtractInt(line_info, ":", &info->line); 264 line_info = ExtractInt(line_info, "", &info->column); 265 InternalFree(file_line_info); 266 267 // Functions and filenames can be "??", in which case we write 0 268 // to address info to mark that names are unknown. 269 if (0 == internal_strcmp(info->function, "??")) { 270 InternalFree(info->function); 271 info->function = 0; 272 } 273 if (0 == internal_strcmp(info->file, "??")) { 274 InternalFree(info->file); 275 info->file = 0; 276 } 277 } 278 if (frame_id == 0) { 279 // Make sure we return at least one frame. 280 AddressInfo *info = &frames[0]; 281 info->Clear(); 282 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 283 frame_id = 1; 284 } 285 return frame_id; 286 } 287 288 bool SymbolizeData(uptr addr, DataInfo *info) { 289 LoadedModule *module = FindModuleForAddress(addr); 290 if (module == 0) 291 return false; 292 const char *module_name = module->full_name(); 293 uptr module_offset = addr - module->base_address(); 294 internal_memset(info, 0, sizeof(*info)); 295 info->address = addr; 296 info->module = internal_strdup(module_name); 297 info->module_offset = module_offset; 298 const char *str = SendCommand(true, module_name, module_offset); 299 if (str == 0) 300 return true; 301 str = ExtractToken(str, "\n", &info->name); 302 str = ExtractUptr(str, " ", &info->start); 303 str = ExtractUptr(str, "\n", &info->size); 304 info->start += module->base_address(); 305 return true; 306 } 307 308 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { 309 int input_fd, output_fd; 310 if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd)) 311 return false; 312 void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer)); 313 external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer, 314 input_fd, output_fd); 315 return true; 316 } 317 318 bool IsSymbolizerAvailable() { 319 if (internal_symbolizer_ == 0) 320 internal_symbolizer_ = InternalSymbolizer::get(); 321 return internal_symbolizer_ || external_symbolizer_; 322 } 323 324 private: 325 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 326 // First, try to use internal symbolizer. 327 if (!IsSymbolizerAvailable()) { 328 return 0; 329 } 330 if (internal_symbolizer_) { 331 return internal_symbolizer_->SendCommand(is_data, module_name, 332 module_offset); 333 } 334 // Otherwise, fall back to external symbolizer. 335 if (external_symbolizer_ == 0) { 336 ReportExternalSymbolizerError( 337 "WARNING: Trying to symbolize code, but external " 338 "symbolizer is not initialized!\n"); 339 return 0; 340 } 341 for (;;) { 342 char *reply = external_symbolizer_->SendCommand(is_data, module_name, 343 module_offset); 344 if (reply) 345 return reply; 346 // Try to restart symbolizer subprocess. If we don't succeed, forget 347 // about it and don't try to use it later. 348 if (!external_symbolizer_->Restart()) { 349 ReportExternalSymbolizerError( 350 "WARNING: Failed to use and restart external symbolizer!\n"); 351 external_symbolizer_ = 0; 352 return 0; 353 } 354 } 355 } 356 357 LoadedModule *FindModuleForAddress(uptr address) { 358 if (modules_ == 0) { 359 modules_ = (LoadedModule*)(symbolizer_allocator.Allocate( 360 kMaxNumberOfModuleContexts * sizeof(LoadedModule))); 361 CHECK(modules_); 362 n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts); 363 // FIXME: Return this check when GetListOfModules is implemented on Mac. 364 // CHECK_GT(n_modules_, 0); 365 CHECK_LT(n_modules_, kMaxNumberOfModuleContexts); 366 } 367 for (uptr i = 0; i < n_modules_; i++) { 368 if (modules_[i].containsAddress(address)) { 369 return &modules_[i]; 370 } 371 } 372 return 0; 373 } 374 void ReportExternalSymbolizerError(const char *msg) { 375 // Don't use atomics here for now, as SymbolizeCode can't be called 376 // from multiple threads anyway. 377 static bool reported; 378 if (!reported) { 379 Report(msg); 380 reported = true; 381 } 382 } 383 384 // 16K loaded modules should be enough for everyone. 385 static const uptr kMaxNumberOfModuleContexts = 1 << 14; 386 LoadedModule *modules_; // Array of module descriptions is leaked. 387 uptr n_modules_; 388 389 ExternalSymbolizer *external_symbolizer_; // Leaked. 390 InternalSymbolizer *internal_symbolizer_; // Leaked. 391 }; 392 393 static Symbolizer symbolizer; // Linker initialized. 394 395 uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) { 396 return symbolizer.SymbolizeCode(address, frames, max_frames); 397 } 398 399 bool SymbolizeData(uptr address, DataInfo *info) { 400 return symbolizer.SymbolizeData(address, info); 401 } 402 403 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { 404 return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer); 405 } 406 407 bool IsSymbolizerAvailable() { 408 return symbolizer.IsSymbolizerAvailable(); 409 } 410 411 } // namespace __sanitizer 412