1 //===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is shared between AddressSanitizer and ThreadSanitizer 11 // run-time libraries. See sanitizer_symbolizer.h for details. 12 //===----------------------------------------------------------------------===// 13 14 #include "sanitizer_allocator_internal.h" 15 #include "sanitizer_common.h" 16 #include "sanitizer_placement_new.h" 17 #include "sanitizer_procmaps.h" 18 #include "sanitizer_symbolizer.h" 19 20 namespace __sanitizer { 21 22 void AddressInfo::Clear() { 23 InternalFree(module); 24 InternalFree(function); 25 InternalFree(file); 26 internal_memset(this, 0, sizeof(AddressInfo)); 27 } 28 29 LoadedModule::LoadedModule(const char *module_name, uptr base_address) { 30 full_name_ = internal_strdup(module_name); 31 base_address_ = base_address; 32 n_ranges_ = 0; 33 } 34 35 void LoadedModule::addAddressRange(uptr beg, uptr end) { 36 CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges); 37 ranges_[n_ranges_].beg = beg; 38 ranges_[n_ranges_].end = end; 39 n_ranges_++; 40 } 41 42 bool LoadedModule::containsAddress(uptr address) const { 43 for (uptr i = 0; i < n_ranges_; i++) { 44 if (ranges_[i].beg <= address && address < ranges_[i].end) 45 return true; 46 } 47 return false; 48 } 49 50 // Extracts the prefix of "str" that consists of any characters not 51 // present in "delims" string, and copies this prefix to "result", allocating 52 // space for it. 53 // Returns a pointer to "str" after skipping extracted prefix and first 54 // delimiter char. 55 static const char *ExtractToken(const char *str, const char *delims, 56 char **result) { 57 uptr prefix_len = internal_strcspn(str, delims); 58 *result = (char*)InternalAlloc(prefix_len + 1); 59 internal_memcpy(*result, str, prefix_len); 60 (*result)[prefix_len] = '\0'; 61 const char *prefix_end = str + prefix_len; 62 if (*prefix_end != '\0') prefix_end++; 63 return prefix_end; 64 } 65 66 // Same as ExtractToken, but converts extracted token to integer. 67 static const char *ExtractInt(const char *str, const char *delims, 68 int *result) { 69 char *buff; 70 const char *ret = ExtractToken(str, delims, &buff); 71 if (buff != 0) { 72 *result = (int)internal_atoll(buff); 73 } 74 InternalFree(buff); 75 return ret; 76 } 77 78 static const char *ExtractUptr(const char *str, const char *delims, 79 uptr *result) { 80 char *buff; 81 const char *ret = ExtractToken(str, delims, &buff); 82 if (buff != 0) { 83 *result = (uptr)internal_atoll(buff); 84 } 85 InternalFree(buff); 86 return ret; 87 } 88 89 // ExternalSymbolizer encapsulates communication between the tool and 90 // external symbolizer program, running in a different subprocess, 91 // For now we assume the following protocol: 92 // For each request of the form 93 // <module_name> <module_offset> 94 // passed to STDIN, external symbolizer prints to STDOUT response: 95 // <function_name> 96 // <file_name>:<line_number>:<column_number> 97 // <function_name> 98 // <file_name>:<line_number>:<column_number> 99 // ... 100 // <empty line> 101 class ExternalSymbolizer { 102 public: 103 ExternalSymbolizer(const char *path, int input_fd, int output_fd) 104 : path_(path), 105 input_fd_(input_fd), 106 output_fd_(output_fd), 107 times_restarted_(0) { 108 CHECK(path_); 109 CHECK_NE(input_fd_, kInvalidFd); 110 CHECK_NE(output_fd_, kInvalidFd); 111 } 112 113 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 114 CHECK(module_name); 115 internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", 116 is_data ? "DATA " : "", module_name, module_offset); 117 if (!writeToSymbolizer(buffer_, internal_strlen(buffer_))) 118 return 0; 119 if (!readFromSymbolizer(buffer_, kBufferSize)) 120 return 0; 121 return buffer_; 122 } 123 124 bool Restart() { 125 if (times_restarted_ >= kMaxTimesRestarted) return false; 126 times_restarted_++; 127 internal_close(input_fd_); 128 internal_close(output_fd_); 129 return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_); 130 } 131 132 void Flush() { 133 } 134 135 private: 136 bool readFromSymbolizer(char *buffer, uptr max_length) { 137 if (max_length == 0) 138 return true; 139 uptr read_len = 0; 140 while (true) { 141 uptr just_read = internal_read(input_fd_, buffer + read_len, 142 max_length - read_len); 143 // We can't read 0 bytes, as we don't expect external symbolizer to close 144 // its stdout. 145 if (just_read == 0 || just_read == (uptr)-1) { 146 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); 147 return false; 148 } 149 read_len += just_read; 150 // Empty line marks the end of symbolizer output. 151 if (read_len >= 2 && buffer[read_len - 1] == '\n' && 152 buffer[read_len - 2] == '\n') { 153 break; 154 } 155 } 156 return true; 157 } 158 159 bool writeToSymbolizer(const char *buffer, uptr length) { 160 if (length == 0) 161 return true; 162 uptr write_len = internal_write(output_fd_, buffer, length); 163 if (write_len == 0 || write_len == (uptr)-1) { 164 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); 165 return false; 166 } 167 return true; 168 } 169 170 const char *path_; 171 int input_fd_; 172 int output_fd_; 173 174 static const uptr kBufferSize = 16 * 1024; 175 char buffer_[kBufferSize]; 176 177 static const uptr kMaxTimesRestarted = 5; 178 uptr times_restarted_; 179 }; 180 181 static LowLevelAllocator symbolizer_allocator; // Linker initialized. 182 183 #if SANITIZER_SUPPORTS_WEAK_HOOKS 184 extern "C" { 185 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 186 bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset, 187 char *Buffer, int MaxLength); 188 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 189 bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset, 190 char *Buffer, int MaxLength); 191 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 192 void __sanitizer_symbolize_flush(); 193 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE 194 int __sanitizer_symbolize_demangle(const char *Name, char *Buffer, 195 int MaxLength); 196 } // extern "C" 197 198 class InternalSymbolizer { 199 public: 200 typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int); 201 202 static InternalSymbolizer *get() { 203 if (__sanitizer_symbolize_code != 0 && 204 __sanitizer_symbolize_data != 0) { 205 void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer)); 206 return new(mem) InternalSymbolizer(); 207 } 208 return 0; 209 } 210 211 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 212 SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data 213 : __sanitizer_symbolize_code; 214 if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize)) 215 return buffer_; 216 return 0; 217 } 218 219 void Flush() { 220 if (__sanitizer_symbolize_flush) 221 __sanitizer_symbolize_flush(); 222 } 223 224 const char *Demangle(const char *name) { 225 if (__sanitizer_symbolize_demangle) { 226 for (uptr res_length = 1024; 227 res_length <= InternalSizeClassMap::kMaxSize;) { 228 char *res_buff = static_cast<char*>(InternalAlloc(res_length)); 229 uptr req_length = 230 __sanitizer_symbolize_demangle(name, res_buff, res_length); 231 if (req_length > res_length) { 232 res_length = req_length + 1; 233 InternalFree(res_buff); 234 continue; 235 } 236 return res_buff; 237 } 238 } 239 return name; 240 } 241 242 private: 243 InternalSymbolizer() { } 244 245 static const int kBufferSize = 16 * 1024; 246 static const int kMaxDemangledNameSize = 1024; 247 char buffer_[kBufferSize]; 248 }; 249 #else // SANITIZER_SUPPORTS_WEAK_HOOKS 250 251 class InternalSymbolizer { 252 public: 253 static InternalSymbolizer *get() { return 0; } 254 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 255 return 0; 256 } 257 void Flush() { } 258 const char *Demangle(const char *name) { return name; } 259 }; 260 261 #endif // SANITIZER_SUPPORTS_WEAK_HOOKS 262 263 class Symbolizer { 264 // This class has no constructor, as global constructors are forbidden in 265 // sanitizer_common. It should be linker initialized instead. 266 public: 267 uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) { 268 if (max_frames == 0) 269 return 0; 270 LoadedModule *module = FindModuleForAddress(addr); 271 if (module == 0) 272 return 0; 273 const char *module_name = module->full_name(); 274 uptr module_offset = addr - module->base_address(); 275 const char *str = SendCommand(false, module_name, module_offset); 276 if (str == 0) { 277 // External symbolizer was not initialized or failed. Fill only data 278 // about module name and offset. 279 AddressInfo *info = &frames[0]; 280 info->Clear(); 281 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 282 return 1; 283 } 284 uptr frame_id = 0; 285 for (frame_id = 0; frame_id < max_frames; frame_id++) { 286 AddressInfo *info = &frames[frame_id]; 287 char *function_name = 0; 288 str = ExtractToken(str, "\n", &function_name); 289 CHECK(function_name); 290 if (function_name[0] == '\0') { 291 // There are no more frames. 292 break; 293 } 294 info->Clear(); 295 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 296 info->function = function_name; 297 // Parse <file>:<line>:<column> buffer. 298 char *file_line_info = 0; 299 str = ExtractToken(str, "\n", &file_line_info); 300 CHECK(file_line_info); 301 const char *line_info = ExtractToken(file_line_info, ":", &info->file); 302 line_info = ExtractInt(line_info, ":", &info->line); 303 line_info = ExtractInt(line_info, "", &info->column); 304 InternalFree(file_line_info); 305 306 // Functions and filenames can be "??", in which case we write 0 307 // to address info to mark that names are unknown. 308 if (0 == internal_strcmp(info->function, "??")) { 309 InternalFree(info->function); 310 info->function = 0; 311 } 312 if (0 == internal_strcmp(info->file, "??")) { 313 InternalFree(info->file); 314 info->file = 0; 315 } 316 } 317 if (frame_id == 0) { 318 // Make sure we return at least one frame. 319 AddressInfo *info = &frames[0]; 320 info->Clear(); 321 info->FillAddressAndModuleInfo(addr, module_name, module_offset); 322 frame_id = 1; 323 } 324 return frame_id; 325 } 326 327 bool SymbolizeData(uptr addr, DataInfo *info) { 328 LoadedModule *module = FindModuleForAddress(addr); 329 if (module == 0) 330 return false; 331 const char *module_name = module->full_name(); 332 uptr module_offset = addr - module->base_address(); 333 internal_memset(info, 0, sizeof(*info)); 334 info->address = addr; 335 info->module = internal_strdup(module_name); 336 info->module_offset = module_offset; 337 const char *str = SendCommand(true, module_name, module_offset); 338 if (str == 0) 339 return true; 340 str = ExtractToken(str, "\n", &info->name); 341 str = ExtractUptr(str, " ", &info->start); 342 str = ExtractUptr(str, "\n", &info->size); 343 info->start += module->base_address(); 344 return true; 345 } 346 347 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { 348 int input_fd, output_fd; 349 if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd)) 350 return false; 351 void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer)); 352 external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer, 353 input_fd, output_fd); 354 return true; 355 } 356 357 bool IsSymbolizerAvailable() { 358 if (internal_symbolizer_ == 0) 359 internal_symbolizer_ = InternalSymbolizer::get(); 360 return internal_symbolizer_ || external_symbolizer_; 361 } 362 363 void Flush() { 364 if (internal_symbolizer_) 365 internal_symbolizer_->Flush(); 366 if (external_symbolizer_) 367 external_symbolizer_->Flush(); 368 } 369 370 const char *Demangle(const char *name) { 371 if (IsSymbolizerAvailable() && internal_symbolizer_ != 0) 372 return internal_symbolizer_->Demangle(name); 373 return DemangleCXXABI(name); 374 } 375 376 private: 377 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) { 378 // First, try to use internal symbolizer. 379 if (!IsSymbolizerAvailable()) { 380 return 0; 381 } 382 if (internal_symbolizer_) { 383 return internal_symbolizer_->SendCommand(is_data, module_name, 384 module_offset); 385 } 386 // Otherwise, fall back to external symbolizer. 387 if (external_symbolizer_ == 0) { 388 ReportExternalSymbolizerError( 389 "WARNING: Trying to symbolize code, but external " 390 "symbolizer is not initialized!\n"); 391 return 0; 392 } 393 for (;;) { 394 char *reply = external_symbolizer_->SendCommand(is_data, module_name, 395 module_offset); 396 if (reply) 397 return reply; 398 // Try to restart symbolizer subprocess. If we don't succeed, forget 399 // about it and don't try to use it later. 400 if (!external_symbolizer_->Restart()) { 401 ReportExternalSymbolizerError( 402 "WARNING: Failed to use and restart external symbolizer!\n"); 403 external_symbolizer_ = 0; 404 return 0; 405 } 406 } 407 } 408 409 LoadedModule *FindModuleForAddress(uptr address) { 410 bool modules_were_reloaded = false; 411 if (modules_ == 0 || !modules_fresh_) { 412 modules_ = (LoadedModule*)(symbolizer_allocator.Allocate( 413 kMaxNumberOfModuleContexts * sizeof(LoadedModule))); 414 CHECK(modules_); 415 n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts, 416 /* filter */ 0); 417 // FIXME: Return this check when GetListOfModules is implemented on Mac. 418 // CHECK_GT(n_modules_, 0); 419 CHECK_LT(n_modules_, kMaxNumberOfModuleContexts); 420 modules_fresh_ = true; 421 modules_were_reloaded = true; 422 } 423 for (uptr i = 0; i < n_modules_; i++) { 424 if (modules_[i].containsAddress(address)) { 425 return &modules_[i]; 426 } 427 } 428 // Reload the modules and look up again, if we haven't tried it yet. 429 if (!modules_were_reloaded) { 430 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors. 431 // It's too aggressive to reload the list of modules each time we fail 432 // to find a module for a given address. 433 modules_fresh_ = false; 434 return FindModuleForAddress(address); 435 } 436 return 0; 437 } 438 439 void ReportExternalSymbolizerError(const char *msg) { 440 // Don't use atomics here for now, as SymbolizeCode can't be called 441 // from multiple threads anyway. 442 static bool reported; 443 if (!reported) { 444 Report(msg); 445 reported = true; 446 } 447 } 448 449 // 16K loaded modules should be enough for everyone. 450 static const uptr kMaxNumberOfModuleContexts = 1 << 14; 451 LoadedModule *modules_; // Array of module descriptions is leaked. 452 uptr n_modules_; 453 // If stale, need to reload the modules before looking up addresses. 454 bool modules_fresh_; 455 456 ExternalSymbolizer *external_symbolizer_; // Leaked. 457 InternalSymbolizer *internal_symbolizer_; // Leaked. 458 }; 459 460 static Symbolizer symbolizer; // Linker initialized. 461 462 uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) { 463 return symbolizer.SymbolizeCode(address, frames, max_frames); 464 } 465 466 bool SymbolizeData(uptr address, DataInfo *info) { 467 return symbolizer.SymbolizeData(address, info); 468 } 469 470 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) { 471 return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer); 472 } 473 474 bool IsSymbolizerAvailable() { 475 return symbolizer.IsSymbolizerAvailable(); 476 } 477 478 void FlushSymbolizer() { 479 symbolizer.Flush(); 480 } 481 482 const char *Demangle(const char *name) { 483 return symbolizer.Demangle(name); 484 } 485 486 } // namespace __sanitizer 487