1 //===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is shared between AddressSanitizer and ThreadSanitizer 11 // run-time libraries. 12 //===----------------------------------------------------------------------===// 13 14 #include "sanitizer_allocator_internal.h" 15 #include "sanitizer_internal_defs.h" 16 #include "sanitizer_symbolizer_internal.h" 17 18 namespace __sanitizer { 19 20 const char *ExtractToken(const char *str, const char *delims, char **result) { 21 uptr prefix_len = internal_strcspn(str, delims); 22 *result = (char*)InternalAlloc(prefix_len + 1); 23 internal_memcpy(*result, str, prefix_len); 24 (*result)[prefix_len] = '\0'; 25 const char *prefix_end = str + prefix_len; 26 if (*prefix_end != '\0') prefix_end++; 27 return prefix_end; 28 } 29 30 const char *ExtractInt(const char *str, const char *delims, int *result) { 31 char *buff; 32 const char *ret = ExtractToken(str, delims, &buff); 33 if (buff != 0) { 34 *result = (int)internal_atoll(buff); 35 } 36 InternalFree(buff); 37 return ret; 38 } 39 40 const char *ExtractUptr(const char *str, const char *delims, uptr *result) { 41 char *buff; 42 const char *ret = ExtractToken(str, delims, &buff); 43 if (buff != 0) { 44 *result = (uptr)internal_atoll(buff); 45 } 46 InternalFree(buff); 47 return ret; 48 } 49 50 const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter, 51 char **result) { 52 const char *found_delimiter = internal_strstr(str, delimiter); 53 uptr prefix_len = 54 found_delimiter ? found_delimiter - str : internal_strlen(str); 55 *result = (char *)InternalAlloc(prefix_len + 1); 56 internal_memcpy(*result, str, prefix_len); 57 (*result)[prefix_len] = '\0'; 58 const char *prefix_end = str + prefix_len; 59 if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter); 60 return prefix_end; 61 } 62 63 SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) { 64 BlockingMutexLock l(&mu_); 65 const char *module_name; 66 uptr module_offset; 67 SymbolizedStack *res = SymbolizedStack::New(addr); 68 if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset)) 69 return res; 70 // Always fill data about module name and offset. 71 res->info.FillModuleInfo(module_name, module_offset); 72 for (auto iter = Iterator(&tools_); iter.hasNext();) { 73 auto *tool = iter.next(); 74 SymbolizerScope sym_scope(this); 75 if (tool->SymbolizePC(addr, res)) { 76 return res; 77 } 78 } 79 return res; 80 } 81 82 bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) { 83 BlockingMutexLock l(&mu_); 84 const char *module_name; 85 uptr module_offset; 86 if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset)) 87 return false; 88 info->Clear(); 89 info->module = internal_strdup(module_name); 90 info->module_offset = module_offset; 91 for (auto iter = Iterator(&tools_); iter.hasNext();) { 92 auto *tool = iter.next(); 93 SymbolizerScope sym_scope(this); 94 if (tool->SymbolizeData(addr, info)) { 95 return true; 96 } 97 } 98 return true; 99 } 100 101 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name, 102 uptr *module_address) { 103 BlockingMutexLock l(&mu_); 104 const char *internal_module_name = nullptr; 105 if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name, 106 module_address)) 107 return false; 108 109 if (module_name) 110 *module_name = module_names_.GetOwnedCopy(internal_module_name); 111 return true; 112 } 113 114 void Symbolizer::Flush() { 115 BlockingMutexLock l(&mu_); 116 for (auto iter = Iterator(&tools_); iter.hasNext();) { 117 auto *tool = iter.next(); 118 SymbolizerScope sym_scope(this); 119 tool->Flush(); 120 } 121 } 122 123 const char *Symbolizer::Demangle(const char *name) { 124 BlockingMutexLock l(&mu_); 125 for (auto iter = Iterator(&tools_); iter.hasNext();) { 126 auto *tool = iter.next(); 127 SymbolizerScope sym_scope(this); 128 if (const char *demangled = tool->Demangle(name)) 129 return demangled; 130 } 131 return PlatformDemangle(name); 132 } 133 134 void Symbolizer::PrepareForSandboxing() { 135 BlockingMutexLock l(&mu_); 136 PlatformPrepareForSandboxing(); 137 } 138 139 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address, 140 const char **module_name, 141 uptr *module_offset) { 142 LoadedModule *module = FindModuleForAddress(address); 143 if (module == 0) 144 return false; 145 *module_name = module->full_name(); 146 *module_offset = address - module->base_address(); 147 return true; 148 } 149 150 LoadedModule *Symbolizer::FindModuleForAddress(uptr address) { 151 bool modules_were_reloaded = false; 152 if (!modules_fresh_) { 153 for (uptr i = 0; i < n_modules_; i++) 154 modules_[i].clear(); 155 n_modules_ = 156 GetListOfModules(modules_, kMaxNumberOfModules, /* filter */ nullptr); 157 CHECK_GT(n_modules_, 0); 158 CHECK_LT(n_modules_, kMaxNumberOfModules); 159 modules_fresh_ = true; 160 modules_were_reloaded = true; 161 } 162 for (uptr i = 0; i < n_modules_; i++) { 163 if (modules_[i].containsAddress(address)) { 164 return &modules_[i]; 165 } 166 } 167 // Reload the modules and look up again, if we haven't tried it yet. 168 if (!modules_were_reloaded) { 169 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors. 170 // It's too aggressive to reload the list of modules each time we fail 171 // to find a module for a given address. 172 modules_fresh_ = false; 173 return FindModuleForAddress(address); 174 } 175 return 0; 176 } 177 178 Symbolizer *Symbolizer::GetOrInit() { 179 SpinMutexLock l(&init_mu_); 180 if (symbolizer_) 181 return symbolizer_; 182 symbolizer_ = PlatformInit(); 183 CHECK(symbolizer_); 184 return symbolizer_; 185 } 186 187 // For now we assume the following protocol: 188 // For each request of the form 189 // <module_name> <module_offset> 190 // passed to STDIN, external symbolizer prints to STDOUT response: 191 // <function_name> 192 // <file_name>:<line_number>:<column_number> 193 // <function_name> 194 // <file_name>:<line_number>:<column_number> 195 // ... 196 // <empty line> 197 class LLVMSymbolizerProcess : public SymbolizerProcess { 198 public: 199 explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {} 200 201 private: 202 bool ReachedEndOfOutput(const char *buffer, uptr length) const override { 203 // Empty line marks the end of llvm-symbolizer output. 204 return length >= 2 && buffer[length - 1] == '\n' && 205 buffer[length - 2] == '\n'; 206 } 207 208 void GetArgV(const char *path_to_binary, 209 const char *(&argv)[kArgVMax]) const override { 210 #if defined(__x86_64h__) 211 const char* const kSymbolizerArch = "--default-arch=x86_64h"; 212 #elif defined(__x86_64__) 213 const char* const kSymbolizerArch = "--default-arch=x86_64"; 214 #elif defined(__i386__) 215 const char* const kSymbolizerArch = "--default-arch=i386"; 216 #elif defined(__powerpc64__) && defined(__BIG_ENDIAN__) 217 const char* const kSymbolizerArch = "--default-arch=powerpc64"; 218 #elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) 219 const char* const kSymbolizerArch = "--default-arch=powerpc64le"; 220 #else 221 const char* const kSymbolizerArch = "--default-arch=unknown"; 222 #endif 223 224 const char *const inline_flag = common_flags()->symbolize_inline_frames 225 ? "--inlining=true" 226 : "--inlining=false"; 227 int i = 0; 228 argv[i++] = path_to_binary; 229 argv[i++] = inline_flag; 230 argv[i++] = kSymbolizerArch; 231 argv[i++] = nullptr; 232 } 233 }; 234 235 LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator) 236 : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {} 237 238 // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on 239 // Windows, so extract tokens from the right hand side first. The column info is 240 // also optional. 241 static const char *ParseFileLineInfo(AddressInfo *info, const char *str) { 242 char *file_line_info = 0; 243 str = ExtractToken(str, "\n", &file_line_info); 244 CHECK(file_line_info); 245 // Parse the last :<int>, which must be there. 246 char *last_colon = internal_strrchr(file_line_info, ':'); 247 CHECK(last_colon); 248 int line_or_column = internal_atoll(last_colon + 1); 249 // Truncate the string at the last colon and find the next-to-last colon. 250 *last_colon = '\0'; 251 last_colon = internal_strrchr(file_line_info, ':'); 252 if (last_colon && IsDigit(last_colon[1])) { 253 // If the second-to-last colon is followed by a digit, it must be the line 254 // number, and the previous parsed number was a column. 255 info->line = internal_atoll(last_colon + 1); 256 info->column = line_or_column; 257 *last_colon = '\0'; 258 } else { 259 // Otherwise, we have line info but no column info. 260 info->line = line_or_column; 261 info->column = 0; 262 } 263 ExtractToken(file_line_info, "", &info->file); 264 InternalFree(file_line_info); 265 return str; 266 } 267 268 // Parses one or more two-line strings in the following format: 269 // <function_name> 270 // <file_name>:<line_number>[:<column_number>] 271 // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of 272 // them use the same output format. 273 void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) { 274 bool top_frame = true; 275 SymbolizedStack *last = res; 276 while (true) { 277 char *function_name = 0; 278 str = ExtractToken(str, "\n", &function_name); 279 CHECK(function_name); 280 if (function_name[0] == '\0') { 281 // There are no more frames. 282 InternalFree(function_name); 283 break; 284 } 285 SymbolizedStack *cur; 286 if (top_frame) { 287 cur = res; 288 top_frame = false; 289 } else { 290 cur = SymbolizedStack::New(res->info.address); 291 cur->info.FillModuleInfo(res->info.module, res->info.module_offset); 292 last->next = cur; 293 last = cur; 294 } 295 296 AddressInfo *info = &cur->info; 297 info->function = function_name; 298 str = ParseFileLineInfo(info, str); 299 300 // Functions and filenames can be "??", in which case we write 0 301 // to address info to mark that names are unknown. 302 if (0 == internal_strcmp(info->function, "??")) { 303 InternalFree(info->function); 304 info->function = 0; 305 } 306 if (0 == internal_strcmp(info->file, "??")) { 307 InternalFree(info->file); 308 info->file = 0; 309 } 310 } 311 } 312 313 // Parses a two-line string in the following format: 314 // <symbol_name> 315 // <start_address> <size> 316 // Used by LLVMSymbolizer and InternalSymbolizer. 317 void ParseSymbolizeDataOutput(const char *str, DataInfo *info) { 318 str = ExtractToken(str, "\n", &info->name); 319 str = ExtractUptr(str, " ", &info->start); 320 str = ExtractUptr(str, "\n", &info->size); 321 } 322 323 bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) { 324 if (const char *buf = SendCommand(/*is_data*/ false, stack->info.module, 325 stack->info.module_offset)) { 326 ParseSymbolizePCOutput(buf, stack); 327 return true; 328 } 329 return false; 330 } 331 332 bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) { 333 if (const char *buf = 334 SendCommand(/*is_data*/ true, info->module, info->module_offset)) { 335 ParseSymbolizeDataOutput(buf, info); 336 info->start += (addr - info->module_offset); // Add the base address. 337 return true; 338 } 339 return false; 340 } 341 342 const char *LLVMSymbolizer::SendCommand(bool is_data, const char *module_name, 343 uptr module_offset) { 344 CHECK(module_name); 345 internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", 346 is_data ? "DATA " : "", module_name, module_offset); 347 return symbolizer_process_->SendCommand(buffer_); 348 } 349 350 SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty) 351 : path_(path), 352 input_fd_(kInvalidFd), 353 output_fd_(kInvalidFd), 354 times_restarted_(0), 355 failed_to_start_(false), 356 reported_invalid_path_(false), 357 use_forkpty_(use_forkpty) { 358 CHECK(path_); 359 CHECK_NE(path_[0], '\0'); 360 } 361 362 const char *SymbolizerProcess::SendCommand(const char *command) { 363 for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) { 364 // Start or restart symbolizer if we failed to send command to it. 365 if (const char *res = SendCommandImpl(command)) 366 return res; 367 Restart(); 368 } 369 if (!failed_to_start_) { 370 Report("WARNING: Failed to use and restart external symbolizer!\n"); 371 failed_to_start_ = true; 372 } 373 return 0; 374 } 375 376 const char *SymbolizerProcess::SendCommandImpl(const char *command) { 377 if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd) 378 return 0; 379 if (!WriteToSymbolizer(command, internal_strlen(command))) 380 return 0; 381 if (!ReadFromSymbolizer(buffer_, kBufferSize)) 382 return 0; 383 return buffer_; 384 } 385 386 bool SymbolizerProcess::Restart() { 387 if (input_fd_ != kInvalidFd) 388 CloseFile(input_fd_); 389 if (output_fd_ != kInvalidFd) 390 CloseFile(output_fd_); 391 return StartSymbolizerSubprocess(); 392 } 393 394 bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) { 395 if (max_length == 0) 396 return true; 397 uptr read_len = 0; 398 while (true) { 399 uptr just_read = 0; 400 bool success = ReadFromFile(input_fd_, buffer + read_len, 401 max_length - read_len - 1, &just_read); 402 // We can't read 0 bytes, as we don't expect external symbolizer to close 403 // its stdout. 404 if (!success || just_read == 0) { 405 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); 406 return false; 407 } 408 read_len += just_read; 409 if (ReachedEndOfOutput(buffer, read_len)) 410 break; 411 } 412 buffer[read_len] = '\0'; 413 return true; 414 } 415 416 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) { 417 if (length == 0) 418 return true; 419 uptr write_len = 0; 420 bool success = WriteToFile(output_fd_, buffer, length, &write_len); 421 if (!success || write_len != length) { 422 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); 423 return false; 424 } 425 return true; 426 } 427 428 } // namespace __sanitizer 429