1 //===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is shared between AddressSanitizer and ThreadSanitizer 11 // run-time libraries. 12 //===----------------------------------------------------------------------===// 13 14 #include "sanitizer_allocator_internal.h" 15 #include "sanitizer_internal_defs.h" 16 #include "sanitizer_symbolizer_internal.h" 17 18 namespace __sanitizer { 19 20 const char *ExtractToken(const char *str, const char *delims, char **result) { 21 uptr prefix_len = internal_strcspn(str, delims); 22 *result = (char*)InternalAlloc(prefix_len + 1); 23 internal_memcpy(*result, str, prefix_len); 24 (*result)[prefix_len] = '\0'; 25 const char *prefix_end = str + prefix_len; 26 if (*prefix_end != '\0') prefix_end++; 27 return prefix_end; 28 } 29 30 const char *ExtractInt(const char *str, const char *delims, int *result) { 31 char *buff; 32 const char *ret = ExtractToken(str, delims, &buff); 33 if (buff != 0) { 34 *result = (int)internal_atoll(buff); 35 } 36 InternalFree(buff); 37 return ret; 38 } 39 40 const char *ExtractUptr(const char *str, const char *delims, uptr *result) { 41 char *buff; 42 const char *ret = ExtractToken(str, delims, &buff); 43 if (buff != 0) { 44 *result = (uptr)internal_atoll(buff); 45 } 46 InternalFree(buff); 47 return ret; 48 } 49 50 const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter, 51 char **result) { 52 const char *found_delimiter = internal_strstr(str, delimiter); 53 uptr prefix_len = 54 found_delimiter ? found_delimiter - str : internal_strlen(str); 55 *result = (char *)InternalAlloc(prefix_len + 1); 56 internal_memcpy(*result, str, prefix_len); 57 (*result)[prefix_len] = '\0'; 58 const char *prefix_end = str + prefix_len; 59 if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter); 60 return prefix_end; 61 } 62 63 SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) { 64 BlockingMutexLock l(&mu_); 65 const char *module_name; 66 uptr module_offset; 67 SymbolizedStack *res = SymbolizedStack::New(addr); 68 if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset)) 69 return res; 70 // Always fill data about module name and offset. 71 res->info.FillModuleInfo(module_name, module_offset); 72 for (auto &tool : tools_) { 73 SymbolizerScope sym_scope(this); 74 if (tool.SymbolizePC(addr, res)) { 75 return res; 76 } 77 } 78 return res; 79 } 80 81 bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) { 82 BlockingMutexLock l(&mu_); 83 const char *module_name; 84 uptr module_offset; 85 if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset)) 86 return false; 87 info->Clear(); 88 info->module = internal_strdup(module_name); 89 info->module_offset = module_offset; 90 for (auto &tool : tools_) { 91 SymbolizerScope sym_scope(this); 92 if (tool.SymbolizeData(addr, info)) { 93 return true; 94 } 95 } 96 return true; 97 } 98 99 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name, 100 uptr *module_address) { 101 BlockingMutexLock l(&mu_); 102 const char *internal_module_name = nullptr; 103 if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name, 104 module_address)) 105 return false; 106 107 if (module_name) 108 *module_name = module_names_.GetOwnedCopy(internal_module_name); 109 return true; 110 } 111 112 void Symbolizer::Flush() { 113 BlockingMutexLock l(&mu_); 114 for (auto &tool : tools_) { 115 SymbolizerScope sym_scope(this); 116 tool.Flush(); 117 } 118 } 119 120 const char *Symbolizer::Demangle(const char *name) { 121 BlockingMutexLock l(&mu_); 122 for (auto &tool : tools_) { 123 SymbolizerScope sym_scope(this); 124 if (const char *demangled = tool.Demangle(name)) 125 return demangled; 126 } 127 return PlatformDemangle(name); 128 } 129 130 void Symbolizer::PrepareForSandboxing() { 131 BlockingMutexLock l(&mu_); 132 PlatformPrepareForSandboxing(); 133 } 134 135 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address, 136 const char **module_name, 137 uptr *module_offset) { 138 const LoadedModule *module = FindModuleForAddress(address); 139 if (module == nullptr) 140 return false; 141 *module_name = module->full_name(); 142 *module_offset = address - module->base_address(); 143 return true; 144 } 145 146 const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) { 147 bool modules_were_reloaded = false; 148 if (!modules_fresh_) { 149 modules_.init(); 150 RAW_CHECK(modules_.size() > 0); 151 modules_fresh_ = true; 152 modules_were_reloaded = true; 153 } 154 for (uptr i = 0; i < modules_.size(); i++) { 155 if (modules_[i].containsAddress(address)) { 156 return &modules_[i]; 157 } 158 } 159 // Reload the modules and look up again, if we haven't tried it yet. 160 if (!modules_were_reloaded) { 161 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors. 162 // It's too aggressive to reload the list of modules each time we fail 163 // to find a module for a given address. 164 modules_fresh_ = false; 165 return FindModuleForAddress(address); 166 } 167 return 0; 168 } 169 170 Symbolizer *Symbolizer::GetOrInit() { 171 SpinMutexLock l(&init_mu_); 172 if (symbolizer_) 173 return symbolizer_; 174 symbolizer_ = PlatformInit(); 175 CHECK(symbolizer_); 176 return symbolizer_; 177 } 178 179 // For now we assume the following protocol: 180 // For each request of the form 181 // <module_name> <module_offset> 182 // passed to STDIN, external symbolizer prints to STDOUT response: 183 // <function_name> 184 // <file_name>:<line_number>:<column_number> 185 // <function_name> 186 // <file_name>:<line_number>:<column_number> 187 // ... 188 // <empty line> 189 class LLVMSymbolizerProcess : public SymbolizerProcess { 190 public: 191 explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {} 192 193 private: 194 bool ReachedEndOfOutput(const char *buffer, uptr length) const override { 195 // Empty line marks the end of llvm-symbolizer output. 196 return length >= 2 && buffer[length - 1] == '\n' && 197 buffer[length - 2] == '\n'; 198 } 199 200 void GetArgV(const char *path_to_binary, 201 const char *(&argv)[kArgVMax]) const override { 202 #if defined(__x86_64h__) 203 const char* const kSymbolizerArch = "--default-arch=x86_64h"; 204 #elif defined(__x86_64__) 205 const char* const kSymbolizerArch = "--default-arch=x86_64"; 206 #elif defined(__i386__) 207 const char* const kSymbolizerArch = "--default-arch=i386"; 208 #elif defined(__aarch64__) 209 const char* const kSymbolizerArch = "--default-arch=arm64"; 210 #elif defined(__arm__) 211 const char* const kSymbolizerArch = "--default-arch=arm"; 212 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 213 const char* const kSymbolizerArch = "--default-arch=powerpc64"; 214 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 215 const char* const kSymbolizerArch = "--default-arch=powerpc64le"; 216 #elif defined(__s390x__) 217 const char* const kSymbolizerArch = "--default-arch=s390x"; 218 #elif defined(__s390__) 219 const char* const kSymbolizerArch = "--default-arch=s390"; 220 #else 221 const char* const kSymbolizerArch = "--default-arch=unknown"; 222 #endif 223 224 const char *const inline_flag = common_flags()->symbolize_inline_frames 225 ? "--inlining=true" 226 : "--inlining=false"; 227 int i = 0; 228 argv[i++] = path_to_binary; 229 argv[i++] = inline_flag; 230 argv[i++] = kSymbolizerArch; 231 argv[i++] = nullptr; 232 } 233 }; 234 235 LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator) 236 : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {} 237 238 // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on 239 // Windows, so extract tokens from the right hand side first. The column info is 240 // also optional. 241 static const char *ParseFileLineInfo(AddressInfo *info, const char *str) { 242 char *file_line_info = 0; 243 str = ExtractToken(str, "\n", &file_line_info); 244 CHECK(file_line_info); 245 // Parse the last :<int>, which must be there. 246 char *last_colon = internal_strrchr(file_line_info, ':'); 247 CHECK(last_colon); 248 int line_or_column = internal_atoll(last_colon + 1); 249 // Truncate the string at the last colon and find the next-to-last colon. 250 *last_colon = '\0'; 251 last_colon = internal_strrchr(file_line_info, ':'); 252 if (last_colon && IsDigit(last_colon[1])) { 253 // If the second-to-last colon is followed by a digit, it must be the line 254 // number, and the previous parsed number was a column. 255 info->line = internal_atoll(last_colon + 1); 256 info->column = line_or_column; 257 *last_colon = '\0'; 258 } else { 259 // Otherwise, we have line info but no column info. 260 info->line = line_or_column; 261 info->column = 0; 262 } 263 ExtractToken(file_line_info, "", &info->file); 264 InternalFree(file_line_info); 265 return str; 266 } 267 268 // Parses one or more two-line strings in the following format: 269 // <function_name> 270 // <file_name>:<line_number>[:<column_number>] 271 // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of 272 // them use the same output format. 273 void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) { 274 bool top_frame = true; 275 SymbolizedStack *last = res; 276 while (true) { 277 char *function_name = 0; 278 str = ExtractToken(str, "\n", &function_name); 279 CHECK(function_name); 280 if (function_name[0] == '\0') { 281 // There are no more frames. 282 InternalFree(function_name); 283 break; 284 } 285 SymbolizedStack *cur; 286 if (top_frame) { 287 cur = res; 288 top_frame = false; 289 } else { 290 cur = SymbolizedStack::New(res->info.address); 291 cur->info.FillModuleInfo(res->info.module, res->info.module_offset); 292 last->next = cur; 293 last = cur; 294 } 295 296 AddressInfo *info = &cur->info; 297 info->function = function_name; 298 str = ParseFileLineInfo(info, str); 299 300 // Functions and filenames can be "??", in which case we write 0 301 // to address info to mark that names are unknown. 302 if (0 == internal_strcmp(info->function, "??")) { 303 InternalFree(info->function); 304 info->function = 0; 305 } 306 if (0 == internal_strcmp(info->file, "??")) { 307 InternalFree(info->file); 308 info->file = 0; 309 } 310 } 311 } 312 313 // Parses a two-line string in the following format: 314 // <symbol_name> 315 // <start_address> <size> 316 // Used by LLVMSymbolizer and InternalSymbolizer. 317 void ParseSymbolizeDataOutput(const char *str, DataInfo *info) { 318 str = ExtractToken(str, "\n", &info->name); 319 str = ExtractUptr(str, " ", &info->start); 320 str = ExtractUptr(str, "\n", &info->size); 321 } 322 323 bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) { 324 if (const char *buf = SendCommand(/*is_data*/ false, stack->info.module, 325 stack->info.module_offset)) { 326 ParseSymbolizePCOutput(buf, stack); 327 return true; 328 } 329 return false; 330 } 331 332 bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) { 333 if (const char *buf = 334 SendCommand(/*is_data*/ true, info->module, info->module_offset)) { 335 ParseSymbolizeDataOutput(buf, info); 336 info->start += (addr - info->module_offset); // Add the base address. 337 return true; 338 } 339 return false; 340 } 341 342 const char *LLVMSymbolizer::SendCommand(bool is_data, const char *module_name, 343 uptr module_offset) { 344 CHECK(module_name); 345 internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", 346 is_data ? "DATA " : "", module_name, module_offset); 347 return symbolizer_process_->SendCommand(buffer_); 348 } 349 350 SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty) 351 : path_(path), 352 input_fd_(kInvalidFd), 353 output_fd_(kInvalidFd), 354 times_restarted_(0), 355 failed_to_start_(false), 356 reported_invalid_path_(false), 357 use_forkpty_(use_forkpty) { 358 CHECK(path_); 359 CHECK_NE(path_[0], '\0'); 360 } 361 362 const char *SymbolizerProcess::SendCommand(const char *command) { 363 for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) { 364 // Start or restart symbolizer if we failed to send command to it. 365 if (const char *res = SendCommandImpl(command)) 366 return res; 367 Restart(); 368 } 369 if (!failed_to_start_) { 370 Report("WARNING: Failed to use and restart external symbolizer!\n"); 371 failed_to_start_ = true; 372 } 373 return 0; 374 } 375 376 const char *SymbolizerProcess::SendCommandImpl(const char *command) { 377 if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd) 378 return 0; 379 if (!WriteToSymbolizer(command, internal_strlen(command))) 380 return 0; 381 if (!ReadFromSymbolizer(buffer_, kBufferSize)) 382 return 0; 383 return buffer_; 384 } 385 386 bool SymbolizerProcess::Restart() { 387 if (input_fd_ != kInvalidFd) 388 CloseFile(input_fd_); 389 if (output_fd_ != kInvalidFd) 390 CloseFile(output_fd_); 391 return StartSymbolizerSubprocess(); 392 } 393 394 bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) { 395 if (max_length == 0) 396 return true; 397 uptr read_len = 0; 398 while (true) { 399 uptr just_read = 0; 400 bool success = ReadFromFile(input_fd_, buffer + read_len, 401 max_length - read_len - 1, &just_read); 402 // We can't read 0 bytes, as we don't expect external symbolizer to close 403 // its stdout. 404 if (!success || just_read == 0) { 405 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); 406 return false; 407 } 408 read_len += just_read; 409 if (ReachedEndOfOutput(buffer, read_len)) 410 break; 411 } 412 buffer[read_len] = '\0'; 413 return true; 414 } 415 416 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) { 417 if (length == 0) 418 return true; 419 uptr write_len = 0; 420 bool success = WriteToFile(output_fd_, buffer, length, &write_len); 421 if (!success || write_len != length) { 422 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); 423 return false; 424 } 425 return true; 426 } 427 428 } // namespace __sanitizer 429