// Copyright (c) 2010 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// basic_source_line_resolver.cc: BasicSourceLineResolver implementation.
//
// See basic_source_line_resolver.h and basic_source_line_resolver_types.h
// for documentation.
34 35 #include <assert.h> 36 #include <stdio.h> 37 #include <string.h> 38 #include <sys/types.h> 39 #include <sys/stat.h> 40 41 #include <limits> 42 #include <map> 43 #include <utility> 44 #include <vector> 45 46 #include "google_breakpad/processor/basic_source_line_resolver.h" 47 #include "processor/basic_source_line_resolver_types.h" 48 #include "processor/module_factory.h" 49 50 #include "processor/tokenize.h" 51 52 using std::map; 53 using std::vector; 54 using std::make_pair; 55 56 namespace google_breakpad { 57 58 #ifdef _WIN32 59 #define strtok_r strtok_s 60 #define strtoull _strtoui64 61 #endif 62 63 static const char *kWhitespace = " \r\n"; 64 static const int kMaxErrorsPrinted = 5; 65 static const int kMaxErrorsBeforeBailing = 100; 66 67 BasicSourceLineResolver::BasicSourceLineResolver() : 68 SourceLineResolverBase(new BasicModuleFactory) { } 69 70 // static 71 void BasicSourceLineResolver::Module::LogParseError( 72 const string &message, 73 int line_number, 74 int *num_errors) { 75 if (++(*num_errors) <= kMaxErrorsPrinted) { 76 if (line_number > 0) { 77 BPLOG(ERROR) << "Line " << line_number << ": " << message; 78 } else { 79 BPLOG(ERROR) << message; 80 } 81 } 82 } 83 84 bool BasicSourceLineResolver::Module::LoadMapFromMemory( 85 char *memory_buffer, 86 size_t memory_buffer_size) { 87 linked_ptr<Function> cur_func; 88 int line_number = 0; 89 int num_errors = 0; 90 char *save_ptr; 91 92 // If the length is 0, we can still pretend we have a symbol file. This is 93 // for scenarios that want to test symbol lookup, but don't necessarily care 94 // if certain modules do not have any information, like system libraries. 95 if (memory_buffer_size == 0) { 96 return true; 97 } 98 99 // Make sure the last character is null terminator. 
100 size_t last_null_terminator = memory_buffer_size - 1; 101 if (memory_buffer[last_null_terminator] != '\0') { 102 memory_buffer[last_null_terminator] = '\0'; 103 } 104 105 // Skip any null terminators at the end of the memory buffer, and make sure 106 // there are no other null terminators in the middle of the memory buffer. 107 bool has_null_terminator_in_the_middle = false; 108 while (last_null_terminator > 0 && 109 memory_buffer[last_null_terminator - 1] == '\0') { 110 last_null_terminator--; 111 } 112 for (size_t i = 0; i < last_null_terminator; i++) { 113 if (memory_buffer[i] == '\0') { 114 memory_buffer[i] = '_'; 115 has_null_terminator_in_the_middle = true; 116 } 117 } 118 if (has_null_terminator_in_the_middle) { 119 LogParseError( 120 "Null terminator is not expected in the middle of the symbol data", 121 line_number, 122 &num_errors); 123 } 124 125 char *buffer; 126 buffer = strtok_r(memory_buffer, "\r\n", &save_ptr); 127 128 while (buffer != NULL) { 129 ++line_number; 130 131 if (strncmp(buffer, "FILE ", 5) == 0) { 132 if (!ParseFile(buffer)) { 133 LogParseError("ParseFile on buffer failed", line_number, &num_errors); 134 } 135 } else if (strncmp(buffer, "STACK ", 6) == 0) { 136 if (!ParseStackInfo(buffer)) { 137 LogParseError("ParseStackInfo failed", line_number, &num_errors); 138 } 139 } else if (strncmp(buffer, "FUNC ", 5) == 0) { 140 cur_func.reset(ParseFunction(buffer)); 141 if (!cur_func.get()) { 142 LogParseError("ParseFunction failed", line_number, &num_errors); 143 } else { 144 // StoreRange will fail if the function has an invalid address or size. 145 // We'll silently ignore this, the function and any corresponding lines 146 // will be destroyed when cur_func is released. 147 functions_.StoreRange(cur_func->address, cur_func->size, cur_func); 148 } 149 } else if (strncmp(buffer, "PUBLIC ", 7) == 0) { 150 // Clear cur_func: public symbols don't contain line number information. 
151 cur_func.reset(); 152 153 if (!ParsePublicSymbol(buffer)) { 154 LogParseError("ParsePublicSymbol failed", line_number, &num_errors); 155 } 156 } else if (strncmp(buffer, "MODULE ", 7) == 0) { 157 // Ignore these. They're not of any use to BasicSourceLineResolver, 158 // which is fed modules by a SymbolSupplier. These lines are present to 159 // aid other tools in properly placing symbol files so that they can 160 // be accessed by a SymbolSupplier. 161 // 162 // MODULE <guid> <age> <filename> 163 } else if (strncmp(buffer, "INFO ", 5) == 0) { 164 // Ignore these as well, they're similarly just for housekeeping. 165 // 166 // INFO CODE_ID <code id> <filename> 167 } else { 168 if (!cur_func.get()) { 169 LogParseError("Found source line data without a function", 170 line_number, &num_errors); 171 } else { 172 Line *line = ParseLine(buffer); 173 if (!line) { 174 LogParseError("ParseLine failed", line_number, &num_errors); 175 } else { 176 cur_func->lines.StoreRange(line->address, line->size, 177 linked_ptr<Line>(line)); 178 } 179 } 180 } 181 if (num_errors > kMaxErrorsBeforeBailing) { 182 break; 183 } 184 buffer = strtok_r(NULL, "\r\n", &save_ptr); 185 } 186 is_corrupt_ = num_errors > 0; 187 return true; 188 } 189 190 void BasicSourceLineResolver::Module::LookupAddress(StackFrame *frame) const { 191 MemAddr address = frame->instruction - frame->module->base_address(); 192 193 // First, look for a FUNC record that covers address. Use 194 // RetrieveNearestRange instead of RetrieveRange so that, if there 195 // is no such function, we can use the next function to bound the 196 // extent of the PUBLIC symbol we find, below. This does mean we 197 // need to check that address indeed falls within the function we 198 // find; do the range comparison in an overflow-friendly way. 
199 linked_ptr<Function> func; 200 linked_ptr<PublicSymbol> public_symbol; 201 MemAddr function_base; 202 MemAddr function_size; 203 MemAddr public_address; 204 if (functions_.RetrieveNearestRange(address, &func, 205 &function_base, &function_size) && 206 address >= function_base && address - function_base < function_size) { 207 frame->function_name = func->name; 208 frame->function_base = frame->module->base_address() + function_base; 209 210 linked_ptr<Line> line; 211 MemAddr line_base; 212 if (func->lines.RetrieveRange(address, &line, &line_base, NULL)) { 213 FileMap::const_iterator it = files_.find(line->source_file_id); 214 if (it != files_.end()) { 215 frame->source_file_name = files_.find(line->source_file_id)->second; 216 } 217 frame->source_line = line->line; 218 frame->source_line_base = frame->module->base_address() + line_base; 219 } 220 } else if (public_symbols_.Retrieve(address, 221 &public_symbol, &public_address) && 222 (!func.get() || public_address > function_base)) { 223 frame->function_name = public_symbol->name; 224 frame->function_base = frame->module->base_address() + public_address; 225 } 226 } 227 228 WindowsFrameInfo *BasicSourceLineResolver::Module::FindWindowsFrameInfo( 229 const StackFrame *frame) const { 230 MemAddr address = frame->instruction - frame->module->base_address(); 231 scoped_ptr<WindowsFrameInfo> result(new WindowsFrameInfo()); 232 233 // We only know about WindowsFrameInfo::STACK_INFO_FRAME_DATA and 234 // WindowsFrameInfo::STACK_INFO_FPO. Prefer them in this order. 235 // WindowsFrameInfo::STACK_INFO_FRAME_DATA is the newer type that 236 // includes its own program string. 237 // WindowsFrameInfo::STACK_INFO_FPO is the older type 238 // corresponding to the FPO_DATA struct. See stackwalker_x86.cc. 
239 linked_ptr<WindowsFrameInfo> frame_info; 240 if ((windows_frame_info_[WindowsFrameInfo::STACK_INFO_FRAME_DATA] 241 .RetrieveRange(address, &frame_info)) 242 || (windows_frame_info_[WindowsFrameInfo::STACK_INFO_FPO] 243 .RetrieveRange(address, &frame_info))) { 244 result->CopyFrom(*frame_info.get()); 245 return result.release(); 246 } 247 248 // Even without a relevant STACK line, many functions contain 249 // information about how much space their parameters consume on the 250 // stack. Use RetrieveNearestRange instead of RetrieveRange, so that 251 // we can use the function to bound the extent of the PUBLIC symbol, 252 // below. However, this does mean we need to check that ADDRESS 253 // falls within the retrieved function's range; do the range 254 // comparison in an overflow-friendly way. 255 linked_ptr<Function> function; 256 MemAddr function_base, function_size; 257 if (functions_.RetrieveNearestRange(address, &function, 258 &function_base, &function_size) && 259 address >= function_base && address - function_base < function_size) { 260 result->parameter_size = function->parameter_size; 261 result->valid |= WindowsFrameInfo::VALID_PARAMETER_SIZE; 262 return result.release(); 263 } 264 265 // PUBLIC symbols might have a parameter size. Use the function we 266 // found above to limit the range the public symbol covers. 267 linked_ptr<PublicSymbol> public_symbol; 268 MemAddr public_address; 269 if (public_symbols_.Retrieve(address, &public_symbol, &public_address) && 270 (!function.get() || public_address > function_base)) { 271 result->parameter_size = public_symbol->parameter_size; 272 } 273 274 return NULL; 275 } 276 277 CFIFrameInfo *BasicSourceLineResolver::Module::FindCFIFrameInfo( 278 const StackFrame *frame) const { 279 MemAddr address = frame->instruction - frame->module->base_address(); 280 MemAddr initial_base, initial_size; 281 string initial_rules; 282 283 // Find the initial rule whose range covers this address. 
That 284 // provides an initial set of register recovery rules. Then, walk 285 // forward from the initial rule's starting address to frame's 286 // instruction address, applying delta rules. 287 if (!cfi_initial_rules_.RetrieveRange(address, &initial_rules, 288 &initial_base, &initial_size)) { 289 return NULL; 290 } 291 292 // Create a frame info structure, and populate it with the rules from 293 // the STACK CFI INIT record. 294 scoped_ptr<CFIFrameInfo> rules(new CFIFrameInfo()); 295 if (!ParseCFIRuleSet(initial_rules, rules.get())) 296 return NULL; 297 298 // Find the first delta rule that falls within the initial rule's range. 299 map<MemAddr, string>::const_iterator delta = 300 cfi_delta_rules_.lower_bound(initial_base); 301 302 // Apply delta rules up to and including the frame's address. 303 while (delta != cfi_delta_rules_.end() && delta->first <= address) { 304 ParseCFIRuleSet(delta->second, rules.get()); 305 delta++; 306 } 307 308 return rules.release(); 309 } 310 311 bool BasicSourceLineResolver::Module::ParseFile(char *file_line) { 312 long index; 313 char *filename; 314 if (SymbolParseHelper::ParseFile(file_line, &index, &filename)) { 315 files_.insert(make_pair(index, string(filename))); 316 return true; 317 } 318 return false; 319 } 320 321 BasicSourceLineResolver::Function* 322 BasicSourceLineResolver::Module::ParseFunction(char *function_line) { 323 uint64_t address; 324 uint64_t size; 325 long stack_param_size; 326 char *name; 327 if (SymbolParseHelper::ParseFunction(function_line, &address, &size, 328 &stack_param_size, &name)) { 329 return new Function(name, address, size, stack_param_size); 330 } 331 return NULL; 332 } 333 334 BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine( 335 char *line_line) { 336 uint64_t address; 337 uint64_t size; 338 long line_number; 339 long source_file; 340 341 if (SymbolParseHelper::ParseLine(line_line, &address, &size, &line_number, 342 &source_file)) { 343 return new Line(address, size, 
source_file, line_number); 344 } 345 return NULL; 346 } 347 348 bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) { 349 uint64_t address; 350 long stack_param_size; 351 char *name; 352 353 if (SymbolParseHelper::ParsePublicSymbol(public_line, &address, 354 &stack_param_size, &name)) { 355 // A few public symbols show up with an address of 0. This has been seen 356 // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow, 357 // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1. They would conflict 358 // with one another if they were allowed into the public_symbols_ map, 359 // but since the address is obviously invalid, gracefully accept them 360 // as input without putting them into the map. 361 if (address == 0) { 362 return true; 363 } 364 365 linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address, 366 stack_param_size)); 367 return public_symbols_.Store(address, symbol); 368 } 369 return false; 370 } 371 372 bool BasicSourceLineResolver::Module::ParseStackInfo(char *stack_info_line) { 373 // Skip "STACK " prefix. 374 stack_info_line += 6; 375 376 // Find the token indicating what sort of stack frame walking 377 // information this is. 378 while (*stack_info_line == ' ') 379 stack_info_line++; 380 const char *platform = stack_info_line; 381 while (!strchr(kWhitespace, *stack_info_line)) 382 stack_info_line++; 383 *stack_info_line++ = '\0'; 384 385 // MSVC stack frame info. 386 if (strcmp(platform, "WIN") == 0) { 387 int type = 0; 388 uint64_t rva, code_size; 389 linked_ptr<WindowsFrameInfo> 390 stack_frame_info(WindowsFrameInfo::ParseFromString(stack_info_line, 391 type, 392 rva, 393 code_size)); 394 if (stack_frame_info == NULL) 395 return false; 396 397 // TODO(mmentovai): I wanted to use StoreRange's return value as this 398 // method's return value, but MSVC infrequently outputs stack info that 399 // violates the containment rules. 
This happens with a section of code 400 // in strncpy_s in test_app.cc (testdata/minidump2). There, problem looks 401 // like this: 402 // STACK WIN 4 4242 1a a 0 ... (STACK WIN 4 base size prolog 0 ...) 403 // STACK WIN 4 4243 2e 9 0 ... 404 // ContainedRangeMap treats these two blocks as conflicting. In reality, 405 // when the prolog lengths are taken into account, the actual code of 406 // these blocks doesn't conflict. However, we can't take the prolog lengths 407 // into account directly here because we'd wind up with a different set 408 // of range conflicts when MSVC outputs stack info like this: 409 // STACK WIN 4 1040 73 33 0 ... 410 // STACK WIN 4 105a 59 19 0 ... 411 // because in both of these entries, the beginning of the code after the 412 // prolog is at 0x1073, and the last byte of contained code is at 0x10b2. 413 // Perhaps we could get away with storing ranges by rva + prolog_size 414 // if ContainedRangeMap were modified to allow replacement of 415 // already-stored values. 416 417 windows_frame_info_[type].StoreRange(rva, code_size, stack_frame_info); 418 return true; 419 } else if (strcmp(platform, "CFI") == 0) { 420 // DWARF CFI stack frame info 421 return ParseCFIFrameInfo(stack_info_line); 422 } else { 423 // Something unrecognized. 424 return false; 425 } 426 } 427 428 bool BasicSourceLineResolver::Module::ParseCFIFrameInfo( 429 char *stack_info_line) { 430 char *cursor; 431 432 // Is this an INIT record or a delta record? 433 char *init_or_address = strtok_r(stack_info_line, " \r\n", &cursor); 434 if (!init_or_address) 435 return false; 436 437 if (strcmp(init_or_address, "INIT") == 0) { 438 // This record has the form "STACK INIT <address> <size> <rules...>". 
439 char *address_field = strtok_r(NULL, " \r\n", &cursor); 440 if (!address_field) return false; 441 442 char *size_field = strtok_r(NULL, " \r\n", &cursor); 443 if (!size_field) return false; 444 445 char *initial_rules = strtok_r(NULL, "\r\n", &cursor); 446 if (!initial_rules) return false; 447 448 MemAddr address = strtoul(address_field, NULL, 16); 449 MemAddr size = strtoul(size_field, NULL, 16); 450 cfi_initial_rules_.StoreRange(address, size, initial_rules); 451 return true; 452 } 453 454 // This record has the form "STACK <address> <rules...>". 455 char *address_field = init_or_address; 456 char *delta_rules = strtok_r(NULL, "\r\n", &cursor); 457 if (!delta_rules) return false; 458 MemAddr address = strtoul(address_field, NULL, 16); 459 cfi_delta_rules_[address] = delta_rules; 460 return true; 461 } 462 463 // static 464 bool SymbolParseHelper::ParseFile(char *file_line, long *index, 465 char **filename) { 466 // FILE <id> <filename> 467 assert(strncmp(file_line, "FILE ", 5) == 0); 468 file_line += 5; // skip prefix 469 470 vector<char*> tokens; 471 if (!Tokenize(file_line, kWhitespace, 2, &tokens)) { 472 return false; 473 } 474 475 char *after_number; 476 *index = strtol(tokens[0], &after_number, 10); 477 if (!IsValidAfterNumber(after_number) || *index < 0 || 478 *index == std::numeric_limits<long>::max()) { 479 return false; 480 } 481 482 *filename = tokens[1]; 483 if (!filename) { 484 return false; 485 } 486 487 return true; 488 } 489 490 // static 491 bool SymbolParseHelper::ParseFunction(char *function_line, uint64_t *address, 492 uint64_t *size, long *stack_param_size, 493 char **name) { 494 // FUNC <address> <size> <stack_param_size> <name> 495 assert(strncmp(function_line, "FUNC ", 5) == 0); 496 function_line += 5; // skip prefix 497 498 vector<char*> tokens; 499 if (!Tokenize(function_line, kWhitespace, 4, &tokens)) { 500 return false; 501 } 502 503 char *after_number; 504 *address = strtoull(tokens[0], &after_number, 16); 505 if 
(!IsValidAfterNumber(after_number) || 506 *address == std::numeric_limits<unsigned long long>::max()) { 507 return false; 508 } 509 *size = strtoull(tokens[1], &after_number, 16); 510 if (!IsValidAfterNumber(after_number) || 511 *size == std::numeric_limits<unsigned long long>::max()) { 512 return false; 513 } 514 *stack_param_size = strtol(tokens[2], &after_number, 16); 515 if (!IsValidAfterNumber(after_number) || 516 *stack_param_size == std::numeric_limits<long>::max() || 517 *stack_param_size < 0) { 518 return false; 519 } 520 *name = tokens[3]; 521 522 return true; 523 } 524 525 // static 526 bool SymbolParseHelper::ParseLine(char *line_line, uint64_t *address, 527 uint64_t *size, long *line_number, 528 long *source_file) { 529 // <address> <size> <line number> <source file id> 530 vector<char*> tokens; 531 if (!Tokenize(line_line, kWhitespace, 4, &tokens)) { 532 return false; 533 } 534 535 char *after_number; 536 *address = strtoull(tokens[0], &after_number, 16); 537 if (!IsValidAfterNumber(after_number) || 538 *address == std::numeric_limits<unsigned long long>::max()) { 539 return false; 540 } 541 *size = strtoull(tokens[1], &after_number, 16); 542 if (!IsValidAfterNumber(after_number) || 543 *size == std::numeric_limits<unsigned long long>::max()) { 544 return false; 545 } 546 *line_number = strtol(tokens[2], &after_number, 10); 547 if (!IsValidAfterNumber(after_number) || 548 *line_number == std::numeric_limits<long>::max()) { 549 return false; 550 } 551 *source_file = strtol(tokens[3], &after_number, 10); 552 if (!IsValidAfterNumber(after_number) || *source_file < 0 || 553 *source_file == std::numeric_limits<long>::max()) { 554 return false; 555 } 556 557 // Valid line numbers normally start from 1, however there are functions that 558 // are associated with a source file but not associated with any line number 559 // (block helper function) and for such functions the symbol file contains 0 560 // for the line numbers. 
Hence, 0 should be treated as a valid line number. 561 // For more information on block helper functions, please, take a look at: 562 // http://clang.llvm.org/docs/Block-ABI-Apple.html 563 if (*line_number < 0) { 564 return false; 565 } 566 567 return true; 568 } 569 570 // static 571 bool SymbolParseHelper::ParsePublicSymbol(char *public_line, 572 uint64_t *address, 573 long *stack_param_size, 574 char **name) { 575 // PUBLIC <address> <stack_param_size> <name> 576 assert(strncmp(public_line, "PUBLIC ", 7) == 0); 577 public_line += 7; // skip prefix 578 579 vector<char*> tokens; 580 if (!Tokenize(public_line, kWhitespace, 3, &tokens)) { 581 return false; 582 } 583 584 char *after_number; 585 *address = strtoull(tokens[0], &after_number, 16); 586 if (!IsValidAfterNumber(after_number) || 587 *address == std::numeric_limits<unsigned long long>::max()) { 588 return false; 589 } 590 *stack_param_size = strtol(tokens[1], &after_number, 16); 591 if (!IsValidAfterNumber(after_number) || 592 *stack_param_size == std::numeric_limits<long>::max() || 593 *stack_param_size < 0) { 594 return false; 595 } 596 *name = tokens[2]; 597 598 return true; 599 } 600 601 // static 602 bool SymbolParseHelper::IsValidAfterNumber(char *after_number) { 603 if (after_number != NULL && strchr(kWhitespace, *after_number) != NULL) { 604 return true; 605 } 606 return false; 607 } 608 609 } // namespace google_breakpad 610