1 // Copyright (c) 2010 Google Inc. All Rights Reserved. 2 // 3 // Redistribution and use in source and binary forms, with or without 4 // modification, are permitted provided that the following conditions are 5 // met: 6 // 7 // * Redistributions of source code must retain the above copyright 8 // notice, this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above 10 // copyright notice, this list of conditions and the following disclaimer 11 // in the documentation and/or other materials provided with the 12 // distribution. 13 // * Neither the name of Google Inc. nor the names of its 14 // contributors may be used to endorse or promote products derived from 15 // this software without specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 // CFI reader author: Jim Blandy <jimb (at) mozilla.com> <jimb (at) red-bean.com> 30 31 // Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit, 32 // and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details. 33 34 #include "common/dwarf/dwarf2reader.h" 35 36 #include <assert.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <string.h> 40 41 #include <map> 42 #include <memory> 43 #include <stack> 44 #include <string> 45 #include <utility> 46 47 #include "common/dwarf/bytereader-inl.h" 48 #include "common/dwarf/bytereader.h" 49 #include "common/dwarf/line_state_machine.h" 50 #include "common/using_std_string.h" 51 52 namespace dwarf2reader { 53 54 CompilationUnit::CompilationUnit(const SectionMap& sections, uint64 offset, 55 ByteReader* reader, Dwarf2Handler* handler) 56 : offset_from_section_start_(offset), reader_(reader), 57 sections_(sections), handler_(handler), abbrevs_(NULL), 58 string_buffer_(NULL), string_buffer_length_(0) {} 59 60 // Read a DWARF2/3 abbreviation section. 61 // Each abbrev consists of a abbreviation number, a tag, a byte 62 // specifying whether the tag has children, and a list of 63 // attribute/form pairs. 64 // The list of forms is terminated by a 0 for the attribute, and a 65 // zero for the form. The entire abbreviation section is terminated 66 // by a zero for the code. 67 68 void CompilationUnit::ReadAbbrevs() { 69 if (abbrevs_) 70 return; 71 72 // First get the debug_abbrev section. ".debug_abbrev" is the name 73 // recommended in the DWARF spec, and used on Linux; 74 // "__debug_abbrev" is the name used in Mac OS X Mach-O files. 75 SectionMap::const_iterator iter = sections_.find(".debug_abbrev"); 76 if (iter == sections_.end()) 77 iter = sections_.find("__debug_abbrev"); 78 assert(iter != sections_.end()); 79 80 abbrevs_ = new std::vector<Abbrev>; 81 abbrevs_->resize(1); 82 83 // The only way to check whether we are reading over the end of the 84 // buffer would be to first compute the size of the leb128 data by 85 // reading it, then go back and read it again. 86 const char* abbrev_start = iter->second.first + 87 header_.abbrev_offset; 88 const char* abbrevptr = abbrev_start; 89 #ifndef NDEBUG 90 const uint64 abbrev_length = iter->second.second - header_.abbrev_offset; 91 #endif 92 93 while (1) { 94 CompilationUnit::Abbrev abbrev; 95 size_t len; 96 const uint64 number = reader_->ReadUnsignedLEB128(abbrevptr, &len); 97 98 if (number == 0) 99 break; 100 abbrev.number = number; 101 abbrevptr += len; 102 103 assert(abbrevptr < abbrev_start + abbrev_length); 104 const uint64 tag = reader_->ReadUnsignedLEB128(abbrevptr, &len); 105 abbrevptr += len; 106 abbrev.tag = static_cast<enum DwarfTag>(tag); 107 108 assert(abbrevptr < abbrev_start + abbrev_length); 109 abbrev.has_children = reader_->ReadOneByte(abbrevptr); 110 abbrevptr += 1; 111 112 assert(abbrevptr < abbrev_start + abbrev_length); 113 114 while (1) { 115 const uint64 nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len); 116 abbrevptr += len; 117 118 assert(abbrevptr < abbrev_start + abbrev_length); 119 const uint64 formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len); 120 abbrevptr += len; 121 if (nametemp == 0 && formtemp == 0) 122 break; 123 124 const enum DwarfAttribute name = 125 static_cast<enum DwarfAttribute>(nametemp); 126 const enum DwarfForm form = static_cast<enum DwarfForm>(formtemp); 127 abbrev.attributes.push_back(std::make_pair(name, form)); 128 } 129 assert(abbrev.number == abbrevs_->size()); 130 abbrevs_->push_back(abbrev); 131 } 132 } 133 134 // Skips a single DIE's attributes. 135 const char* CompilationUnit::SkipDIE(const char* start, 136 const Abbrev& abbrev) { 137 for (AttributeList::const_iterator i = abbrev.attributes.begin(); 138 i != abbrev.attributes.end(); 139 i++) { 140 start = SkipAttribute(start, i->second); 141 } 142 return start; 143 } 144 145 // Skips a single attribute form's data. 146 const char* CompilationUnit::SkipAttribute(const char* start, 147 enum DwarfForm form) { 148 size_t len; 149 150 switch (form) { 151 case DW_FORM_indirect: 152 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start, 153 &len)); 154 start += len; 155 return SkipAttribute(start, form); 156 157 case DW_FORM_flag_present: 158 return start; 159 case DW_FORM_data1: 160 case DW_FORM_flag: 161 case DW_FORM_ref1: 162 return start + 1; 163 case DW_FORM_ref2: 164 case DW_FORM_data2: 165 return start + 2; 166 case DW_FORM_ref4: 167 case DW_FORM_data4: 168 return start + 4; 169 case DW_FORM_ref8: 170 case DW_FORM_data8: 171 case DW_FORM_ref_sig8: 172 return start + 8; 173 case DW_FORM_string: 174 return start + strlen(start) + 1; 175 case DW_FORM_udata: 176 case DW_FORM_ref_udata: 177 reader_->ReadUnsignedLEB128(start, &len); 178 return start + len; 179 180 case DW_FORM_sdata: 181 reader_->ReadSignedLEB128(start, &len); 182 return start + len; 183 case DW_FORM_addr: 184 return start + reader_->AddressSize(); 185 case DW_FORM_ref_addr: 186 // DWARF2 and 3/4 differ on whether ref_addr is address size or 187 // offset size. 188 assert(header_.version >= 2); 189 if (header_.version == 2) { 190 return start + reader_->AddressSize(); 191 } else if (header_.version >= 3) { 192 return start + reader_->OffsetSize(); 193 } 194 break; 195 196 case DW_FORM_block1: 197 return start + 1 + reader_->ReadOneByte(start); 198 case DW_FORM_block2: 199 return start + 2 + reader_->ReadTwoBytes(start); 200 case DW_FORM_block4: 201 return start + 4 + reader_->ReadFourBytes(start); 202 case DW_FORM_block: 203 case DW_FORM_exprloc: { 204 uint64 size = reader_->ReadUnsignedLEB128(start, &len); 205 return start + size + len; 206 } 207 case DW_FORM_strp: 208 case DW_FORM_sec_offset: 209 return start + reader_->OffsetSize(); 210 } 211 fprintf(stderr,"Unhandled form type"); 212 return NULL; 213 } 214 215 // Read a DWARF2/3 header. 216 // The header is variable length in DWARF3 (and DWARF2 as extended by 217 // most compilers), and consists of an length field, a version number, 218 // the offset in the .debug_abbrev section for our abbrevs, and an 219 // address size. 220 void CompilationUnit::ReadHeader() { 221 const char* headerptr = buffer_; 222 size_t initial_length_size; 223 224 assert(headerptr + 4 < buffer_ + buffer_length_); 225 const uint64 initial_length 226 = reader_->ReadInitialLength(headerptr, &initial_length_size); 227 headerptr += initial_length_size; 228 header_.length = initial_length; 229 230 assert(headerptr + 2 < buffer_ + buffer_length_); 231 header_.version = reader_->ReadTwoBytes(headerptr); 232 headerptr += 2; 233 234 assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_); 235 header_.abbrev_offset = reader_->ReadOffset(headerptr); 236 headerptr += reader_->OffsetSize(); 237 238 assert(headerptr + 1 < buffer_ + buffer_length_); 239 header_.address_size = reader_->ReadOneByte(headerptr); 240 reader_->SetAddressSize(header_.address_size); 241 headerptr += 1; 242 243 after_header_ = headerptr; 244 245 // This check ensures that we don't have to do checking during the 246 // reading of DIEs. header_.length does not include the size of the 247 // initial length. 248 assert(buffer_ + initial_length_size + header_.length <= 249 buffer_ + buffer_length_); 250 } 251 252 uint64 CompilationUnit::Start() { 253 // First get the debug_info section. ".debug_info" is the name 254 // recommended in the DWARF spec, and used on Linux; "__debug_info" 255 // is the name used in Mac OS X Mach-O files. 256 SectionMap::const_iterator iter = sections_.find(".debug_info"); 257 if (iter == sections_.end()) 258 iter = sections_.find("__debug_info"); 259 assert(iter != sections_.end()); 260 261 // Set up our buffer 262 buffer_ = iter->second.first + offset_from_section_start_; 263 buffer_length_ = iter->second.second - offset_from_section_start_; 264 265 // Read the header 266 ReadHeader(); 267 268 // Figure out the real length from the end of the initial length to 269 // the end of the compilation unit, since that is the value we 270 // return. 271 uint64 ourlength = header_.length; 272 if (reader_->OffsetSize() == 8) 273 ourlength += 12; 274 else 275 ourlength += 4; 276 277 // See if the user wants this compilation unit, and if not, just return. 278 if (!handler_->StartCompilationUnit(offset_from_section_start_, 279 reader_->AddressSize(), 280 reader_->OffsetSize(), 281 header_.length, 282 header_.version)) 283 return ourlength; 284 285 // Otherwise, continue by reading our abbreviation entries. 286 ReadAbbrevs(); 287 288 // Set the string section if we have one. ".debug_str" is the name 289 // recommended in the DWARF spec, and used on Linux; "__debug_str" 290 // is the name used in Mac OS X Mach-O files. 291 iter = sections_.find(".debug_str"); 292 if (iter == sections_.end()) 293 iter = sections_.find("__debug_str"); 294 if (iter != sections_.end()) { 295 string_buffer_ = iter->second.first; 296 string_buffer_length_ = iter->second.second; 297 } 298 299 // Now that we have our abbreviations, start processing DIE's. 300 ProcessDIEs(); 301 302 return ourlength; 303 } 304 305 // If one really wanted, you could merge SkipAttribute and 306 // ProcessAttribute 307 // This is all boring data manipulation and calling of the handler. 308 const char* CompilationUnit::ProcessAttribute( 309 uint64 dieoffset, const char* start, enum DwarfAttribute attr, 310 enum DwarfForm form) { 311 size_t len; 312 313 switch (form) { 314 // DW_FORM_indirect is never used because it is such a space 315 // waster. 316 case DW_FORM_indirect: 317 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start, 318 &len)); 319 start += len; 320 return ProcessAttribute(dieoffset, start, attr, form); 321 322 case DW_FORM_flag_present: 323 handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 1); 324 return start; 325 case DW_FORM_data1: 326 case DW_FORM_flag: 327 handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 328 reader_->ReadOneByte(start)); 329 return start + 1; 330 case DW_FORM_data2: 331 handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 332 reader_->ReadTwoBytes(start)); 333 return start + 2; 334 case DW_FORM_data4: 335 handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 336 reader_->ReadFourBytes(start)); 337 return start + 4; 338 case DW_FORM_data8: 339 handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 340 reader_->ReadEightBytes(start)); 341 return start + 8; 342 case DW_FORM_string: { 343 const char* str = start; 344 handler_->ProcessAttributeString(dieoffset, attr, form, 345 str); 346 return start + strlen(str) + 1; 347 } 348 case DW_FORM_udata: 349 handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 350 reader_->ReadUnsignedLEB128(start, 351 &len)); 352 return start + len; 353 354 case DW_FORM_sdata: 355 handler_->ProcessAttributeSigned(dieoffset, attr, form, 356 reader_->ReadSignedLEB128(start, &len)); 357 return start + len; 358 case DW_FORM_addr: 359 handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 360 reader_->ReadAddress(start)); 361 return start + reader_->AddressSize(); 362 case DW_FORM_sec_offset: 363 handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 364 reader_->ReadOffset(start)); 365 return start + reader_->OffsetSize(); 366 367 case DW_FORM_ref1: 368 handler_->ProcessAttributeReference(dieoffset, attr, form, 369 reader_->ReadOneByte(start) 370 + offset_from_section_start_); 371 return start + 1; 372 case DW_FORM_ref2: 373 handler_->ProcessAttributeReference(dieoffset, attr, form, 374 reader_->ReadTwoBytes(start) 375 + offset_from_section_start_); 376 return start + 2; 377 case DW_FORM_ref4: 378 handler_->ProcessAttributeReference(dieoffset, attr, form, 379 reader_->ReadFourBytes(start) 380 + offset_from_section_start_); 381 return start + 4; 382 case DW_FORM_ref8: 383 handler_->ProcessAttributeReference(dieoffset, attr, form, 384 reader_->ReadEightBytes(start) 385 + offset_from_section_start_); 386 return start + 8; 387 case DW_FORM_ref_udata: 388 handler_->ProcessAttributeReference(dieoffset, attr, form, 389 reader_->ReadUnsignedLEB128(start, 390 &len) 391 + offset_from_section_start_); 392 return start + len; 393 case DW_FORM_ref_addr: 394 // DWARF2 and 3/4 differ on whether ref_addr is address size or 395 // offset size. 396 assert(header_.version >= 2); 397 if (header_.version == 2) { 398 handler_->ProcessAttributeReference(dieoffset, attr, form, 399 reader_->ReadAddress(start)); 400 return start + reader_->AddressSize(); 401 } else if (header_.version >= 3) { 402 handler_->ProcessAttributeReference(dieoffset, attr, form, 403 reader_->ReadOffset(start)); 404 return start + reader_->OffsetSize(); 405 } 406 break; 407 case DW_FORM_ref_sig8: 408 handler_->ProcessAttributeSignature(dieoffset, attr, form, 409 reader_->ReadEightBytes(start)); 410 return start + 8; 411 412 case DW_FORM_block1: { 413 uint64 datalen = reader_->ReadOneByte(start); 414 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1, 415 datalen); 416 return start + 1 + datalen; 417 } 418 case DW_FORM_block2: { 419 uint64 datalen = reader_->ReadTwoBytes(start); 420 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2, 421 datalen); 422 return start + 2 + datalen; 423 } 424 case DW_FORM_block4: { 425 uint64 datalen = reader_->ReadFourBytes(start); 426 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4, 427 datalen); 428 return start + 4 + datalen; 429 } 430 case DW_FORM_block: 431 case DW_FORM_exprloc: { 432 uint64 datalen = reader_->ReadUnsignedLEB128(start, &len); 433 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len, 434 datalen); 435 return start + datalen + len; 436 } 437 case DW_FORM_strp: { 438 assert(string_buffer_ != NULL); 439 440 const uint64 offset = reader_->ReadOffset(start); 441 assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_); 442 443 const char* str = string_buffer_ + offset; 444 handler_->ProcessAttributeString(dieoffset, attr, form, 445 str); 446 return start + reader_->OffsetSize(); 447 } 448 } 449 fprintf(stderr, "Unhandled form type\n"); 450 return NULL; 451 } 452 453 const char* CompilationUnit::ProcessDIE(uint64 dieoffset, 454 const char* start, 455 const Abbrev& abbrev) { 456 for (AttributeList::const_iterator i = abbrev.attributes.begin(); 457 i != abbrev.attributes.end(); 458 i++) { 459 start = ProcessAttribute(dieoffset, start, i->first, i->second); 460 } 461 return start; 462 } 463 464 void CompilationUnit::ProcessDIEs() { 465 const char* dieptr = after_header_; 466 size_t len; 467 468 // lengthstart is the place the length field is based on. 469 // It is the point in the header after the initial length field 470 const char* lengthstart = buffer_; 471 472 // In 64 bit dwarf, the initial length is 12 bytes, because of the 473 // 0xffffffff at the start. 474 if (reader_->OffsetSize() == 8) 475 lengthstart += 12; 476 else 477 lengthstart += 4; 478 479 std::stack<uint64> die_stack; 480 481 while (dieptr < (lengthstart + header_.length)) { 482 // We give the user the absolute offset from the beginning of 483 // debug_info, since they need it to deal with ref_addr forms. 484 uint64 absolute_offset = (dieptr - buffer_) + offset_from_section_start_; 485 486 uint64 abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len); 487 488 dieptr += len; 489 490 // Abbrev == 0 represents the end of a list of children, or padding 491 // at the end of the compilation unit. 492 if (abbrev_num == 0) { 493 if (die_stack.size() == 0) 494 // If it is padding, then we are done with the compilation unit's DIEs. 495 return; 496 const uint64 offset = die_stack.top(); 497 die_stack.pop(); 498 handler_->EndDIE(offset); 499 continue; 500 } 501 502 const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num)); 503 const enum DwarfTag tag = abbrev.tag; 504 if (!handler_->StartDIE(absolute_offset, tag)) { 505 dieptr = SkipDIE(dieptr, abbrev); 506 } else { 507 dieptr = ProcessDIE(absolute_offset, dieptr, abbrev); 508 } 509 510 if (abbrev.has_children) { 511 die_stack.push(absolute_offset); 512 } else { 513 handler_->EndDIE(absolute_offset); 514 } 515 } 516 } 517 518 LineInfo::LineInfo(const char* buffer, uint64 buffer_length, 519 ByteReader* reader, LineInfoHandler* handler): 520 handler_(handler), reader_(reader), buffer_(buffer), 521 buffer_length_(buffer_length) { 522 header_.std_opcode_lengths = NULL; 523 } 524 525 uint64 LineInfo::Start() { 526 ReadHeader(); 527 ReadLines(); 528 return after_header_ - buffer_; 529 } 530 531 // The header for a debug_line section is mildly complicated, because 532 // the line info is very tightly encoded. 533 void LineInfo::ReadHeader() { 534 const char* lineptr = buffer_; 535 size_t initial_length_size; 536 537 const uint64 initial_length 538 = reader_->ReadInitialLength(lineptr, &initial_length_size); 539 540 lineptr += initial_length_size; 541 header_.total_length = initial_length; 542 assert(buffer_ + initial_length_size + header_.total_length <= 543 buffer_ + buffer_length_); 544 545 // Address size *must* be set by CU ahead of time. 546 assert(reader_->AddressSize() != 0); 547 548 header_.version = reader_->ReadTwoBytes(lineptr); 549 lineptr += 2; 550 551 header_.prologue_length = reader_->ReadOffset(lineptr); 552 lineptr += reader_->OffsetSize(); 553 554 header_.min_insn_length = reader_->ReadOneByte(lineptr); 555 lineptr += 1; 556 557 header_.default_is_stmt = reader_->ReadOneByte(lineptr); 558 lineptr += 1; 559 560 header_.line_base = *reinterpret_cast<const int8*>(lineptr); 561 lineptr += 1; 562 563 header_.line_range = reader_->ReadOneByte(lineptr); 564 lineptr += 1; 565 566 header_.opcode_base = reader_->ReadOneByte(lineptr); 567 lineptr += 1; 568 569 header_.std_opcode_lengths = new std::vector<unsigned char>; 570 header_.std_opcode_lengths->resize(header_.opcode_base + 1); 571 (*header_.std_opcode_lengths)[0] = 0; 572 for (int i = 1; i < header_.opcode_base; i++) { 573 (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr); 574 lineptr += 1; 575 } 576 577 // It is legal for the directory entry table to be empty. 578 if (*lineptr) { 579 uint32 dirindex = 1; 580 while (*lineptr) { 581 const char* dirname = lineptr; 582 handler_->DefineDir(dirname, dirindex); 583 lineptr += strlen(dirname) + 1; 584 dirindex++; 585 } 586 } 587 lineptr++; 588 589 // It is also legal for the file entry table to be empty. 590 if (*lineptr) { 591 uint32 fileindex = 1; 592 size_t len; 593 while (*lineptr) { 594 const char* filename = lineptr; 595 lineptr += strlen(filename) + 1; 596 597 uint64 dirindex = reader_->ReadUnsignedLEB128(lineptr, &len); 598 lineptr += len; 599 600 uint64 mod_time = reader_->ReadUnsignedLEB128(lineptr, &len); 601 lineptr += len; 602 603 uint64 filelength = reader_->ReadUnsignedLEB128(lineptr, &len); 604 lineptr += len; 605 handler_->DefineFile(filename, fileindex, static_cast<uint32>(dirindex), 606 mod_time, filelength); 607 fileindex++; 608 } 609 } 610 lineptr++; 611 612 after_header_ = lineptr; 613 } 614 615 /* static */ 616 bool LineInfo::ProcessOneOpcode(ByteReader* reader, 617 LineInfoHandler* handler, 618 const struct LineInfoHeader &header, 619 const char* start, 620 struct LineStateMachine* lsm, 621 size_t* len, 622 uintptr pc, 623 bool *lsm_passes_pc) { 624 size_t oplen = 0; 625 size_t templen; 626 uint8 opcode = reader->ReadOneByte(start); 627 oplen++; 628 start++; 629 630 // If the opcode is great than the opcode_base, it is a special 631 // opcode. Most line programs consist mainly of special opcodes. 632 if (opcode >= header.opcode_base) { 633 opcode -= header.opcode_base; 634 const int64 advance_address = (opcode / header.line_range) 635 * header.min_insn_length; 636 const int32 advance_line = (opcode % header.line_range) 637 + header.line_base; 638 639 // Check if the lsm passes "pc". If so, mark it as passed. 640 if (lsm_passes_pc && 641 lsm->address <= pc && pc < lsm->address + advance_address) { 642 *lsm_passes_pc = true; 643 } 644 645 lsm->address += advance_address; 646 lsm->line_num += advance_line; 647 lsm->basic_block = true; 648 *len = oplen; 649 return true; 650 } 651 652 // Otherwise, we have the regular opcodes 653 switch (opcode) { 654 case DW_LNS_copy: { 655 lsm->basic_block = false; 656 *len = oplen; 657 return true; 658 } 659 660 case DW_LNS_advance_pc: { 661 uint64 advance_address = reader->ReadUnsignedLEB128(start, &templen); 662 oplen += templen; 663 664 // Check if the lsm passes "pc". If so, mark it as passed. 665 if (lsm_passes_pc && lsm->address <= pc && 666 pc < lsm->address + header.min_insn_length * advance_address) { 667 *lsm_passes_pc = true; 668 } 669 670 lsm->address += header.min_insn_length * advance_address; 671 } 672 break; 673 case DW_LNS_advance_line: { 674 const int64 advance_line = reader->ReadSignedLEB128(start, &templen); 675 oplen += templen; 676 lsm->line_num += static_cast<int32>(advance_line); 677 678 // With gcc 4.2.1, we can get the line_no here for the first time 679 // since DW_LNS_advance_line is called after DW_LNE_set_address is 680 // called. So we check if the lsm passes "pc" here, not in 681 // DW_LNE_set_address. 682 if (lsm_passes_pc && lsm->address == pc) { 683 *lsm_passes_pc = true; 684 } 685 } 686 break; 687 case DW_LNS_set_file: { 688 const uint64 fileno = reader->ReadUnsignedLEB128(start, &templen); 689 oplen += templen; 690 lsm->file_num = static_cast<uint32>(fileno); 691 } 692 break; 693 case DW_LNS_set_column: { 694 const uint64 colno = reader->ReadUnsignedLEB128(start, &templen); 695 oplen += templen; 696 lsm->column_num = static_cast<uint32>(colno); 697 } 698 break; 699 case DW_LNS_negate_stmt: { 700 lsm->is_stmt = !lsm->is_stmt; 701 } 702 break; 703 case DW_LNS_set_basic_block: { 704 lsm->basic_block = true; 705 } 706 break; 707 case DW_LNS_fixed_advance_pc: { 708 const uint16 advance_address = reader->ReadTwoBytes(start); 709 oplen += 2; 710 711 // Check if the lsm passes "pc". If so, mark it as passed. 712 if (lsm_passes_pc && 713 lsm->address <= pc && pc < lsm->address + advance_address) { 714 *lsm_passes_pc = true; 715 } 716 717 lsm->address += advance_address; 718 } 719 break; 720 case DW_LNS_const_add_pc: { 721 const int64 advance_address = header.min_insn_length 722 * ((255 - header.opcode_base) 723 / header.line_range); 724 725 // Check if the lsm passes "pc". If so, mark it as passed. 726 if (lsm_passes_pc && 727 lsm->address <= pc && pc < lsm->address + advance_address) { 728 *lsm_passes_pc = true; 729 } 730 731 lsm->address += advance_address; 732 } 733 break; 734 case DW_LNS_extended_op: { 735 const uint64 extended_op_len = reader->ReadUnsignedLEB128(start, 736 &templen); 737 start += templen; 738 oplen += templen + extended_op_len; 739 740 const uint64 extended_op = reader->ReadOneByte(start); 741 start++; 742 743 switch (extended_op) { 744 case DW_LNE_end_sequence: { 745 lsm->end_sequence = true; 746 *len = oplen; 747 return true; 748 } 749 break; 750 case DW_LNE_set_address: { 751 // With gcc 4.2.1, we cannot tell the line_no here since 752 // DW_LNE_set_address is called before DW_LNS_advance_line is 753 // called. So we do not check if the lsm passes "pc" here. See 754 // also the comment in DW_LNS_advance_line. 755 uint64 address = reader->ReadAddress(start); 756 lsm->address = address; 757 } 758 break; 759 case DW_LNE_define_file: { 760 const char* filename = start; 761 762 templen = strlen(filename) + 1; 763 start += templen; 764 765 uint64 dirindex = reader->ReadUnsignedLEB128(start, &templen); 766 oplen += templen; 767 768 const uint64 mod_time = reader->ReadUnsignedLEB128(start, 769 &templen); 770 oplen += templen; 771 772 const uint64 filelength = reader->ReadUnsignedLEB128(start, 773 &templen); 774 oplen += templen; 775 776 if (handler) { 777 handler->DefineFile(filename, -1, static_cast<uint32>(dirindex), 778 mod_time, filelength); 779 } 780 } 781 break; 782 } 783 } 784 break; 785 786 default: { 787 // Ignore unknown opcode silently 788 if (header.std_opcode_lengths) { 789 for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) { 790 reader->ReadUnsignedLEB128(start, &templen); 791 start += templen; 792 oplen += templen; 793 } 794 } 795 } 796 break; 797 } 798 *len = oplen; 799 return false; 800 } 801 802 void LineInfo::ReadLines() { 803 struct LineStateMachine lsm; 804 805 // lengthstart is the place the length field is based on. 806 // It is the point in the header after the initial length field 807 const char* lengthstart = buffer_; 808 809 // In 64 bit dwarf, the initial length is 12 bytes, because of the 810 // 0xffffffff at the start. 811 if (reader_->OffsetSize() == 8) 812 lengthstart += 12; 813 else 814 lengthstart += 4; 815 816 const char* lineptr = after_header_; 817 lsm.Reset(header_.default_is_stmt); 818 819 // The LineInfoHandler interface expects each line's length along 820 // with its address, but DWARF only provides addresses (sans 821 // length), and an end-of-sequence address; one infers the length 822 // from the next address. So we report a line only when we get the 823 // next line's address, or the end-of-sequence address. 824 bool have_pending_line = false; 825 uint64 pending_address = 0; 826 uint32 pending_file_num = 0, pending_line_num = 0, pending_column_num = 0; 827 828 while (lineptr < lengthstart + header_.total_length) { 829 size_t oplength; 830 bool add_row = ProcessOneOpcode(reader_, handler_, header_, 831 lineptr, &lsm, &oplength, (uintptr)-1, 832 NULL); 833 if (add_row) { 834 if (have_pending_line) 835 handler_->AddLine(pending_address, lsm.address - pending_address, 836 pending_file_num, pending_line_num, 837 pending_column_num); 838 if (lsm.end_sequence) { 839 lsm.Reset(header_.default_is_stmt); 840 have_pending_line = false; 841 } else { 842 pending_address = lsm.address; 843 pending_file_num = lsm.file_num; 844 pending_line_num = lsm.line_num; 845 pending_column_num = lsm.column_num; 846 have_pending_line = true; 847 } 848 } 849 lineptr += oplength; 850 } 851 852 after_header_ = lengthstart + header_.total_length; 853 } 854 855 // A DWARF rule for recovering the address or value of a register, or 856 // computing the canonical frame address. There is one subclass of this for 857 // each '*Rule' member function in CallFrameInfo::Handler. 858 // 859 // It's annoying that we have to handle Rules using pointers (because 860 // the concrete instances can have an arbitrary size). They're small, 861 // so it would be much nicer if we could just handle them by value 862 // instead of fretting about ownership and destruction. 863 // 864 // It seems like all these could simply be instances of std::tr1::bind, 865 // except that we need instances to be EqualityComparable, too. 866 // 867 // This could logically be nested within State, but then the qualified names 868 // get horrendous. 869 class CallFrameInfo::Rule { 870 public: 871 virtual ~Rule() { } 872 873 // Tell HANDLER that, at ADDRESS in the program, REG can be recovered using 874 // this rule. If REG is kCFARegister, then this rule describes how to compute 875 // the canonical frame address. Return what the HANDLER member function 876 // returned. 877 virtual bool Handle(Handler *handler, 878 uint64 address, int reg) const = 0; 879 880 // Equality on rules. We use these to decide which rules we need 881 // to report after a DW_CFA_restore_state instruction. 882 virtual bool operator==(const Rule &rhs) const = 0; 883 884 bool operator!=(const Rule &rhs) const { return ! (*this == rhs); } 885 886 // Return a pointer to a copy of this rule. 887 virtual Rule *Copy() const = 0; 888 889 // If this is a base+offset rule, change its base register to REG. 890 // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.) 891 virtual void SetBaseRegister(unsigned reg) { } 892 893 // If this is a base+offset rule, change its offset to OFFSET. Otherwise, 894 // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.) 895 virtual void SetOffset(long long offset) { } 896 }; 897 898 // Rule: the value the register had in the caller cannot be recovered. 899 class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule { 900 public: 901 UndefinedRule() { } 902 ~UndefinedRule() { } 903 bool Handle(Handler *handler, uint64 address, int reg) const { 904 return handler->UndefinedRule(address, reg); 905 } 906 bool operator==(const Rule &rhs) const { 907 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has 908 // been carefully considered; cheap RTTI-like workarounds are forbidden. 909 const UndefinedRule *our_rhs = dynamic_cast<const UndefinedRule *>(&rhs); 910 return (our_rhs != NULL); 911 } 912 Rule *Copy() const { return new UndefinedRule(*this); } 913 }; 914 915 // Rule: the register's value is the same as that it had in the caller. 916 class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule { 917 public: 918 SameValueRule() { } 919 ~SameValueRule() { } 920 bool Handle(Handler *handler, uint64 address, int reg) const { 921 return handler->SameValueRule(address, reg); 922 } 923 bool operator==(const Rule &rhs) const { 924 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has 925 // been carefully considered; cheap RTTI-like workarounds are forbidden. 926 const SameValueRule *our_rhs = dynamic_cast<const SameValueRule *>(&rhs); 927 return (our_rhs != NULL); 928 } 929 Rule *Copy() const { return new SameValueRule(*this); } 930 }; 931 932 // Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER 933 // may be CallFrameInfo::Handler::kCFARegister. 934 class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule { 935 public: 936 OffsetRule(int base_register, long offset) 937 : base_register_(base_register), offset_(offset) { } 938 ~OffsetRule() { } 939 bool Handle(Handler *handler, uint64 address, int reg) const { 940 return handler->OffsetRule(address, reg, base_register_, offset_); 941 } 942 bool operator==(const Rule &rhs) const { 943 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has 944 // been carefully considered; cheap RTTI-like workarounds are forbidden. 945 const OffsetRule *our_rhs = dynamic_cast<const OffsetRule *>(&rhs); 946 return (our_rhs && 947 base_register_ == our_rhs->base_register_ && 948 offset_ == our_rhs->offset_); 949 } 950 Rule *Copy() const { return new OffsetRule(*this); } 951 // We don't actually need SetBaseRegister or SetOffset here, since they 952 // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it 953 // doesn't make sense to use OffsetRule for computing the CFA: it 954 // computes the address at which a register is saved, not a value. 955 private: 956 int base_register_; 957 long offset_; 958 }; 959 960 // Rule: the value the register had in the caller is the value of 961 // BASE_REGISTER plus offset. BASE_REGISTER may be 962 // CallFrameInfo::Handler::kCFARegister. 963 class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule { 964 public: 965 ValOffsetRule(int base_register, long offset) 966 : base_register_(base_register), offset_(offset) { } 967 ~ValOffsetRule() { } 968 bool Handle(Handler *handler, uint64 address, int reg) const { 969 return handler->ValOffsetRule(address, reg, base_register_, offset_); 970 } 971 bool operator==(const Rule &rhs) const { 972 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has 973 // been carefully considered; cheap RTTI-like workarounds are forbidden. 974 const ValOffsetRule *our_rhs = dynamic_cast<const ValOffsetRule *>(&rhs); 975 return (our_rhs && 976 base_register_ == our_rhs->base_register_ && 977 offset_ == our_rhs->offset_); 978 } 979 Rule *Copy() const { return new ValOffsetRule(*this); } 980 void SetBaseRegister(unsigned reg) { base_register_ = reg; } 981 void SetOffset(long long offset) { offset_ = offset; } 982 private: 983 int base_register_; 984 long offset_; 985 }; 986 987 // Rule: the register has been saved in another register REGISTER_NUMBER_. 988 class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule { 989 public: 990 explicit RegisterRule(int register_number) 991 : register_number_(register_number) { } 992 ~RegisterRule() { } 993 bool Handle(Handler *handler, uint64 address, int reg) const { 994 return handler->RegisterRule(address, reg, register_number_); 995 } 996 bool operator==(const Rule &rhs) const { 997 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has 998 // been carefully considered; cheap RTTI-like workarounds are forbidden. 999 const RegisterRule *our_rhs = dynamic_cast<const RegisterRule *>(&rhs); 1000 return (our_rhs && register_number_ == our_rhs->register_number_); 1001 } 1002 Rule *Copy() const { return new RegisterRule(*this); } 1003 private: 1004 int register_number_; 1005 }; 1006 1007 // Rule: EXPRESSION evaluates to the address at which the register is saved. 1008 class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule { 1009 public: 1010 explicit ExpressionRule(const string &expression) 1011 : expression_(expression) { } 1012 ~ExpressionRule() { } 1013 bool Handle(Handler *handler, uint64 address, int reg) const { 1014 return handler->ExpressionRule(address, reg, expression_); 1015 } 1016 bool operator==(const Rule &rhs) const { 1017 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has 1018 // been carefully considered; cheap RTTI-like workarounds are forbidden. 1019 const ExpressionRule *our_rhs = dynamic_cast<const ExpressionRule *>(&rhs); 1020 return (our_rhs && expression_ == our_rhs->expression_); 1021 } 1022 Rule *Copy() const { return new ExpressionRule(*this); } 1023 private: 1024 string expression_; 1025 }; 1026 1027 // Rule: EXPRESSION evaluates to the address at which the register is saved. 1028 class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule { 1029 public: 1030 explicit ValExpressionRule(const string &expression) 1031 : expression_(expression) { } 1032 ~ValExpressionRule() { } 1033 bool Handle(Handler *handler, uint64 address, int reg) const { 1034 return handler->ValExpressionRule(address, reg, expression_); 1035 } 1036 bool operator==(const Rule &rhs) const { 1037 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has 1038 // been carefully considered; cheap RTTI-like workarounds are forbidden. 1039 const ValExpressionRule *our_rhs = 1040 dynamic_cast<const ValExpressionRule *>(&rhs); 1041 return (our_rhs && expression_ == our_rhs->expression_); 1042 } 1043 Rule *Copy() const { return new ValExpressionRule(*this); } 1044 private: 1045 string expression_; 1046 }; 1047 1048 // A map from register numbers to rules. 1049 class CallFrameInfo::RuleMap { 1050 public: 1051 RuleMap() : cfa_rule_(NULL) { } 1052 RuleMap(const RuleMap &rhs) : cfa_rule_(NULL) { *this = rhs; } 1053 ~RuleMap() { Clear(); } 1054 1055 RuleMap &operator=(const RuleMap &rhs); 1056 1057 // Set the rule for computing the CFA to RULE. Take ownership of RULE. 1058 void SetCFARule(Rule *rule) { delete cfa_rule_; cfa_rule_ = rule; } 1059 1060 // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains 1061 // ownership of the rule. We use this for DW_CFA_def_cfa_offset and 1062 // DW_CFA_def_cfa_register, and for detecting references to the CFA before 1063 // a rule for it has been established. 1064 Rule *CFARule() const { return cfa_rule_; } 1065 1066 // Return the rule for REG, or NULL if there is none. The caller takes 1067 // ownership of the result. 1068 Rule *RegisterRule(int reg) const; 1069 1070 // Set the rule for computing REG to RULE. Take ownership of RULE. 1071 void SetRegisterRule(int reg, Rule *rule); 1072 1073 // Make all the appropriate calls to HANDLER as if we were changing from 1074 // this RuleMap to NEW_RULES at ADDRESS. We use this to implement 1075 // DW_CFA_restore_state, where lots of rules can change simultaneously. 1076 // Return true if all handlers returned true; otherwise, return false. 1077 bool HandleTransitionTo(Handler *handler, uint64 address, 1078 const RuleMap &new_rules) const; 1079 1080 private: 1081 // A map from register numbers to Rules. 1082 typedef std::map<int, Rule *> RuleByNumber; 1083 1084 // Remove all register rules and clear cfa_rule_. 1085 void Clear(); 1086 1087 // The rule for computing the canonical frame address. This RuleMap owns 1088 // this rule. 1089 Rule *cfa_rule_; 1090 1091 // A map from register numbers to postfix expressions to recover 1092 // their values. This RuleMap owns the Rules the map refers to. 1093 RuleByNumber registers_; 1094 }; 1095 1096 CallFrameInfo::RuleMap &CallFrameInfo::RuleMap::operator=(const RuleMap &rhs) { 1097 Clear(); 1098 // Since each map owns the rules it refers to, assignment must copy them. 1099 if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy(); 1100 for (RuleByNumber::const_iterator it = rhs.registers_.begin(); 1101 it != rhs.registers_.end(); it++) 1102 registers_[it->first] = it->second->Copy(); 1103 return *this; 1104 } 1105 1106 CallFrameInfo::Rule *CallFrameInfo::RuleMap::RegisterRule(int reg) const { 1107 assert(reg != Handler::kCFARegister); 1108 RuleByNumber::const_iterator it = registers_.find(reg); 1109 if (it != registers_.end()) 1110 return it->second->Copy(); 1111 else 1112 return NULL; 1113 } 1114 1115 void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule *rule) { 1116 assert(reg != Handler::kCFARegister); 1117 assert(rule); 1118 Rule **slot = ®isters_[reg]; 1119 delete *slot; 1120 *slot = rule; 1121 } 1122 1123 bool CallFrameInfo::RuleMap::HandleTransitionTo( 1124 Handler *handler, 1125 uint64 address, 1126 const RuleMap &new_rules) const { 1127 // Transition from cfa_rule_ to new_rules.cfa_rule_. 1128 if (cfa_rule_ && new_rules.cfa_rule_) { 1129 if (*cfa_rule_ != *new_rules.cfa_rule_ && 1130 !new_rules.cfa_rule_->Handle(handler, address, 1131 Handler::kCFARegister)) 1132 return false; 1133 } else if (cfa_rule_) { 1134 // this RuleMap has a CFA rule but new_rules doesn't. 1135 // CallFrameInfo::Handler has no way to handle this --- and shouldn't; 1136 // it's garbage input. The instruction interpreter should have 1137 // detected this and warned, so take no action here. 1138 } else if (new_rules.cfa_rule_) { 1139 // This shouldn't be possible: NEW_RULES is some prior state, and 1140 // there's no way to remove entries. 1141 assert(0); 1142 } else { 1143 // Both CFA rules are empty. No action needed. 1144 } 1145 1146 // Traverse the two maps in order by register number, and report 1147 // whatever differences we find. 1148 RuleByNumber::const_iterator old_it = registers_.begin(); 1149 RuleByNumber::const_iterator new_it = new_rules.registers_.begin(); 1150 while (old_it != registers_.end() && new_it != new_rules.registers_.end()) { 1151 if (old_it->first < new_it->first) { 1152 // This RuleMap has an entry for old_it->first, but NEW_RULES 1153 // doesn't. 1154 // 1155 // This isn't really the right thing to do, but since CFI generally 1156 // only mentions callee-saves registers, and GCC's convention for 1157 // callee-saves registers is that they are unchanged, it's a good 1158 // approximation. 1159 if (!handler->SameValueRule(address, old_it->first)) 1160 return false; 1161 old_it++; 1162 } else if (old_it->first > new_it->first) { 1163 // NEW_RULES has entry for new_it->first, but this RuleMap 1164 // doesn't. This shouldn't be possible: NEW_RULES is some prior 1165 // state, and there's no way to remove entries. 1166 assert(0); 1167 } else { 1168 // Both maps have an entry for this register. Report the new 1169 // rule if it is different. 1170 if (*old_it->second != *new_it->second && 1171 !new_it->second->Handle(handler, address, new_it->first)) 1172 return false; 1173 new_it++, old_it++; 1174 } 1175 } 1176 // Finish off entries from this RuleMap with no counterparts in new_rules. 1177 while (old_it != registers_.end()) { 1178 if (!handler->SameValueRule(address, old_it->first)) 1179 return false; 1180 old_it++; 1181 } 1182 // Since we only make transitions from a rule set to some previously 1183 // saved rule set, and we can only add rules to the map, NEW_RULES 1184 // must have fewer rules than *this. 1185 assert(new_it == new_rules.registers_.end()); 1186 1187 return true; 1188 } 1189 1190 // Remove all register rules and clear cfa_rule_. 1191 void CallFrameInfo::RuleMap::Clear() { 1192 delete cfa_rule_; 1193 cfa_rule_ = NULL; 1194 for (RuleByNumber::iterator it = registers_.begin(); 1195 it != registers_.end(); it++) 1196 delete it->second; 1197 registers_.clear(); 1198 } 1199 1200 // The state of the call frame information interpreter as it processes 1201 // instructions from a CIE and FDE. 1202 class CallFrameInfo::State { 1203 public: 1204 // Create a call frame information interpreter state with the given 1205 // reporter, reader, handler, and initial call frame info address. 1206 State(ByteReader *reader, Handler *handler, Reporter *reporter, 1207 uint64 address) 1208 : reader_(reader), handler_(handler), reporter_(reporter), 1209 address_(address), entry_(NULL), cursor_(NULL) { } 1210 1211 // Interpret instructions from CIE, save the resulting rule set for 1212 // DW_CFA_restore instructions, and return true. On error, report 1213 // the problem to reporter_ and return false. 1214 bool InterpretCIE(const CIE &cie); 1215 1216 // Interpret instructions from FDE, and return true. On error, 1217 // report the problem to reporter_ and return false. 1218 bool InterpretFDE(const FDE &fde); 1219 1220 private: 1221 // The operands of a CFI instruction, for ParseOperands. 1222 struct Operands { 1223 unsigned register_number; // A register number. 1224 uint64 offset; // An offset or address. 1225 long signed_offset; // A signed offset. 1226 string expression; // A DWARF expression. 1227 }; 1228 1229 // Parse CFI instruction operands from STATE's instruction stream as 1230 // described by FORMAT. On success, populate OPERANDS with the 1231 // results, and return true. On failure, report the problem and 1232 // return false. 1233 // 1234 // Each character of FORMAT should be one of the following: 1235 // 1236 // 'r' unsigned LEB128 register number (OPERANDS->register_number) 1237 // 'o' unsigned LEB128 offset (OPERANDS->offset) 1238 // 's' signed LEB128 offset (OPERANDS->signed_offset) 1239 // 'a' machine-size address (OPERANDS->offset) 1240 // (If the CIE has a 'z' augmentation string, 'a' uses the 1241 // encoding specified by the 'R' argument.) 1242 // '1' a one-byte offset (OPERANDS->offset) 1243 // '2' a two-byte offset (OPERANDS->offset) 1244 // '4' a four-byte offset (OPERANDS->offset) 1245 // '8' an eight-byte offset (OPERANDS->offset) 1246 // 'e' a DW_FORM_block holding a (OPERANDS->expression) 1247 // DWARF expression 1248 bool ParseOperands(const char *format, Operands *operands); 1249 1250 // Interpret one CFI instruction from STATE's instruction stream, update 1251 // STATE, report any rule changes to handler_, and return true. On 1252 // failure, report the problem and return false. 1253 bool DoInstruction(); 1254 1255 // The following Do* member functions are subroutines of DoInstruction, 1256 // factoring out the actual work of operations that have several 1257 // different encodings. 1258 1259 // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and 1260 // return true. On failure, report and return false. (Used for 1261 // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.) 1262 bool DoDefCFA(unsigned base_register, long offset); 1263 1264 // Change the offset of the CFA rule to OFFSET, and return true. On 1265 // failure, report and return false. (Subroutine for 1266 // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.) 1267 bool DoDefCFAOffset(long offset); 1268 1269 // Specify that REG can be recovered using RULE, and return true. On 1270 // failure, report and return false. 1271 bool DoRule(unsigned reg, Rule *rule); 1272 1273 // Specify that REG can be found at OFFSET from the CFA, and return true. 1274 // On failure, report and return false. (Subroutine for DW_CFA_offset, 1275 // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.) 1276 bool DoOffset(unsigned reg, long offset); 1277 1278 // Specify that the caller's value for REG is the CFA plus OFFSET, 1279 // and return true. On failure, report and return false. (Subroutine 1280 // for DW_CFA_val_offset and DW_CFA_val_offset_sf.) 1281 bool DoValOffset(unsigned reg, long offset); 1282 1283 // Restore REG to the rule established in the CIE, and return true. On 1284 // failure, report and return false. (Subroutine for DW_CFA_restore and 1285 // DW_CFA_restore_extended.) 1286 bool DoRestore(unsigned reg); 1287 1288 // Return the section offset of the instruction at cursor. For use 1289 // in error messages. 1290 uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); } 1291 1292 // Report that entry_ is incomplete, and return false. For brevity. 1293 bool ReportIncomplete() { 1294 reporter_->Incomplete(entry_->offset, entry_->kind); 1295 return false; 1296 } 1297 1298 // For reading multi-byte values with the appropriate endianness. 1299 ByteReader *reader_; 1300 1301 // The handler to which we should report the data we find. 1302 Handler *handler_; 1303 1304 // For reporting problems in the info we're parsing. 1305 Reporter *reporter_; 1306 1307 // The code address to which the next instruction in the stream applies. 1308 uint64 address_; 1309 1310 // The entry whose instructions we are currently processing. This is 1311 // first a CIE, and then an FDE. 1312 const Entry *entry_; 1313 1314 // The next instruction to process. 1315 const char *cursor_; 1316 1317 // The current set of rules. 1318 RuleMap rules_; 1319 1320 // The set of rules established by the CIE, used by DW_CFA_restore 1321 // and DW_CFA_restore_extended. We set this after interpreting the 1322 // CIE's instructions. 1323 RuleMap cie_rules_; 1324 1325 // A stack of saved states, for DW_CFA_remember_state and 1326 // DW_CFA_restore_state. 1327 std::stack<RuleMap> saved_rules_; 1328 }; 1329 1330 bool CallFrameInfo::State::InterpretCIE(const CIE &cie) { 1331 entry_ = &cie; 1332 cursor_ = entry_->instructions; 1333 while (cursor_ < entry_->end) 1334 if (!DoInstruction()) 1335 return false; 1336 // Note the rules established by the CIE, for use by DW_CFA_restore 1337 // and DW_CFA_restore_extended. 1338 cie_rules_ = rules_; 1339 return true; 1340 } 1341 1342 bool CallFrameInfo::State::InterpretFDE(const FDE &fde) { 1343 entry_ = &fde; 1344 cursor_ = entry_->instructions; 1345 while (cursor_ < entry_->end) 1346 if (!DoInstruction()) 1347 return false; 1348 return true; 1349 } 1350 1351 bool CallFrameInfo::State::ParseOperands(const char *format, 1352 Operands *operands) { 1353 size_t len; 1354 const char *operand; 1355 1356 for (operand = format; *operand; operand++) { 1357 size_t bytes_left = entry_->end - cursor_; 1358 switch (*operand) { 1359 case 'r': 1360 operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len); 1361 if (len > bytes_left) return ReportIncomplete(); 1362 cursor_ += len; 1363 break; 1364 1365 case 'o': 1366 operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len); 1367 if (len > bytes_left) return ReportIncomplete(); 1368 cursor_ += len; 1369 break; 1370 1371 case 's': 1372 operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len); 1373 if (len > bytes_left) return ReportIncomplete(); 1374 cursor_ += len; 1375 break; 1376 1377 case 'a': 1378 operands->offset = 1379 reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding, 1380 &len); 1381 if (len > bytes_left) return ReportIncomplete(); 1382 cursor_ += len; 1383 break; 1384 1385 case '1': 1386 if (1 > bytes_left) return ReportIncomplete(); 1387 operands->offset = static_cast<unsigned char>(*cursor_++); 1388 break; 1389 1390 case '2': 1391 if (2 > bytes_left) return ReportIncomplete(); 1392 operands->offset = reader_->ReadTwoBytes(cursor_); 1393 cursor_ += 2; 1394 break; 1395 1396 case '4': 1397 if (4 > bytes_left) return ReportIncomplete(); 1398 operands->offset = reader_->ReadFourBytes(cursor_); 1399 cursor_ += 4; 1400 break; 1401 1402 case '8': 1403 if (8 > bytes_left) return ReportIncomplete(); 1404 operands->offset = reader_->ReadEightBytes(cursor_); 1405 cursor_ += 8; 1406 break; 1407 1408 case 'e': { 1409 size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len); 1410 if (len > bytes_left || expression_length > bytes_left - len) 1411 return ReportIncomplete(); 1412 cursor_ += len; 1413 operands->expression = string(cursor_, expression_length); 1414 cursor_ += expression_length; 1415 break; 1416 } 1417 1418 default: 1419 assert(0); 1420 } 1421 } 1422 1423 return true; 1424 } 1425 1426 bool CallFrameInfo::State::DoInstruction() { 1427 CIE *cie = entry_->cie; 1428 Operands ops; 1429 1430 // Our entry's kind should have been set by now. 1431 assert(entry_->kind != kUnknown); 1432 1433 // We shouldn't have been invoked unless there were more 1434 // instructions to parse. 1435 assert(cursor_ < entry_->end); 1436 1437 unsigned opcode = *cursor_++; 1438 if ((opcode & 0xc0) != 0) { 1439 switch (opcode & 0xc0) { 1440 // Advance the address. 1441 case DW_CFA_advance_loc: { 1442 size_t code_offset = opcode & 0x3f; 1443 address_ += code_offset * cie->code_alignment_factor; 1444 break; 1445 } 1446 1447 // Find a register at an offset from the CFA. 1448 case DW_CFA_offset: 1449 if (!ParseOperands("o", &ops) || 1450 !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor)) 1451 return false; 1452 break; 1453 1454 // Restore the rule established for a register by the CIE. 1455 case DW_CFA_restore: 1456 if (!DoRestore(opcode & 0x3f)) return false; 1457 break; 1458 1459 // The 'if' above should have excluded this possibility. 1460 default: 1461 assert(0); 1462 } 1463 1464 // Return here, so the big switch below won't be indented. 1465 return true; 1466 } 1467 1468 switch (opcode) { 1469 // Set the address. 1470 case DW_CFA_set_loc: 1471 if (!ParseOperands("a", &ops)) return false; 1472 address_ = ops.offset; 1473 break; 1474 1475 // Advance the address. 1476 case DW_CFA_advance_loc1: 1477 if (!ParseOperands("1", &ops)) return false; 1478 address_ += ops.offset * cie->code_alignment_factor; 1479 break; 1480 1481 // Advance the address. 1482 case DW_CFA_advance_loc2: 1483 if (!ParseOperands("2", &ops)) return false; 1484 address_ += ops.offset * cie->code_alignment_factor; 1485 break; 1486 1487 // Advance the address. 1488 case DW_CFA_advance_loc4: 1489 if (!ParseOperands("4", &ops)) return false; 1490 address_ += ops.offset * cie->code_alignment_factor; 1491 break; 1492 1493 // Advance the address. 1494 case DW_CFA_MIPS_advance_loc8: 1495 if (!ParseOperands("8", &ops)) return false; 1496 address_ += ops.offset * cie->code_alignment_factor; 1497 break; 1498 1499 // Compute the CFA by adding an offset to a register. 1500 case DW_CFA_def_cfa: 1501 if (!ParseOperands("ro", &ops) || 1502 !DoDefCFA(ops.register_number, ops.offset)) 1503 return false; 1504 break; 1505 1506 // Compute the CFA by adding an offset to a register. 1507 case DW_CFA_def_cfa_sf: 1508 if (!ParseOperands("rs", &ops) || 1509 !DoDefCFA(ops.register_number, 1510 ops.signed_offset * cie->data_alignment_factor)) 1511 return false; 1512 break; 1513 1514 // Change the base register used to compute the CFA. 1515 case DW_CFA_def_cfa_register: { 1516 if (!ParseOperands("r", &ops)) return false; 1517 Rule *cfa_rule = rules_.CFARule(); 1518 if (!cfa_rule) { 1519 if (!DoDefCFA(ops.register_number, ops.offset)) { 1520 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); 1521 return false; 1522 } 1523 } else { 1524 cfa_rule->SetBaseRegister(ops.register_number); 1525 if (!cfa_rule->Handle(handler_, address_, 1526 Handler::kCFARegister)) 1527 return false; 1528 } 1529 break; 1530 } 1531 1532 // Change the offset used to compute the CFA. 1533 case DW_CFA_def_cfa_offset: 1534 if (!ParseOperands("o", &ops) || 1535 !DoDefCFAOffset(ops.offset)) 1536 return false; 1537 break; 1538 1539 // Change the offset used to compute the CFA. 1540 case DW_CFA_def_cfa_offset_sf: 1541 if (!ParseOperands("s", &ops) || 1542 !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor)) 1543 return false; 1544 break; 1545 1546 // Specify an expression whose value is the CFA. 1547 case DW_CFA_def_cfa_expression: { 1548 if (!ParseOperands("e", &ops)) 1549 return false; 1550 Rule *rule = new ValExpressionRule(ops.expression); 1551 rules_.SetCFARule(rule); 1552 if (!rule->Handle(handler_, address_, 1553 Handler::kCFARegister)) 1554 return false; 1555 break; 1556 } 1557 1558 // The register's value cannot be recovered. 1559 case DW_CFA_undefined: { 1560 if (!ParseOperands("r", &ops) || 1561 !DoRule(ops.register_number, new UndefinedRule())) 1562 return false; 1563 break; 1564 } 1565 1566 // The register's value is unchanged from its value in the caller. 1567 case DW_CFA_same_value: { 1568 if (!ParseOperands("r", &ops) || 1569 !DoRule(ops.register_number, new SameValueRule())) 1570 return false; 1571 break; 1572 } 1573 1574 // Find a register at an offset from the CFA. 1575 case DW_CFA_offset_extended: 1576 if (!ParseOperands("ro", &ops) || 1577 !DoOffset(ops.register_number, 1578 ops.offset * cie->data_alignment_factor)) 1579 return false; 1580 break; 1581 1582 // The register is saved at an offset from the CFA. 1583 case DW_CFA_offset_extended_sf: 1584 if (!ParseOperands("rs", &ops) || 1585 !DoOffset(ops.register_number, 1586 ops.signed_offset * cie->data_alignment_factor)) 1587 return false; 1588 break; 1589 1590 // The register is saved at an offset from the CFA. 1591 case DW_CFA_GNU_negative_offset_extended: 1592 if (!ParseOperands("ro", &ops) || 1593 !DoOffset(ops.register_number, 1594 -ops.offset * cie->data_alignment_factor)) 1595 return false; 1596 break; 1597 1598 // The register's value is the sum of the CFA plus an offset. 1599 case DW_CFA_val_offset: 1600 if (!ParseOperands("ro", &ops) || 1601 !DoValOffset(ops.register_number, 1602 ops.offset * cie->data_alignment_factor)) 1603 return false; 1604 break; 1605 1606 // The register's value is the sum of the CFA plus an offset. 1607 case DW_CFA_val_offset_sf: 1608 if (!ParseOperands("rs", &ops) || 1609 !DoValOffset(ops.register_number, 1610 ops.signed_offset * cie->data_alignment_factor)) 1611 return false; 1612 break; 1613 1614 // The register has been saved in another register. 1615 case DW_CFA_register: { 1616 if (!ParseOperands("ro", &ops) || 1617 !DoRule(ops.register_number, new RegisterRule(ops.offset))) 1618 return false; 1619 break; 1620 } 1621 1622 // An expression yields the address at which the register is saved. 1623 case DW_CFA_expression: { 1624 if (!ParseOperands("re", &ops) || 1625 !DoRule(ops.register_number, new ExpressionRule(ops.expression))) 1626 return false; 1627 break; 1628 } 1629 1630 // An expression yields the caller's value for the register. 1631 case DW_CFA_val_expression: { 1632 if (!ParseOperands("re", &ops) || 1633 !DoRule(ops.register_number, new ValExpressionRule(ops.expression))) 1634 return false; 1635 break; 1636 } 1637 1638 // Restore the rule established for a register by the CIE. 1639 case DW_CFA_restore_extended: 1640 if (!ParseOperands("r", &ops) || 1641 !DoRestore( ops.register_number)) 1642 return false; 1643 break; 1644 1645 // Save the current set of rules on a stack. 1646 case DW_CFA_remember_state: 1647 saved_rules_.push(rules_); 1648 break; 1649 1650 // Pop the current set of rules off the stack. 1651 case DW_CFA_restore_state: { 1652 if (saved_rules_.empty()) { 1653 reporter_->EmptyStateStack(entry_->offset, entry_->kind, 1654 CursorOffset()); 1655 return false; 1656 } 1657 const RuleMap &new_rules = saved_rules_.top(); 1658 if (rules_.CFARule() && !new_rules.CFARule()) { 1659 reporter_->ClearingCFARule(entry_->offset, entry_->kind, 1660 CursorOffset()); 1661 return false; 1662 } 1663 rules_.HandleTransitionTo(handler_, address_, new_rules); 1664 rules_ = new_rules; 1665 saved_rules_.pop(); 1666 break; 1667 } 1668 1669 // No operation. (Padding instruction.) 1670 case DW_CFA_nop: 1671 break; 1672 1673 // A SPARC register window save: Registers 8 through 15 (%o0-%o7) 1674 // are saved in registers 24 through 31 (%i0-%i7), and registers 1675 // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets 1676 // (0-15 * the register size). The register numbers must be 1677 // hard-coded. A GNU extension, and not a pretty one. 1678 case DW_CFA_GNU_window_save: { 1679 // Save %o0-%o7 in %i0-%i7. 1680 for (int i = 8; i < 16; i++) 1681 if (!DoRule(i, new RegisterRule(i + 16))) 1682 return false; 1683 // Save %l0-%l7 and %i0-%i7 at the CFA. 1684 for (int i = 16; i < 32; i++) 1685 // Assume that the byte reader's address size is the same as 1686 // the architecture's register size. !@#%*^ hilarious. 1687 if (!DoRule(i, new OffsetRule(Handler::kCFARegister, 1688 (i - 16) * reader_->AddressSize()))) 1689 return false; 1690 break; 1691 } 1692 1693 // I'm not sure what this is. GDB doesn't use it for unwinding. 1694 case DW_CFA_GNU_args_size: 1695 if (!ParseOperands("o", &ops)) return false; 1696 break; 1697 1698 // An opcode we don't recognize. 1699 default: { 1700 reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset()); 1701 return false; 1702 } 1703 } 1704 1705 return true; 1706 } 1707 1708 bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) { 1709 Rule *rule = new ValOffsetRule(base_register, offset); 1710 rules_.SetCFARule(rule); 1711 return rule->Handle(handler_, address_, 1712 Handler::kCFARegister); 1713 } 1714 1715 bool CallFrameInfo::State::DoDefCFAOffset(long offset) { 1716 Rule *cfa_rule = rules_.CFARule(); 1717 if (!cfa_rule) { 1718 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); 1719 return false; 1720 } 1721 cfa_rule->SetOffset(offset); 1722 return cfa_rule->Handle(handler_, address_, 1723 Handler::kCFARegister); 1724 } 1725 1726 bool CallFrameInfo::State::DoRule(unsigned reg, Rule *rule) { 1727 rules_.SetRegisterRule(reg, rule); 1728 return rule->Handle(handler_, address_, reg); 1729 } 1730 1731 bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) { 1732 if (!rules_.CFARule()) { 1733 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); 1734 return false; 1735 } 1736 return DoRule(reg, 1737 new OffsetRule(Handler::kCFARegister, offset)); 1738 } 1739 1740 bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) { 1741 if (!rules_.CFARule()) { 1742 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); 1743 return false; 1744 } 1745 return DoRule(reg, 1746 new ValOffsetRule(Handler::kCFARegister, offset)); 1747 } 1748 1749 bool CallFrameInfo::State::DoRestore(unsigned reg) { 1750 // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE. 1751 if (entry_->kind == kCIE) { 1752 reporter_->RestoreInCIE(entry_->offset, CursorOffset()); 1753 return false; 1754 } 1755 Rule *rule = cie_rules_.RegisterRule(reg); 1756 if (!rule) { 1757 // This isn't really the right thing to do, but since CFI generally 1758 // only mentions callee-saves registers, and GCC's convention for 1759 // callee-saves registers is that they are unchanged, it's a good 1760 // approximation. 1761 rule = new SameValueRule(); 1762 } 1763 return DoRule(reg, rule); 1764 } 1765 1766 bool CallFrameInfo::ReadEntryPrologue(const char *cursor, Entry *entry) { 1767 const char *buffer_end = buffer_ + buffer_length_; 1768 1769 // Initialize enough of ENTRY for use in error reporting. 1770 entry->offset = cursor - buffer_; 1771 entry->start = cursor; 1772 entry->kind = kUnknown; 1773 entry->end = NULL; 1774 1775 // Read the initial length. This sets reader_'s offset size. 1776 size_t length_size; 1777 uint64 length = reader_->ReadInitialLength(cursor, &length_size); 1778 if (length_size > size_t(buffer_end - cursor)) 1779 return ReportIncomplete(entry); 1780 cursor += length_size; 1781 1782 // In a .eh_frame section, a length of zero marks the end of the series 1783 // of entries. 1784 if (length == 0 && eh_frame_) { 1785 entry->kind = kTerminator; 1786 entry->end = cursor; 1787 return true; 1788 } 1789 1790 // Validate the length. 1791 if (length > size_t(buffer_end - cursor)) 1792 return ReportIncomplete(entry); 1793 1794 // The length is the number of bytes after the initial length field; 1795 // we have that position handy at this point, so compute the end 1796 // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine, 1797 // and the length didn't fit in a size_t, we would have rejected it 1798 // above.) 1799 entry->end = cursor + length; 1800 1801 // Parse the next field: either the offset of a CIE or a CIE id. 1802 size_t offset_size = reader_->OffsetSize(); 1803 if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry); 1804 entry->id = reader_->ReadOffset(cursor); 1805 1806 // Don't advance cursor past id field yet; in .eh_frame data we need 1807 // the id's position to compute the section offset of an FDE's CIE. 1808 1809 // Now we can decide what kind of entry this is. 1810 if (eh_frame_) { 1811 // In .eh_frame data, an ID of zero marks the entry as a CIE, and 1812 // anything else is an offset from the id field of the FDE to the start 1813 // of the CIE. 1814 if (entry->id == 0) { 1815 entry->kind = kCIE; 1816 } else { 1817 entry->kind = kFDE; 1818 // Turn the offset from the id into an offset from the buffer's start. 1819 entry->id = (cursor - buffer_) - entry->id; 1820 } 1821 } else { 1822 // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the 1823 // offset size for the entry) marks the entry as a CIE, and anything 1824 // else is the offset of the CIE from the beginning of the section. 1825 if (offset_size == 4) 1826 entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE; 1827 else { 1828 assert(offset_size == 8); 1829 entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE; 1830 } 1831 } 1832 1833 // Now advance cursor past the id. 1834 cursor += offset_size; 1835 1836 // The fields specific to this kind of entry start here. 1837 entry->fields = cursor; 1838 1839 entry->cie = NULL; 1840 1841 return true; 1842 } 1843 1844 bool CallFrameInfo::ReadCIEFields(CIE *cie) { 1845 const char *cursor = cie->fields; 1846 size_t len; 1847 1848 assert(cie->kind == kCIE); 1849 1850 // Prepare for early exit. 1851 cie->version = 0; 1852 cie->augmentation.clear(); 1853 cie->code_alignment_factor = 0; 1854 cie->data_alignment_factor = 0; 1855 cie->return_address_register = 0; 1856 cie->has_z_augmentation = false; 1857 cie->pointer_encoding = DW_EH_PE_absptr; 1858 cie->instructions = 0; 1859 1860 // Parse the version number. 1861 if (cie->end - cursor < 1) 1862 return ReportIncomplete(cie); 1863 cie->version = reader_->ReadOneByte(cursor); 1864 cursor++; 1865 1866 // If we don't recognize the version, we can't parse any more fields of the 1867 // CIE. For DWARF CFI, we handle versions 1 through 3 (there was never a 1868 // version 2 of CFI data). For .eh_frame, we handle versions 1 and 3 as well; 1869 // the difference between those versions seems to be the same as for 1870 // .debug_frame. 1871 if (cie->version < 1 || cie->version > 3) { 1872 reporter_->UnrecognizedVersion(cie->offset, cie->version); 1873 return false; 1874 } 1875 1876 const char *augmentation_start = cursor; 1877 const void *augmentation_end = 1878 memchr(augmentation_start, '\0', cie->end - augmentation_start); 1879 if (! augmentation_end) return ReportIncomplete(cie); 1880 cursor = static_cast<const char *>(augmentation_end); 1881 cie->augmentation = string(augmentation_start, 1882 cursor - augmentation_start); 1883 // Skip the terminating '\0'. 1884 cursor++; 1885 1886 // Is this CFI augmented? 1887 if (!cie->augmentation.empty()) { 1888 // Is it an augmentation we recognize? 1889 if (cie->augmentation[0] == DW_Z_augmentation_start) { 1890 // Linux C++ ABI 'z' augmentation, used for exception handling data. 1891 cie->has_z_augmentation = true; 1892 } else { 1893 // Not an augmentation we recognize. Augmentations can have arbitrary 1894 // effects on the form of rest of the content, so we have to give up. 1895 reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); 1896 return false; 1897 } 1898 } 1899 1900 // Parse the code alignment factor. 1901 cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len); 1902 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); 1903 cursor += len; 1904 1905 // Parse the data alignment factor. 1906 cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len); 1907 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); 1908 cursor += len; 1909 1910 // Parse the return address register. This is a ubyte in version 1, and 1911 // a ULEB128 in version 3. 1912 if (cie->version == 1) { 1913 if (cursor >= cie->end) return ReportIncomplete(cie); 1914 cie->return_address_register = uint8(*cursor++); 1915 } else { 1916 cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len); 1917 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); 1918 cursor += len; 1919 } 1920 1921 // If we have a 'z' augmentation string, find the augmentation data and 1922 // use the augmentation string to parse it. 1923 if (cie->has_z_augmentation) { 1924 uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len); 1925 if (size_t(cie->end - cursor) < len + data_size) 1926 return ReportIncomplete(cie); 1927 cursor += len; 1928 const char *data = cursor; 1929 cursor += data_size; 1930 const char *data_end = cursor; 1931 1932 cie->has_z_lsda = false; 1933 cie->has_z_personality = false; 1934 cie->has_z_signal_frame = false; 1935 1936 // Walk the augmentation string, and extract values from the 1937 // augmentation data as the string directs. 1938 for (size_t i = 1; i < cie->augmentation.size(); i++) { 1939 switch (cie->augmentation[i]) { 1940 case DW_Z_has_LSDA: 1941 // The CIE's augmentation data holds the language-specific data 1942 // area pointer's encoding, and the FDE's augmentation data holds 1943 // the pointer itself. 1944 cie->has_z_lsda = true; 1945 // Fetch the LSDA encoding from the augmentation data. 1946 if (data >= data_end) return ReportIncomplete(cie); 1947 cie->lsda_encoding = DwarfPointerEncoding(*data++); 1948 if (!reader_->ValidEncoding(cie->lsda_encoding)) { 1949 reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding); 1950 return false; 1951 } 1952 // Don't check if the encoding is usable here --- we haven't 1953 // read the FDE's fields yet, so we're not prepared for 1954 // DW_EH_PE_funcrel, although that's a fine encoding for the 1955 // LSDA to use, since it appears in the FDE. 1956 break; 1957 1958 case DW_Z_has_personality_routine: 1959 // The CIE's augmentation data holds the personality routine 1960 // pointer's encoding, followed by the pointer itself. 1961 cie->has_z_personality = true; 1962 // Fetch the personality routine pointer's encoding from the 1963 // augmentation data. 1964 if (data >= data_end) return ReportIncomplete(cie); 1965 cie->personality_encoding = DwarfPointerEncoding(*data++); 1966 if (!reader_->ValidEncoding(cie->personality_encoding)) { 1967 reporter_->InvalidPointerEncoding(cie->offset, 1968 cie->personality_encoding); 1969 return false; 1970 } 1971 if (!reader_->UsableEncoding(cie->personality_encoding)) { 1972 reporter_->UnusablePointerEncoding(cie->offset, 1973 cie->personality_encoding); 1974 return false; 1975 } 1976 // Fetch the personality routine's pointer itself from the data. 1977 cie->personality_address = 1978 reader_->ReadEncodedPointer(data, cie->personality_encoding, 1979 &len); 1980 if (len > size_t(data_end - data)) 1981 return ReportIncomplete(cie); 1982 data += len; 1983 break; 1984 1985 case DW_Z_has_FDE_address_encoding: 1986 // The CIE's augmentation data holds the pointer encoding to use 1987 // for addresses in the FDE. 1988 if (data >= data_end) return ReportIncomplete(cie); 1989 cie->pointer_encoding = DwarfPointerEncoding(*data++); 1990 if (!reader_->ValidEncoding(cie->pointer_encoding)) { 1991 reporter_->InvalidPointerEncoding(cie->offset, 1992 cie->pointer_encoding); 1993 return false; 1994 } 1995 if (!reader_->UsableEncoding(cie->pointer_encoding)) { 1996 reporter_->UnusablePointerEncoding(cie->offset, 1997 cie->pointer_encoding); 1998 return false; 1999 } 2000 break; 2001 2002 case DW_Z_is_signal_trampoline: 2003 // Frames using this CIE are signal delivery frames. 2004 cie->has_z_signal_frame = true; 2005 break; 2006 2007 default: 2008 // An augmentation we don't recognize. 2009 reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); 2010 return false; 2011 } 2012 } 2013 } 2014 2015 // The CIE's instructions start here. 2016 cie->instructions = cursor; 2017 2018 return true; 2019 } 2020 2021 bool CallFrameInfo::ReadFDEFields(FDE *fde) { 2022 const char *cursor = fde->fields; 2023 size_t size; 2024 2025 fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding, 2026 &size); 2027 if (size > size_t(fde->end - cursor)) 2028 return ReportIncomplete(fde); 2029 cursor += size; 2030 reader_->SetFunctionBase(fde->address); 2031 2032 // For the length, we strip off the upper nybble of the encoding used for 2033 // the starting address. 2034 DwarfPointerEncoding length_encoding = 2035 DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f); 2036 fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size); 2037 if (size > size_t(fde->end - cursor)) 2038 return ReportIncomplete(fde); 2039 cursor += size; 2040 2041 // If the CIE has a 'z' augmentation string, then augmentation data 2042 // appears here. 2043 if (fde->cie->has_z_augmentation) { 2044 uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size); 2045 if (size_t(fde->end - cursor) < size + data_size) 2046 return ReportIncomplete(fde); 2047 cursor += size; 2048 2049 // In the abstract, we should walk the augmentation string, and extract 2050 // items from the FDE's augmentation data as we encounter augmentation 2051 // string characters that specify their presence: the ordering of items 2052 // in the augmentation string determines the arrangement of values in 2053 // the augmentation data. 2054 // 2055 // In practice, there's only ever one value in FDE augmentation data 2056 // that we support --- the LSDA pointer --- and we have to bail if we 2057 // see any unrecognized augmentation string characters. So if there is 2058 // anything here at all, we know what it is, and where it starts. 2059 if (fde->cie->has_z_lsda) { 2060 // Check whether the LSDA's pointer encoding is usable now: only once 2061 // we've parsed the FDE's starting address do we call reader_-> 2062 // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes 2063 // usable. 2064 if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) { 2065 reporter_->UnusablePointerEncoding(fde->cie->offset, 2066 fde->cie->lsda_encoding); 2067 return false; 2068 } 2069 2070 fde->lsda_address = 2071 reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size); 2072 if (size > data_size) 2073 return ReportIncomplete(fde); 2074 // Ideally, we would also complain here if there were unconsumed 2075 // augmentation data. 2076 } 2077 2078 cursor += data_size; 2079 } 2080 2081 // The FDE's instructions start after those. 2082 fde->instructions = cursor; 2083 2084 return true; 2085 } 2086 2087 bool CallFrameInfo::Start() { 2088 const char *buffer_end = buffer_ + buffer_length_; 2089 const char *cursor; 2090 bool all_ok = true; 2091 const char *entry_end; 2092 bool ok; 2093 2094 // Traverse all the entries in buffer_, skipping CIEs and offering 2095 // FDEs to the handler. 2096 for (cursor = buffer_; cursor < buffer_end; 2097 cursor = entry_end, all_ok = all_ok && ok) { 2098 FDE fde; 2099 2100 // Make it easy to skip this entry with 'continue': assume that 2101 // things are not okay until we've checked all the data, and 2102 // prepare the address of the next entry. 2103 ok = false; 2104 2105 // Read the entry's prologue. 2106 if (!ReadEntryPrologue(cursor, &fde)) { 2107 if (!fde.end) { 2108 // If we couldn't even figure out this entry's extent, then we 2109 // must stop processing entries altogether. 2110 all_ok = false; 2111 break; 2112 } 2113 entry_end = fde.end; 2114 continue; 2115 } 2116 2117 // The next iteration picks up after this entry. 2118 entry_end = fde.end; 2119 2120 // Did we see an .eh_frame terminating mark? 2121 if (fde.kind == kTerminator) { 2122 // If there appears to be more data left in the section after the 2123 // terminating mark, warn the user. But this is just a warning; 2124 // we leave all_ok true. 2125 if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset); 2126 break; 2127 } 2128 2129 // In this loop, we skip CIEs. We only parse them fully when we 2130 // parse an FDE that refers to them. This limits our memory 2131 // consumption (beyond the buffer itself) to that needed to 2132 // process the largest single entry. 2133 if (fde.kind != kFDE) { 2134 ok = true; 2135 continue; 2136 } 2137 2138 // Validate the CIE pointer. 2139 if (fde.id > buffer_length_) { 2140 reporter_->CIEPointerOutOfRange(fde.offset, fde.id); 2141 continue; 2142 } 2143 2144 CIE cie; 2145 2146 // Parse this FDE's CIE header. 2147 if (!ReadEntryPrologue(buffer_ + fde.id, &cie)) 2148 continue; 2149 // This had better be an actual CIE. 2150 if (cie.kind != kCIE) { 2151 reporter_->BadCIEId(fde.offset, fde.id); 2152 continue; 2153 } 2154 if (!ReadCIEFields(&cie)) 2155 continue; 2156 2157 // We now have the values that govern both the CIE and the FDE. 2158 cie.cie = &cie; 2159 fde.cie = &cie; 2160 2161 // Parse the FDE's header. 2162 if (!ReadFDEFields(&fde)) 2163 continue; 2164 2165 // Call Entry to ask the consumer if they're interested. 2166 if (!handler_->Entry(fde.offset, fde.address, fde.size, 2167 cie.version, cie.augmentation, 2168 cie.return_address_register)) { 2169 // The handler isn't interested in this entry. That's not an error. 2170 ok = true; 2171 continue; 2172 } 2173 2174 if (cie.has_z_augmentation) { 2175 // Report the personality routine address, if we have one. 2176 if (cie.has_z_personality) { 2177 if (!handler_ 2178 ->PersonalityRoutine(cie.personality_address, 2179 IsIndirectEncoding(cie.personality_encoding))) 2180 continue; 2181 } 2182 2183 // Report the language-specific data area address, if we have one. 2184 if (cie.has_z_lsda) { 2185 if (!handler_ 2186 ->LanguageSpecificDataArea(fde.lsda_address, 2187 IsIndirectEncoding(cie.lsda_encoding))) 2188 continue; 2189 } 2190 2191 // If this is a signal-handling frame, report that. 2192 if (cie.has_z_signal_frame) { 2193 if (!handler_->SignalHandler()) 2194 continue; 2195 } 2196 } 2197 2198 // Interpret the CIE's instructions, and then the FDE's instructions. 2199 State state(reader_, handler_, reporter_, fde.address); 2200 ok = state.InterpretCIE(cie) && state.InterpretFDE(fde); 2201 2202 // Tell the ByteReader that the function start address from the 2203 // FDE header is no longer valid. 2204 reader_->ClearFunctionBase(); 2205 2206 // Report the end of the entry. 2207 handler_->End(); 2208 } 2209 2210 return all_ok; 2211 } 2212 2213 const char *CallFrameInfo::KindName(EntryKind kind) { 2214 if (kind == CallFrameInfo::kUnknown) 2215 return "entry"; 2216 else if (kind == CallFrameInfo::kCIE) 2217 return "common information entry"; 2218 else if (kind == CallFrameInfo::kFDE) 2219 return "frame description entry"; 2220 else { 2221 assert (kind == CallFrameInfo::kTerminator); 2222 return ".eh_frame sequence terminator"; 2223 } 2224 } 2225 2226 bool CallFrameInfo::ReportIncomplete(Entry *entry) { 2227 reporter_->Incomplete(entry->offset, entry->kind); 2228 return false; 2229 } 2230 2231 void CallFrameInfo::Reporter::Incomplete(uint64 offset, 2232 CallFrameInfo::EntryKind kind) { 2233 fprintf(stderr, 2234 "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n", 2235 filename_.c_str(), CallFrameInfo::KindName(kind), offset, 2236 section_.c_str()); 2237 } 2238 2239 void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) { 2240 fprintf(stderr, 2241 "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker" 2242 " before end of section contents\n", 2243 filename_.c_str(), offset, section_.c_str()); 2244 } 2245 2246 void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset, 2247 uint64 cie_offset) { 2248 fprintf(stderr, 2249 "%s: CFI frame description entry at offset 0x%llx in '%s':" 2250 " CIE pointer is out of range: 0x%llx\n", 2251 filename_.c_str(), offset, section_.c_str(), cie_offset); 2252 } 2253 2254 void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) { 2255 fprintf(stderr, 2256 "%s: CFI frame description entry at offset 0x%llx in '%s':" 2257 " CIE pointer does not point to a CIE: 0x%llx\n", 2258 filename_.c_str(), offset, section_.c_str(), cie_offset); 2259 } 2260 2261 void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) { 2262 fprintf(stderr, 2263 "%s: CFI frame description entry at offset 0x%llx in '%s':" 2264 " CIE specifies unrecognized version: %d\n", 2265 filename_.c_str(), offset, section_.c_str(), version); 2266 } 2267 2268 void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset, 2269 const string &aug) { 2270 fprintf(stderr, 2271 "%s: CFI frame description entry at offset 0x%llx in '%s':" 2272 " CIE specifies unrecognized augmentation: '%s'\n", 2273 filename_.c_str(), offset, section_.c_str(), aug.c_str()); 2274 } 2275 2276 void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset, 2277 uint8 encoding) { 2278 fprintf(stderr, 2279 "%s: CFI common information entry at offset 0x%llx in '%s':" 2280 " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n", 2281 filename_.c_str(), offset, section_.c_str(), encoding); 2282 } 2283 2284 void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset, 2285 uint8 encoding) { 2286 fprintf(stderr, 2287 "%s: CFI common information entry at offset 0x%llx in '%s':" 2288 " 'z' augmentation specifies a pointer encoding for which" 2289 " we have no base address: 0x%02x\n", 2290 filename_.c_str(), offset, section_.c_str(), encoding); 2291 } 2292 2293 void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) { 2294 fprintf(stderr, 2295 "%s: CFI common information entry at offset 0x%llx in '%s':" 2296 " the DW_CFA_restore instruction at offset 0x%llx" 2297 " cannot be used in a common information entry\n", 2298 filename_.c_str(), offset, section_.c_str(), insn_offset); 2299 } 2300 2301 void CallFrameInfo::Reporter::BadInstruction(uint64 offset, 2302 CallFrameInfo::EntryKind kind, 2303 uint64 insn_offset) { 2304 fprintf(stderr, 2305 "%s: CFI %s at offset 0x%llx in section '%s':" 2306 " the instruction at offset 0x%llx is unrecognized\n", 2307 filename_.c_str(), CallFrameInfo::KindName(kind), 2308 offset, section_.c_str(), insn_offset); 2309 } 2310 2311 void CallFrameInfo::Reporter::NoCFARule(uint64 offset, 2312 CallFrameInfo::EntryKind kind, 2313 uint64 insn_offset) { 2314 fprintf(stderr, 2315 "%s: CFI %s at offset 0x%llx in section '%s':" 2316 " the instruction at offset 0x%llx assumes that a CFA rule has" 2317 " been set, but none has been set\n", 2318 filename_.c_str(), CallFrameInfo::KindName(kind), offset, 2319 section_.c_str(), insn_offset); 2320 } 2321 2322 void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset, 2323 CallFrameInfo::EntryKind kind, 2324 uint64 insn_offset) { 2325 fprintf(stderr, 2326 "%s: CFI %s at offset 0x%llx in section '%s':" 2327 " the DW_CFA_restore_state instruction at offset 0x%llx" 2328 " should pop a saved state from the stack, but the stack is empty\n", 2329 filename_.c_str(), CallFrameInfo::KindName(kind), offset, 2330 section_.c_str(), insn_offset); 2331 } 2332 2333 void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset, 2334 CallFrameInfo::EntryKind kind, 2335 uint64 insn_offset) { 2336 fprintf(stderr, 2337 "%s: CFI %s at offset 0x%llx in section '%s':" 2338 " the DW_CFA_restore_state instruction at offset 0x%llx" 2339 " would clear the CFA rule in effect\n", 2340 filename_.c_str(), CallFrameInfo::KindName(kind), offset, 2341 section_.c_str(), insn_offset); 2342 } 2343 2344 } // namespace dwarf2reader 2345