1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "courgette/disassembler_elf_32.h" 6 7 #include <algorithm> 8 #include <string> 9 #include <vector> 10 11 #include "base/basictypes.h" 12 #include "base/logging.h" 13 #include "base/memory/scoped_vector.h" 14 15 #include "courgette/assembly_program.h" 16 #include "courgette/courgette.h" 17 #include "courgette/encoded_program.h" 18 19 namespace courgette { 20 21 DisassemblerElf32::DisassemblerElf32(const void* start, size_t length) 22 : Disassembler(start, length), 23 header_(NULL), 24 section_header_table_(NULL), 25 section_header_table_size_(0), 26 program_header_table_(NULL), 27 program_header_table_size_(0), 28 default_string_section_(NULL) { 29 } 30 31 bool DisassemblerElf32::ParseHeader() { 32 if (length() < sizeof(Elf32_Ehdr)) 33 return Bad("Too small"); 34 35 header_ = (Elf32_Ehdr *)start(); 36 37 // Have magic for elf header? 38 if (header_->e_ident[0] != 0x7f || 39 header_->e_ident[1] != 'E' || 40 header_->e_ident[2] != 'L' || 41 header_->e_ident[3] != 'F') 42 return Bad("No Magic Number"); 43 44 if (header_->e_type != ET_EXEC && 45 header_->e_type != ET_DYN) 46 return Bad("Not an executable file or shared library"); 47 48 if (header_->e_machine != ElfEM()) 49 return Bad("Not a supported architecture"); 50 51 if (header_->e_version != 1) 52 return Bad("Unknown file version"); 53 54 if (header_->e_shentsize != sizeof(Elf32_Shdr)) 55 return Bad("Unexpected section header size"); 56 57 if (header_->e_shoff >= length()) 58 return Bad("Out of bounds section header table offset"); 59 60 section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff); 61 section_header_table_size_ = header_->e_shnum; 62 63 if ((header_->e_shoff + header_->e_shnum ) >= length()) 64 return Bad("Out of bounds section header table"); 65 66 if (header_->e_phoff >= length()) 67 return Bad("Out of bounds program header table offset"); 68 69 program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff); 70 program_header_table_size_ = header_->e_phnum; 71 72 if ((header_->e_phoff + header_->e_phnum) >= length()) 73 return Bad("Out of bounds program header table"); 74 75 default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx); 76 77 ReduceLength(DiscoverLength()); 78 79 return Good(); 80 } 81 82 bool DisassemblerElf32::Disassemble(AssemblyProgram* target) { 83 if (!ok()) 84 return false; 85 86 // The Image Base is always 0 for ELF Executables 87 target->set_image_base(0); 88 89 if (!ParseAbs32Relocs()) 90 return false; 91 92 if (!ParseRel32RelocsFromSections()) 93 return false; 94 95 if (!ParseFile(target)) 96 return false; 97 98 target->DefaultAssignIndexes(); 99 100 return true; 101 } 102 103 uint32 DisassemblerElf32::DiscoverLength() { 104 uint32 result = 0; 105 106 // Find the end of the last section 107 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) { 108 const Elf32_Shdr *section_header = SectionHeader(section_id); 109 110 if (section_header->sh_type == SHT_NOBITS) 111 continue; 112 113 uint32 section_end = section_header->sh_offset + section_header->sh_size; 114 115 if (section_end > result) 116 result = section_end; 117 } 118 119 // Find the end of the last segment 120 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) { 121 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i); 122 123 uint32 segment_end = segment_header->p_offset + segment_header->p_filesz; 124 125 if (segment_end > result) 126 result = segment_end; 127 } 128 129 uint32 section_table_end = header_->e_shoff + 130 (header_->e_shnum * sizeof(Elf32_Shdr)); 131 if (section_table_end > result) 132 result = section_table_end; 133 134 uint32 segment_table_end = header_->e_phoff + 135 (header_->e_phnum * sizeof(Elf32_Phdr)); 136 if (segment_table_end > result) 137 result = segment_table_end; 138 139 return result; 140 } 141 142 CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const { 143 144 // It's valid if it's contained in any program segment 145 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) { 146 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i); 147 148 if (segment_header->p_type != PT_LOAD) 149 continue; 150 151 Elf32_Addr begin = segment_header->p_vaddr; 152 Elf32_Addr end = segment_header->p_vaddr + segment_header->p_memsz; 153 154 if (rva >= begin && rva < end) 155 return true; 156 } 157 158 return false; 159 } 160 161 // Returns RVA for an in memory address, or NULL. 162 CheckBool DisassemblerElf32::RVAToFileOffset(Elf32_Addr addr, 163 size_t* result) const { 164 165 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) { 166 Elf32_Addr begin = ProgramSegmentMemoryBegin(i); 167 Elf32_Addr end = begin + ProgramSegmentMemorySize(i); 168 169 if (addr >= begin && addr < end) { 170 Elf32_Addr offset = addr - begin; 171 172 if (offset < ProgramSegmentFileSize(i)) { 173 *result = ProgramSegmentFileOffset(i) + offset; 174 return true; 175 } 176 } 177 } 178 179 return false; 180 } 181 182 RVA DisassemblerElf32::FileOffsetToRVA(size_t offset) const { 183 // File offsets can be 64 bit values, but we are dealing with 32 184 // bit executables and so only need to support 32bit file sizes. 185 uint32 offset32 = (uint32)offset; 186 187 for (int i = 0; i < SectionHeaderCount(); i++) { 188 189 const Elf32_Shdr *section_header = SectionHeader(i); 190 191 // These can appear to have a size in the file, but don't. 192 if (section_header->sh_type == SHT_NOBITS) 193 continue; 194 195 Elf32_Off section_begin = section_header->sh_offset; 196 Elf32_Off section_end = section_begin + section_header->sh_size; 197 198 if (offset32 >= section_begin && offset32 < section_end) { 199 return section_header->sh_addr + (offset32 - section_begin); 200 } 201 } 202 203 return 0; 204 } 205 206 CheckBool DisassemblerElf32::RVAsToOffsets(std::vector<RVA>* rvas, 207 std::vector<size_t>* offsets) { 208 offsets->clear(); 209 210 for (std::vector<RVA>::iterator rva = rvas->begin(); 211 rva != rvas->end(); 212 rva++) { 213 214 size_t offset; 215 216 if (!RVAToFileOffset(*rva, &offset)) 217 return false; 218 219 offsets->push_back(offset); 220 } 221 222 return true; 223 } 224 225 CheckBool DisassemblerElf32::RVAsToOffsets(ScopedVector<TypedRVA>* rvas) { 226 for (ScopedVector<TypedRVA>::iterator rva = rvas->begin(); 227 rva != rvas->end(); 228 rva++) { 229 230 size_t offset; 231 232 if (!RVAToFileOffset((*rva)->rva(), &offset)) 233 return false; 234 235 (*rva)->set_offset(offset); 236 } 237 238 return true; 239 } 240 241 CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) { 242 // Walk all the bytes in the file, whether or not in a section. 243 uint32 file_offset = 0; 244 245 std::vector<size_t> abs_offsets; 246 247 if (!RVAsToOffsets(&abs32_locations_, &abs_offsets)) 248 return false; 249 250 if (!RVAsToOffsets(&rel32_locations_)) 251 return false; 252 253 std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin(); 254 ScopedVector<TypedRVA>::iterator current_rel = rel32_locations_.begin(); 255 256 std::vector<size_t>::iterator end_abs_offset = abs_offsets.end(); 257 ScopedVector<TypedRVA>::iterator end_rel = rel32_locations_.end(); 258 259 for (int section_id = 0; 260 section_id < SectionHeaderCount(); 261 section_id++) { 262 263 const Elf32_Shdr *section_header = SectionHeader(section_id); 264 265 if (!ParseSimpleRegion(file_offset, 266 section_header->sh_offset, 267 program)) 268 return false; 269 file_offset = section_header->sh_offset; 270 271 switch (section_header->sh_type) { 272 case SHT_REL: 273 if (!ParseRelocationSection(section_header, program)) 274 return false; 275 file_offset = section_header->sh_offset + section_header->sh_size; 276 break; 277 case SHT_PROGBITS: 278 if (!ParseProgbitsSection(section_header, 279 ¤t_abs_offset, end_abs_offset, 280 ¤t_rel, end_rel, 281 program)) 282 return false; 283 file_offset = section_header->sh_offset + section_header->sh_size; 284 break; 285 case SHT_NOBITS: 286 // Fall through 287 case SHT_INIT_ARRAY: 288 // Fall through 289 case SHT_FINI_ARRAY: 290 while (current_abs_offset != end_abs_offset && 291 *current_abs_offset >= section_header->sh_offset && 292 *current_abs_offset < 293 (section_header->sh_offset + section_header->sh_size)) { 294 // Skip any abs_offsets appear in the unsupported INIT_ARRAY section 295 VLOG(1) << "Skipping relocation entry for unsupported section: " << 296 section_header->sh_type; 297 current_abs_offset++; 298 } 299 break; 300 default: 301 if (current_abs_offset != end_abs_offset && 302 *current_abs_offset >= section_header->sh_offset && 303 *current_abs_offset < 304 (section_header->sh_offset + section_header->sh_size)) 305 VLOG(1) << "Relocation address in unrecognized ELF section: " << \ 306 section_header->sh_type; 307 break; 308 } 309 } 310 311 // Rest of the file past the last section 312 if (!ParseSimpleRegion(file_offset, 313 length(), 314 program)) 315 return false; 316 317 // Make certain we consume all of the relocations as expected 318 return (current_abs_offset == end_abs_offset); 319 } 320 321 CheckBool DisassemblerElf32::ParseProgbitsSection( 322 const Elf32_Shdr *section_header, 323 std::vector<size_t>::iterator* current_abs_offset, 324 std::vector<size_t>::iterator end_abs_offset, 325 ScopedVector<TypedRVA>::iterator* current_rel, 326 ScopedVector<TypedRVA>::iterator end_rel, 327 AssemblyProgram* program) { 328 329 // Walk all the bytes in the file, whether or not in a section. 330 size_t file_offset = section_header->sh_offset; 331 size_t section_end = section_header->sh_offset + section_header->sh_size; 332 333 Elf32_Addr origin = section_header->sh_addr; 334 size_t origin_offset = section_header->sh_offset; 335 if (!program->EmitOriginInstruction(origin)) 336 return false; 337 338 while (file_offset < section_end) { 339 340 if (*current_abs_offset != end_abs_offset && 341 file_offset > **current_abs_offset) 342 return false; 343 344 while (*current_rel != end_rel && 345 file_offset > (**current_rel)->get_offset()) { 346 (*current_rel)++; 347 } 348 349 size_t next_relocation = section_end; 350 351 if (*current_abs_offset != end_abs_offset && 352 next_relocation > **current_abs_offset) 353 next_relocation = **current_abs_offset; 354 355 // Rel offsets are heuristically derived, and might (incorrectly) overlap 356 // an Abs value, or the end of the section, so +3 to make sure there is 357 // room for the full 4 byte value. 358 if (*current_rel != end_rel && 359 next_relocation > ((**current_rel)->get_offset() + 3)) 360 next_relocation = (**current_rel)->get_offset(); 361 362 if (next_relocation > file_offset) { 363 if (!ParseSimpleRegion(file_offset, next_relocation, program)) 364 return false; 365 366 file_offset = next_relocation; 367 continue; 368 } 369 370 if (*current_abs_offset != end_abs_offset && 371 file_offset == **current_abs_offset) { 372 373 const uint8* p = OffsetToPointer(file_offset); 374 RVA target_rva = Read32LittleEndian(p); 375 376 if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva))) 377 return false; 378 file_offset += sizeof(RVA); 379 (*current_abs_offset)++; 380 continue; 381 } 382 383 if (*current_rel != end_rel && 384 file_offset == (**current_rel)->get_offset()) { 385 386 uint32 relative_target = (**current_rel)->relative_target(); 387 // This cast is for 64 bit systems, and is only safe because we 388 // are working on 32 bit executables. 389 RVA target_rva = (RVA)(origin + (file_offset - origin_offset) + 390 relative_target); 391 392 if (! (**current_rel)->EmitInstruction(program, target_rva)) 393 return false; 394 file_offset += (**current_rel)->op_size(); 395 (*current_rel)++; 396 continue; 397 } 398 } 399 400 // Rest of the section (if any) 401 return ParseSimpleRegion(file_offset, section_end, program); 402 } 403 404 CheckBool DisassemblerElf32::ParseSimpleRegion( 405 size_t start_file_offset, 406 size_t end_file_offset, 407 AssemblyProgram* program) { 408 409 const uint8* start = OffsetToPointer(start_file_offset); 410 const uint8* end = OffsetToPointer(end_file_offset); 411 412 // Callers don't guarantee start < end 413 if (start >= end) return true; 414 415 const ptrdiff_t len = end - start; // Works because vars are byte pointers 416 417 if (!program->EmitBytesInstruction(start, len)) 418 return false; 419 420 return true; 421 } 422 423 CheckBool DisassemblerElf32::ParseAbs32Relocs() { 424 abs32_locations_.clear(); 425 426 // Loop through sections for relocation sections 427 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) { 428 const Elf32_Shdr *section_header = SectionHeader(section_id); 429 430 if (section_header->sh_type == SHT_REL) { 431 432 Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id); 433 434 int relocs_table_count = section_header->sh_size / 435 section_header->sh_entsize; 436 437 // Elf32_Word relocation_section_id = section_header->sh_info; 438 439 // Loop through relocation objects in the relocation section 440 for (int rel_id = 0; rel_id < relocs_table_count; rel_id++) { 441 RVA rva; 442 443 // Quite a few of these conversions fail, and we simply skip 444 // them, that's okay. 445 if (RelToRVA(relocs_table[rel_id], &rva) && CheckSection(rva)) 446 abs32_locations_.push_back(rva); 447 } 448 } 449 } 450 451 std::sort(abs32_locations_.begin(), abs32_locations_.end()); 452 return true; 453 } 454 455 CheckBool DisassemblerElf32::CheckSection(RVA rva) { 456 size_t offset; 457 458 if (!RVAToFileOffset(rva, &offset)) { 459 return false; 460 } 461 462 for (int section_id = 0; 463 section_id < SectionHeaderCount(); 464 section_id++) { 465 466 const Elf32_Shdr *section_header = SectionHeader(section_id); 467 468 if (offset >= section_header->sh_offset && 469 offset < (section_header->sh_offset + section_header->sh_size)) { 470 switch (section_header->sh_type) { 471 case SHT_REL: 472 // Fall-through 473 case SHT_PROGBITS: 474 return true; 475 } 476 } 477 } 478 479 return false; 480 } 481 482 CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() { 483 484 rel32_locations_.clear(); 485 486 // Loop through sections for relocation sections 487 for (int section_id = 0; 488 section_id < SectionHeaderCount(); 489 section_id++) { 490 491 const Elf32_Shdr *section_header = SectionHeader(section_id); 492 493 if (section_header->sh_type != SHT_PROGBITS) 494 continue; 495 496 if (!ParseRel32RelocsFromSection(section_header)) 497 return false; 498 } 499 500 std::sort(rel32_locations_.begin(), 501 rel32_locations_.end(), 502 TypedRVA::IsLessThan); 503 return true; 504 } 505 506 } // namespace courgette 507