1 /* Sniff out modules from ELF headers visible in memory segments. 2 Copyright (C) 2008 Red Hat, Inc. 3 This file is part of Red Hat elfutils. 4 5 Red Hat elfutils is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by the 7 Free Software Foundation; version 2 of the License. 8 9 Red Hat elfutils is distributed in the hope that it will be useful, but 10 WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 General Public License for more details. 13 14 You should have received a copy of the GNU General Public License along 15 with Red Hat elfutils; if not, write to the Free Software Foundation, 16 Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA. 17 18 In addition, as a special exception, Red Hat, Inc. gives You the 19 additional right to link the code of Red Hat elfutils with code licensed 20 under any Open Source Initiative certified open source license 21 (http://www.opensource.org/licenses/index.php) which requires the 22 distribution of source code with any binary distribution and to 23 distribute linked combinations of the two. Non-GPL Code permitted under 24 this exception must only link to the code of Red Hat elfutils through 25 those well defined interfaces identified in the file named EXCEPTION 26 found in the source code files (the "Approved Interfaces"). The files 27 of Non-GPL Code may instantiate templates or use macros or inline 28 functions from the Approved Interfaces without causing the resulting 29 work to be covered by the GNU General Public License. Only Red Hat, 30 Inc. may make changes or additions to the list of Approved Interfaces. 31 Red Hat's grant of this exception is conditioned upon your not adding 32 any new exceptions. If you wish to add a new Approved Interface or 33 exception, please contact Red Hat. You must obey the GNU General Public 34 License in all respects for all of the Red Hat elfutils code and other 35 code used in conjunction with Red Hat elfutils except the Non-GPL Code 36 covered by this exception. If you modify this file, you may extend this 37 exception to your version of the file, but you are not obligated to do 38 so. If you do not wish to provide this exception without modification, 39 you must delete this exception statement from your version and license 40 this file solely under the GPL without exception. 41 42 Red Hat elfutils is an included package of the Open Invention Network. 43 An included package of the Open Invention Network is a package for which 44 Open Invention Network licensees cross-license their patents. No patent 45 license is granted, either expressly or impliedly, by designation as an 46 included package. Should you wish to participate in the Open Invention 47 Network licensing program, please visit www.openinventionnetwork.com 48 <http://www.openinventionnetwork.com>. */ 49 50 #include <config.h> 51 #include "../libelf/libelfP.h" /* For NOTE_ALIGN. */ 52 #undef _ 53 #include "libdwflP.h" 54 55 #include <elf.h> 56 #include <gelf.h> 57 #include <inttypes.h> 58 #include <sys/param.h> 59 #include <alloca.h> 60 #include <endian.h> 61 62 63 /* A good size for the initial read from memory, if it's not too costly. 64 This more than covers the phdrs and note segment in the average 64-bit 65 binary. */ 66 67 #define INITIAL_READ 1024 68 69 #if __BYTE_ORDER == __LITTLE_ENDIAN 70 # define MY_ELFDATA ELFDATA2LSB 71 #else 72 # define MY_ELFDATA ELFDATA2MSB 73 #endif 74 75 76 /* Return user segment index closest to ADDR but not above it. */ 77 static int 78 addr_segndx (Dwfl *dwfl, size_t segment, GElf_Addr addr) 79 { 80 int ndx = dwfl->lookup_segndx[segment]; 81 do 82 { 83 if (dwfl->lookup_segndx[segment] >= 0) 84 ndx = dwfl->lookup_segndx[segment]; 85 ++segment; 86 } 87 while (segment < dwfl->lookup_elts - 1 88 && dwfl->lookup_addr[segment] < addr); 89 90 while (dwfl->lookup_segndx[segment] < 0 91 && segment < dwfl->lookup_elts - 1) 92 ++segment; 93 94 if (dwfl->lookup_segndx[segment] >= 0) 95 ndx = dwfl->lookup_segndx[segment]; 96 97 return ndx; 98 } 99 100 int 101 dwfl_segment_report_module (Dwfl *dwfl, int ndx, const char *name, 102 Dwfl_Memory_Callback *memory_callback, 103 void *memory_callback_arg, 104 Dwfl_Module_Callback *read_eagerly, 105 void *read_eagerly_arg) 106 { 107 size_t segment = ndx; 108 109 if (segment >= dwfl->lookup_elts) 110 segment = dwfl->lookup_elts - 1; 111 112 while (segment > 0 && dwfl->lookup_segndx[segment] > ndx) 113 --segment; 114 115 while (dwfl->lookup_segndx[segment] < ndx) 116 if (++segment == dwfl->lookup_elts) 117 return 0; 118 119 GElf_Addr start = dwfl->lookup_addr[segment]; 120 121 inline bool segment_read (int segndx, 122 void **buffer, size_t *buffer_available, 123 GElf_Addr addr, size_t minread) 124 { 125 return ! (*memory_callback) (dwfl, segndx, buffer, buffer_available, 126 addr, minread, memory_callback_arg); 127 } 128 129 inline void release_buffer (void **buffer, size_t *buffer_available) 130 { 131 if (*buffer != NULL) 132 (void) segment_read (-1, buffer, buffer_available, 0, 0); 133 } 134 135 /* First read in the file header and check its sanity. */ 136 137 void *buffer = NULL; 138 size_t buffer_available = INITIAL_READ; 139 140 inline int finish (void) 141 { 142 release_buffer (&buffer, &buffer_available); 143 return ndx; 144 } 145 146 if (segment_read (ndx, &buffer, &buffer_available, 147 start, sizeof (Elf64_Ehdr)) 148 || memcmp (buffer, ELFMAG, SELFMAG) != 0) 149 return finish (); 150 151 inline bool read_portion (void **data, size_t *data_size, 152 GElf_Addr vaddr, size_t filesz) 153 { 154 if (vaddr - start + filesz > buffer_available) 155 { 156 *data = NULL; 157 *data_size = filesz; 158 return segment_read (addr_segndx (dwfl, segment, vaddr), 159 data, data_size, vaddr, filesz); 160 } 161 162 /* We already have this whole note segment from our initial read. */ 163 *data = vaddr - start + buffer; 164 *data_size = 0; 165 return false; 166 } 167 168 inline void finish_portion (void **data, size_t *data_size) 169 { 170 if (*data_size != 0) 171 release_buffer (data, data_size); 172 } 173 174 /* Extract the information we need from the file header. */ 175 union 176 { 177 Elf32_Ehdr e32; 178 Elf64_Ehdr e64; 179 } ehdr; 180 GElf_Off phoff; 181 uint_fast16_t phnum; 182 uint_fast16_t phentsize; 183 GElf_Off shdrs_end; 184 Elf_Data xlatefrom = 185 { 186 .d_type = ELF_T_EHDR, 187 .d_buf = (void *) buffer, 188 .d_version = EV_CURRENT, 189 }; 190 Elf_Data xlateto = 191 { 192 .d_type = ELF_T_EHDR, 193 .d_buf = &ehdr, 194 .d_size = sizeof ehdr, 195 .d_version = EV_CURRENT, 196 }; 197 switch (((const unsigned char *) buffer)[EI_CLASS]) 198 { 199 case ELFCLASS32: 200 xlatefrom.d_size = sizeof (Elf32_Ehdr); 201 if (elf32_xlatetom (&xlateto, &xlatefrom, 202 ((const unsigned char *) buffer)[EI_DATA]) == NULL) 203 return finish (); 204 phoff = ehdr.e32.e_phoff; 205 phnum = ehdr.e32.e_phnum; 206 phentsize = ehdr.e32.e_phentsize; 207 if (phentsize != sizeof (Elf32_Phdr)) 208 return finish (); 209 shdrs_end = ehdr.e32.e_shoff + ehdr.e32.e_shnum * ehdr.e32.e_shentsize; 210 break; 211 212 case ELFCLASS64: 213 xlatefrom.d_size = sizeof (Elf64_Ehdr); 214 if (elf64_xlatetom (&xlateto, &xlatefrom, 215 ((const unsigned char *) buffer)[EI_DATA]) == NULL) 216 return finish (); 217 phoff = ehdr.e64.e_phoff; 218 phnum = ehdr.e64.e_phnum; 219 phentsize = ehdr.e64.e_phentsize; 220 if (phentsize != sizeof (Elf64_Phdr)) 221 return finish (); 222 shdrs_end = ehdr.e64.e_shoff + ehdr.e64.e_shnum * ehdr.e64.e_shentsize; 223 break; 224 225 default: 226 return finish (); 227 } 228 229 /* The file header tells where to find the program headers. 230 These are what we need to find the boundaries of the module. 231 Without them, we don't have a module to report. */ 232 233 if (phnum == 0) 234 return finish (); 235 236 xlatefrom.d_type = xlateto.d_type = ELF_T_PHDR; 237 xlatefrom.d_size = phnum * phentsize; 238 239 void *ph_buffer = NULL; 240 size_t ph_buffer_size = 0; 241 if (read_portion (&ph_buffer, &ph_buffer_size, 242 start + phoff, xlatefrom.d_size)) 243 return finish (); 244 245 xlatefrom.d_buf = ph_buffer; 246 247 union 248 { 249 Elf32_Phdr p32[phnum]; 250 Elf64_Phdr p64[phnum]; 251 } phdrs; 252 253 xlateto.d_buf = &phdrs; 254 xlateto.d_size = sizeof phdrs; 255 256 /* Track the bounds of the file visible in memory. */ 257 GElf_Off file_trimmed_end = 0; /* Proper p_vaddr + p_filesz end. */ 258 GElf_Off file_end = 0; /* Rounded up to effective page size. */ 259 GElf_Off contiguous = 0; /* Visible as contiguous file from START. */ 260 GElf_Off total_filesz = 0; /* Total size of data to read. */ 261 262 /* Collect the bias between START and the containing PT_LOAD's p_vaddr. */ 263 GElf_Addr bias = 0; 264 bool found_bias = false; 265 266 /* Collect the unbiased bounds of the module here. */ 267 GElf_Addr module_start = -1l; 268 GElf_Addr module_end = 0; 269 270 /* If we see PT_DYNAMIC, record it here. */ 271 GElf_Addr dyn_vaddr = 0; 272 GElf_Xword dyn_filesz = 0; 273 274 /* Collect the build ID bits here. */ 275 void *build_id = NULL; 276 size_t build_id_len = 0; 277 GElf_Addr build_id_vaddr = 0; 278 279 /* Consider a PT_NOTE we've found in the image. */ 280 inline void consider_notes (GElf_Addr vaddr, GElf_Xword filesz) 281 { 282 /* If we have already seen a build ID, we don't care any more. */ 283 if (build_id != NULL || filesz == 0) 284 return; 285 286 void *data; 287 size_t data_size; 288 if (read_portion (&data, &data_size, vaddr, filesz)) 289 return; 290 291 assert (sizeof (Elf32_Nhdr) == sizeof (Elf64_Nhdr)); 292 293 void *notes; 294 if (ehdr.e32.e_ident[EI_DATA] == MY_ELFDATA) 295 notes = data; 296 else 297 { 298 notes = malloc (filesz); 299 if (unlikely (notes == NULL)) 300 return; 301 xlatefrom.d_type = xlateto.d_type = ELF_T_NHDR; 302 xlatefrom.d_buf = (void *) data; 303 xlatefrom.d_size = filesz; 304 xlateto.d_buf = notes; 305 xlateto.d_size = filesz; 306 if (elf32_xlatetom (&xlateto, &xlatefrom, 307 ehdr.e32.e_ident[EI_DATA]) == NULL) 308 goto done; 309 } 310 311 const GElf_Nhdr *nh = notes; 312 while ((const void *) nh < (const void *) notes + filesz) 313 { 314 const void *note_name = nh + 1; 315 const void *note_desc = note_name + NOTE_ALIGN (nh->n_namesz); 316 if (unlikely ((size_t) ((const void *) notes + filesz 317 - note_desc) < nh->n_descsz)) 318 break; 319 320 if (nh->n_type == NT_GNU_BUILD_ID 321 && nh->n_descsz > 0 322 && nh->n_namesz == sizeof "GNU" 323 && !memcmp (note_name, "GNU", sizeof "GNU")) 324 { 325 build_id_vaddr = note_desc - (const void *) notes + vaddr; 326 build_id_len = nh->n_descsz; 327 build_id = malloc (nh->n_descsz); 328 if (likely (build_id != NULL)) 329 memcpy (build_id, note_desc, build_id_len); 330 break; 331 } 332 333 nh = note_desc + NOTE_ALIGN (nh->n_descsz); 334 } 335 336 done: 337 if (notes != data) 338 free (notes); 339 finish_portion (&data, &data_size); 340 } 341 342 /* Consider each of the program headers we've read from the image. */ 343 inline void consider_phdr (GElf_Word type, 344 GElf_Addr vaddr, GElf_Xword memsz, 345 GElf_Off offset, GElf_Xword filesz, 346 GElf_Xword align) 347 { 348 switch (type) 349 { 350 case PT_DYNAMIC: 351 dyn_vaddr = vaddr; 352 dyn_filesz = filesz; 353 break; 354 355 case PT_NOTE: 356 /* We calculate from the p_offset of the note segment, 357 because we don't yet know the bias for its p_vaddr. */ 358 consider_notes (start + offset, filesz); 359 break; 360 361 case PT_LOAD: 362 align = dwfl->segment_align > 1 ? dwfl->segment_align : align ?: 1; 363 364 GElf_Addr vaddr_end = (vaddr + memsz + align - 1) & -align; 365 GElf_Addr filesz_vaddr = filesz < memsz ? vaddr + filesz : vaddr_end; 366 GElf_Off filesz_offset = filesz_vaddr - vaddr + offset; 367 368 if (file_trimmed_end < offset + filesz) 369 { 370 file_trimmed_end = offset + filesz; 371 372 /* Trim the last segment so we don't bother with zeros 373 in the last page that are off the end of the file. 374 However, if the extra bit in that page includes the 375 section headers, keep them. */ 376 if (shdrs_end <= filesz_offset && shdrs_end > file_trimmed_end) 377 { 378 filesz += shdrs_end - file_trimmed_end; 379 file_trimmed_end = shdrs_end; 380 } 381 } 382 383 total_filesz += filesz; 384 385 if (file_end < filesz_offset) 386 { 387 file_end = filesz_offset; 388 if (filesz_vaddr - start == filesz_offset) 389 contiguous = file_end; 390 } 391 392 if (!found_bias && (offset & -align) == 0 393 && likely (filesz_offset >= phoff + phnum * phentsize)) 394 { 395 bias = start - vaddr; 396 found_bias = true; 397 } 398 399 vaddr &= -align; 400 if (vaddr < module_start) 401 module_start = vaddr; 402 403 if (module_end < vaddr_end) 404 module_end = vaddr_end; 405 break; 406 } 407 } 408 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32) 409 { 410 if (elf32_xlatetom (&xlateto, &xlatefrom, 411 ehdr.e32.e_ident[EI_DATA]) == NULL) 412 found_bias = false; /* Trigger error check. */ 413 else 414 for (uint_fast16_t i = 0; i < phnum; ++i) 415 consider_phdr (phdrs.p32[i].p_type, 416 phdrs.p32[i].p_vaddr, phdrs.p32[i].p_memsz, 417 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz, 418 phdrs.p32[i].p_align); 419 } 420 else 421 { 422 if (elf64_xlatetom (&xlateto, &xlatefrom, 423 ehdr.e32.e_ident[EI_DATA]) == NULL) 424 found_bias = false; /* Trigger error check. */ 425 else 426 for (uint_fast16_t i = 0; i < phnum; ++i) 427 consider_phdr (phdrs.p64[i].p_type, 428 phdrs.p64[i].p_vaddr, phdrs.p64[i].p_memsz, 429 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz, 430 phdrs.p64[i].p_align); 431 } 432 433 finish_portion (&ph_buffer, &ph_buffer_size); 434 435 /* We must have seen the segment covering offset 0, or else the ELF 436 header we read at START was not produced by these program headers. */ 437 if (unlikely (!found_bias)) 438 return finish (); 439 440 /* Now we know enough to report a module for sure: its bounds. */ 441 module_start += bias; 442 module_end += bias; 443 444 dyn_vaddr += bias; 445 446 /* Our return value now says to skip the segments contained 447 within the module. 448 XXX handle gaps 449 */ 450 ndx = addr_segndx (dwfl, segment, module_end); 451 452 /* Examine its .dynamic section to get more interesting details. 453 If it has DT_SONAME, we'll use that as the module name. 454 We need its DT_STRTAB and DT_STRSZ to decipher DT_SONAME, 455 and they also tell us the essential portion of the file 456 for fetching symbols. */ 457 GElf_Addr soname_stroff = 0; 458 GElf_Addr dynstr_vaddr = 0; 459 GElf_Xword dynstrsz = 0; 460 inline bool consider_dyn (GElf_Sxword tag, GElf_Xword val) 461 { 462 switch (tag) 463 { 464 default: 465 return false; 466 467 case DT_SONAME: 468 soname_stroff = val; 469 break; 470 471 case DT_STRTAB: 472 dynstr_vaddr = val; 473 break; 474 475 case DT_STRSZ: 476 dynstrsz = val; 477 break; 478 } 479 480 return soname_stroff != 0 && dynstr_vaddr != 0 && dynstrsz != 0; 481 } 482 483 const size_t dyn_entsize = (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32 484 ? sizeof (Elf32_Dyn) : sizeof (Elf64_Dyn)); 485 void *dyn_data = NULL; 486 size_t dyn_data_size = 0; 487 if (dyn_filesz != 0 && dyn_filesz % dyn_entsize == 0 488 && ! read_portion (&dyn_data, &dyn_data_size, dyn_vaddr, dyn_filesz)) 489 { 490 union 491 { 492 Elf32_Dyn d32[dyn_filesz / sizeof (Elf32_Dyn)]; 493 Elf64_Dyn d64[dyn_filesz / sizeof (Elf64_Dyn)]; 494 } dyn; 495 496 xlatefrom.d_type = xlateto.d_type = ELF_T_DYN; 497 xlatefrom.d_buf = (void *) dyn_data; 498 xlatefrom.d_size = dyn_filesz; 499 xlateto.d_buf = &dyn; 500 xlateto.d_size = sizeof dyn; 501 502 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32) 503 { 504 if (elf32_xlatetom (&xlateto, &xlatefrom, 505 ehdr.e32.e_ident[EI_DATA]) != NULL) 506 for (size_t i = 0; i < dyn_filesz / sizeof dyn.d32[0]; ++i) 507 if (consider_dyn (dyn.d32[i].d_tag, dyn.d32[i].d_un.d_val)) 508 break; 509 } 510 else 511 { 512 if (elf64_xlatetom (&xlateto, &xlatefrom, 513 ehdr.e32.e_ident[EI_DATA]) != NULL) 514 for (size_t i = 0; i < dyn_filesz / sizeof dyn.d64[0]; ++i) 515 if (consider_dyn (dyn.d64[i].d_tag, dyn.d64[i].d_un.d_val)) 516 break; 517 } 518 } 519 finish_portion (&dyn_data, &dyn_data_size); 520 521 /* We'll use the name passed in or a stupid default if not DT_SONAME. */ 522 if (name == NULL) 523 name = ehdr.e32.e_type == ET_EXEC ? "[exe]" : "[dso]"; 524 525 void *soname = NULL; 526 size_t soname_size = 0; 527 if (dynstrsz != 0 && dynstr_vaddr != 0) 528 { 529 /* We know the bounds of the .dynstr section. */ 530 dynstr_vaddr += bias; 531 if (unlikely (dynstr_vaddr + dynstrsz > module_end)) 532 dynstrsz = 0; 533 534 /* Try to get the DT_SONAME string. */ 535 if (soname_stroff != 0 && soname_stroff < dynstrsz - 1 536 && ! read_portion (&soname, &soname_size, 537 dynstr_vaddr + soname_stroff, 0)) 538 name = soname; 539 } 540 541 /* Now that we have chosen the module's name and bounds, report it. 542 If we found a build ID, report that too. */ 543 544 Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, name, 545 module_start, module_end); 546 if (likely (mod != NULL) && build_id != NULL 547 && unlikely (INTUSE(dwfl_module_report_build_id) (mod, 548 build_id, 549 build_id_len, 550 build_id_vaddr))) 551 { 552 mod->gc = true; 553 mod = NULL; 554 } 555 556 /* At this point we do not need BUILD_ID or NAME any more. 557 They have been copied. */ 558 free (build_id); 559 finish_portion (&soname, &soname_size); 560 561 if (unlikely (mod == NULL)) 562 { 563 ndx = -1; 564 return finish (); 565 } 566 567 /* We have reported the module. Now let the caller decide whether we 568 should read the whole thing in right now. */ 569 570 const GElf_Off cost = (contiguous < file_trimmed_end ? total_filesz 571 : buffer_available >= contiguous ? 0 572 : contiguous - buffer_available); 573 const GElf_Off worthwhile = ((dynstr_vaddr == 0 || dynstrsz == 0) ? 0 574 : dynstr_vaddr + dynstrsz - start); 575 const GElf_Off whole = MAX (file_trimmed_end, shdrs_end); 576 577 Elf *elf = NULL; 578 if ((*read_eagerly) (MODCB_ARGS (mod), &buffer, &buffer_available, 579 cost, worthwhile, whole, contiguous, 580 read_eagerly_arg, &elf) 581 && elf == NULL) 582 { 583 /* The caller wants to read the whole file in right now, but hasn't 584 done it for us. Fill in a local image of the virtual file. */ 585 586 void *contents = calloc (1, file_trimmed_end); 587 if (unlikely (contents == NULL)) 588 return finish (); 589 590 inline void final_read (size_t offset, GElf_Addr vaddr, size_t size) 591 { 592 void *into = contents + offset; 593 size_t read_size = size; 594 (void) segment_read (addr_segndx (dwfl, segment, vaddr), 595 &into, &read_size, vaddr, size); 596 } 597 598 if (contiguous < file_trimmed_end) 599 { 600 /* We can't use the memory image verbatim as the file image. 601 So we'll be reading into a local image of the virtual file. */ 602 603 inline void read_phdr (GElf_Word type, GElf_Addr vaddr, 604 GElf_Off offset, GElf_Xword filesz) 605 { 606 if (type == PT_LOAD) 607 final_read (offset, vaddr + bias, filesz); 608 } 609 610 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32) 611 for (uint_fast16_t i = 0; i < phnum; ++i) 612 read_phdr (phdrs.p32[i].p_type, phdrs.p32[i].p_vaddr, 613 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz); 614 else 615 for (uint_fast16_t i = 0; i < phnum; ++i) 616 read_phdr (phdrs.p64[i].p_type, phdrs.p64[i].p_vaddr, 617 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz); 618 } 619 else 620 { 621 /* The whole file sits contiguous in memory, 622 but the caller didn't want to just do it. */ 623 624 const size_t have = MIN (buffer_available, file_trimmed_end); 625 memcpy (contents, buffer, have); 626 627 if (have < file_trimmed_end) 628 final_read (have, start + have, file_trimmed_end - have); 629 } 630 631 elf = elf_memory (contents, file_trimmed_end); 632 if (unlikely (elf == NULL)) 633 free (contents); 634 else 635 elf->flags |= ELF_F_MALLOCED; 636 } 637 638 if (elf != NULL) 639 { 640 /* Install the file in the module. */ 641 mod->main.elf = elf; 642 mod->main.bias = bias; 643 } 644 645 return finish (); 646 } 647