1 /* 2 * This file is part of ltrace. 3 * Copyright (C) 2012,2013,2014 Petr Machata, Red Hat Inc. 4 * Copyright (C) 2004,2008,2009 Juan Cespedes 5 * Copyright (C) 2006 Paul Gilliam 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License as 9 * published by the Free Software Foundation; either version 2 of the 10 * License, or (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 20 * 02110-1301 USA 21 */ 22 23 #include <gelf.h> 24 #include <sys/ptrace.h> 25 #include <errno.h> 26 #include <inttypes.h> 27 #include <assert.h> 28 #include <stdbool.h> 29 #include <string.h> 30 31 #include "proc.h" 32 #include "common.h" 33 #include "insn.h" 34 #include "library.h" 35 #include "breakpoint.h" 36 #include "linux-gnu/trace.h" 37 #include "backend.h" 38 39 /* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and 40 * new-style "secure" PLT. We can tell one from the other by the 41 * flags on the .plt section. If it's +X (executable), it's BSS PLT, 42 * otherwise it's secure. 43 * 44 * BSS PLT works the same way as most architectures: the .plt section 45 * contains trampolines and we put breakpoints to those. If not 46 * prelinked, .plt contains zeroes, and dynamic linker fills in the 47 * initial set of trampolines, which means that we need to delay 48 * enabling breakpoints until after binary entry point is hit. 49 * Additionally, after first call, dynamic linker updates .plt with 50 * branch to resolved address. That means that on first hit, we must 51 * do something similar to the PPC64 gambit described below. 52 * 53 * With secure PLT, the .plt section doesn't contain instructions but 54 * addresses. The real PLT table is stored in .text. Addresses of 55 * those PLT entries can be computed, and apart from the fact that 56 * they are in .text, they are ordinary PLT entries. 57 * 58 * 64-bit PPC is more involved. Program linker creates for each 59 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee> 60 * (where xxxxxxxx is a hexadecimal number). That stub does the call 61 * dispatch: it loads an address of a function to call from the 62 * section .plt, and branches. PLT entries themselves are essentially 63 * a curried call to the resolver. When the symbol is resolved, the 64 * resolver updates the value stored in .plt, and the next time 65 * around, the stub calls the library function directly. So we make 66 * at most one trip (none if the binary is prelinked) through each PLT 67 * entry, and correspondingly that is useless as a breakpoint site. 68 * 69 * Note the three confusing terms: stubs (that play the role of PLT 70 * entries), PLT entries, .plt section. 71 * 72 * We first check symbol tables and see if we happen to have stub 73 * symbols available. If yes we just put breakpoints to those, and 74 * treat them as usual breakpoints. The only tricky part is realizing 75 * that there can be more than one breakpoint per symbol. 76 * 77 * The case that we don't have the stub symbols available is harder. 78 * The following scheme uses two kinds of PLT breakpoints: unresolved 79 * and resolved (to some address). When the process starts (or when 80 * we attach), we distribute unresolved PLT breakpoints to the PLT 81 * entries (not stubs). Then we look in .plt, and for each entry 82 * whose value is different than the corresponding PLT entry address, 83 * we assume it was already resolved, and convert the breakpoint to 84 * resolved. We also rewrite the resolved value in .plt back to the 85 * PLT address. 86 * 87 * When a PLT entry hits a resolved breakpoint (which happens because 88 * we rewrite .plt with the original unresolved addresses), we move 89 * the instruction pointer to the corresponding address and continue 90 * the process as if nothing happened. 91 * 92 * When unresolved PLT entry is called for the first time, we need to 93 * catch the new value that the resolver will write to a .plt slot. 94 * We also need to prevent another thread from racing through and 95 * taking the branch without ltrace noticing. So when unresolved PLT 96 * entry hits, we have to stop all threads. We then single-step 97 * through the resolver, until the .plt slot changes. When it does, 98 * we treat it the same way as above: convert the PLT breakpoint to 99 * resolved, and rewrite the .plt value back to PLT address. We then 100 * start all threads again. 101 * 102 * As an optimization, we remember the address where the address was 103 * resolved, and put a breakpoint there. The next time around (when 104 * the next PLT entry is to be resolved), instead of single-stepping 105 * through half the dynamic linker, we just let the thread run and hit 106 * this breakpoint. When it hits, we know the PLT entry was resolved. 107 * 108 * Another twist comes from tracing slots corresponding to 109 * R_PPC64_JMP_IREL relocations. These have no dedicated PLT entry. 110 * The calls are done directly from stubs, and the .plt entry 111 * (actually .iplt entry, these live in a special section) is resolved 112 * in advance before the binary starts. Because there's no PLT entry, 113 * we put the PLT breakpoints directly to the IFUNC resolver code, and 114 * then would like them to behave like ordinary PLT slots, including 115 * catching the point where these get resolved to unresolve them. So 116 * for the first call (which is the actual resolver call), we pretend 117 * that this breakpoint is artificial and has no associated symbol, 118 * and turn it on fully only after the first hit. Ideally we would 119 * trace that first call as well, but then the stepper, which tries to 120 * catch the point where the slot is resolved, would hit the return 121 * breakpoint and that's not currently handled well. 122 * 123 * On PPC32 with secure PLT, the address of IFUNC symbols in main 124 * binary actually isn't of the resolver, but of a PLT slot. We 125 * therefore have to locate the corresponding PLT relocation (which is 126 * of type R_PPC_IRELATIVE) and request that it be traced. The addend 127 * of that relocation is an address of resolver, and we request 128 * tracing of the xyz.IFUNC symbol there. 129 * 130 * XXX TODO If we have hardware watch point, we might put a read watch 131 * on .plt slot, and discover the offenders this way. I don't know 132 * the details, but I assume at most a handful (like, one or two, if 133 * available at all) addresses may be watched at a time, and thus this 134 * would be used as an amendment of the above rather than full-on 135 * solution to PLT tracing on PPC. 136 */ 137 138 #define PPC_PLT_STUB_SIZE 16 139 #define PPC64_PLT_STUB_SIZE 8 //xxx 140 141 static inline int 142 host_powerpc64() 143 { 144 #ifdef __powerpc64__ 145 return 1; 146 #else 147 return 0; 148 #endif 149 } 150 151 static void 152 mark_as_resolved(struct library_symbol *libsym, GElf_Addr value) 153 { 154 libsym->arch.type = PPC_PLT_RESOLVED; 155 libsym->arch.resolved_value = value; 156 } 157 158 static void 159 ppc32_delayed_symbol(struct library_symbol *libsym) 160 { 161 /* arch_dynlink_done is called on attach as well. In that 162 * case some slots will have been resolved already. 163 * Unresolved PLT looks like this: 164 * 165 * <sleep@plt>: li r11,0 166 * <sleep@plt+4>: b "resolve" 167 * 168 * "resolve" is another address in PLTGOT (the same block that 169 * all the PLT slots are it). When resolved, it looks either 170 * this way: 171 * 172 * <sleep@plt>: b 0xfea88d0 <sleep> 173 * 174 * Which is easy to detect. It can also look this way: 175 * 176 * <sleep@plt>: li r11,0 177 * <sleep@plt+4>: b "dispatch" 178 * 179 * The "dispatch" address lies in PLTGOT as well. In current 180 * GNU toolchain, "dispatch" address is the same as PLTGOT 181 * address. We rely on this to figure out whether the address 182 * is resolved or not. */ 183 184 uint32_t insn1 = libsym->arch.resolved_value >> 32; 185 uint32_t insn2 = (uint32_t) libsym->arch.resolved_value; 186 if ((insn1 & BRANCH_MASK) == B_INSN 187 || ((insn2 & BRANCH_MASK) == B_INSN 188 /* XXX double cast */ 189 && (ppc_branch_dest(libsym->enter_addr + 4, insn2) 190 == (arch_addr_t) (long) libsym->lib->arch.pltgot_addr))) 191 { 192 mark_as_resolved(libsym, libsym->arch.resolved_value); 193 } 194 } 195 196 void 197 arch_dynlink_done(struct process *proc) 198 { 199 /* We may need to activate delayed symbols. */ 200 struct library_symbol *libsym = NULL; 201 while ((libsym = proc_each_symbol(proc, libsym, 202 library_symbol_delayed_cb, NULL))) { 203 if (proc_read_64(proc, libsym->enter_addr, 204 &libsym->arch.resolved_value) < 0) { 205 fprintf(stderr, 206 "couldn't read PLT value for %s(%p): %s\n", 207 libsym->name, libsym->enter_addr, 208 strerror(errno)); 209 return; 210 } 211 212 if (proc->e_machine == EM_PPC) 213 ppc32_delayed_symbol(libsym); 214 215 if (proc_activate_delayed_symbol(proc, libsym) < 0) 216 return; 217 218 if (proc->e_machine == EM_PPC) 219 /* XXX double cast */ 220 libsym->arch.plt_slot_addr 221 = (GElf_Addr) (uintptr_t) libsym->enter_addr; 222 } 223 } 224 225 static bool 226 reloc_is_irelative(int machine, GElf_Rela *rela) 227 { 228 bool irelative = false; 229 if (machine == EM_PPC64) { 230 #ifdef R_PPC64_JMP_IREL 231 irelative = GELF_R_TYPE(rela->r_info) == R_PPC64_JMP_IREL; 232 #endif 233 } else { 234 assert(machine == EM_PPC); 235 #ifdef R_PPC_IRELATIVE 236 irelative = GELF_R_TYPE(rela->r_info) == R_PPC_IRELATIVE; 237 #endif 238 } 239 return irelative; 240 } 241 242 GElf_Addr 243 arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela) 244 { 245 if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) { 246 assert(lte->arch.plt_stub_vma != 0); 247 return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx; 248 249 } else if (lte->ehdr.e_machine == EM_PPC) { 250 return rela->r_offset; 251 252 /* Beyond this point, we are on PPC64, but don't have stub 253 * symbols. */ 254 255 } else if (reloc_is_irelative(lte->ehdr.e_machine, rela)) { 256 257 /* Put JMP_IREL breakpoint to resolver, since there's 258 * no dedicated PLT entry. */ 259 260 assert(rela->r_addend != 0); 261 /* XXX double cast */ 262 arch_addr_t res_addr = (arch_addr_t) (uintptr_t) rela->r_addend; 263 if (arch_translate_address(lte, res_addr, &res_addr) < 0) { 264 fprintf(stderr, "Couldn't OPD-translate IRELATIVE " 265 "resolver address.\n"); 266 return 0; 267 } 268 /* XXX double cast */ 269 return (GElf_Addr) (uintptr_t) res_addr; 270 271 } else { 272 /* We put brakpoints to PLT entries the same as the 273 * PPC32 secure PLT case does. */ 274 assert(lte->arch.plt_stub_vma != 0); 275 return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx; 276 } 277 } 278 279 /* This entry point is called when ltelf is not available 280 * anymore--during runtime. At that point we don't have to concern 281 * ourselves with bias, as the values in OPD have been resolved 282 * already. */ 283 int 284 arch_translate_address_dyn(struct process *proc, 285 arch_addr_t addr, arch_addr_t *ret) 286 { 287 if (proc->e_machine == EM_PPC64) { 288 uint64_t value; 289 if (proc_read_64(proc, addr, &value) < 0) { 290 fprintf(stderr, 291 "dynamic .opd translation of %p: %s\n", 292 addr, strerror(errno)); 293 return -1; 294 } 295 /* XXX The double cast should be removed when 296 * arch_addr_t becomes integral type. */ 297 *ret = (arch_addr_t)(uintptr_t)value; 298 return 0; 299 } 300 301 *ret = addr; 302 return 0; 303 } 304 305 int 306 arch_translate_address(struct ltelf *lte, 307 arch_addr_t addr, arch_addr_t *ret) 308 { 309 if (lte->ehdr.e_machine == EM_PPC64) { 310 /* XXX The double cast should be removed when 311 * arch_addr_t becomes integral type. */ 312 GElf_Xword offset 313 = (GElf_Addr)(uintptr_t)addr - lte->arch.opd_base; 314 uint64_t value; 315 if (elf_read_u64(lte->arch.opd_data, offset, &value) < 0) { 316 fprintf(stderr, "static .opd translation of %p: %s\n", 317 addr, elf_errmsg(-1)); 318 return -1; 319 } 320 *ret = (arch_addr_t)(uintptr_t)(value + lte->bias); 321 return 0; 322 } 323 324 *ret = addr; 325 return 0; 326 } 327 328 static int 329 load_opd_data(struct ltelf *lte, struct library *lib) 330 { 331 Elf_Scn *sec; 332 GElf_Shdr shdr; 333 if (elf_get_section_named(lte, ".opd", &sec, &shdr) < 0 334 || sec == NULL) { 335 fail: 336 fprintf(stderr, "couldn't find .opd data\n"); 337 return -1; 338 } 339 340 lte->arch.opd_data = elf_rawdata(sec, NULL); 341 if (lte->arch.opd_data == NULL) 342 goto fail; 343 344 lte->arch.opd_base = shdr.sh_addr + lte->bias; 345 lte->arch.opd_size = shdr.sh_size; 346 347 return 0; 348 } 349 350 void * 351 sym2addr(struct process *proc, struct library_symbol *sym) 352 { 353 return sym->enter_addr; 354 } 355 356 static GElf_Addr 357 get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data) 358 { 359 Elf_Scn *ppcgot_sec = NULL; 360 GElf_Shdr ppcgot_shdr; 361 if (ppcgot != 0 362 && (elf_get_section_covering(lte, ppcgot, 363 &ppcgot_sec, &ppcgot_shdr) < 0 364 || ppcgot_sec == NULL)) 365 fprintf(stderr, 366 "DT_PPC_GOT=%#"PRIx64", but no such section found\n", 367 ppcgot); 368 369 if (ppcgot_sec != NULL) { 370 Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr); 371 if (data == NULL || data->d_size < 8 ) { 372 fprintf(stderr, "couldn't read GOT data\n"); 373 } else { 374 // where PPCGOT begins in .got 375 size_t offset = ppcgot - ppcgot_shdr.sh_addr; 376 assert(offset % 4 == 0); 377 uint32_t glink_vma; 378 if (elf_read_u32(data, offset + 4, &glink_vma) < 0) { 379 fprintf(stderr, "couldn't read glink VMA" 380 " address at %zd@GOT\n", offset); 381 return 0; 382 } 383 if (glink_vma != 0) { 384 debug(1, "PPC GOT glink_vma address: %#" PRIx32, 385 glink_vma); 386 return (GElf_Addr)glink_vma; 387 } 388 } 389 } 390 391 if (plt_data != NULL) { 392 uint32_t glink_vma; 393 if (elf_read_u32(plt_data, 0, &glink_vma) < 0) { 394 fprintf(stderr, "couldn't read glink VMA address\n"); 395 return 0; 396 } 397 debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma); 398 return (GElf_Addr)glink_vma; 399 } 400 401 return 0; 402 } 403 404 static int 405 nonzero_data(Elf_Data *data) 406 { 407 /* We are not supposed to get here if there's no PLT. */ 408 assert(data != NULL); 409 410 unsigned char *buf = data->d_buf; 411 if (buf == NULL) 412 return 0; 413 414 size_t i; 415 for (i = 0; i < data->d_size; ++i) 416 if (buf[i] != 0) 417 return 1; 418 return 0; 419 } 420 421 static enum callback_status 422 reloc_copy_if_irelative(GElf_Rela *rela, void *data) 423 { 424 struct ltelf *lte = data; 425 426 return CBS_STOP_IF(reloc_is_irelative(lte->ehdr.e_machine, rela) 427 && VECT_PUSHBACK(<e->plt_relocs, rela) < 0); 428 } 429 430 int 431 arch_elf_init(struct ltelf *lte, struct library *lib) 432 { 433 if (lte->ehdr.e_machine == EM_PPC64 434 && load_opd_data(lte, lib) < 0) 435 return -1; 436 437 lte->arch.secure_plt = !(lte->plt_flags & SHF_EXECINSTR); 438 439 /* For PPC32 BSS, it is important whether the binary was 440 * prelinked. If .plt section is NODATA, or if it contains 441 * zeroes, then this library is not prelinked, and we need to 442 * delay breakpoints. */ 443 if (lte->ehdr.e_machine == EM_PPC && !lte->arch.secure_plt) 444 lib->arch.bss_plt_prelinked = nonzero_data(lte->plt_data); 445 else 446 /* For cases where it's irrelevant, initialize the 447 * value to something conspicuous. */ 448 lib->arch.bss_plt_prelinked = -1; 449 450 /* On PPC64 and PPC32 secure, IRELATIVE relocations actually 451 * relocate .iplt section, and as such are stored in .rela.dyn 452 * (where all non-PLT relocations are stored) instead of 453 * .rela.plt. Add these to lte->plt_relocs. */ 454 455 GElf_Addr rela, relasz; 456 Elf_Scn *rela_sec; 457 GElf_Shdr rela_shdr; 458 if ((lte->ehdr.e_machine == EM_PPC64 || lte->arch.secure_plt) 459 && elf_load_dynamic_entry(lte, DT_RELA, &rela) == 0 460 && elf_load_dynamic_entry(lte, DT_RELASZ, &relasz) == 0 461 && elf_get_section_covering(lte, rela, &rela_sec, &rela_shdr) == 0 462 && rela_sec != NULL) { 463 464 struct vect v; 465 VECT_INIT(&v, GElf_Rela); 466 int ret = elf_read_relocs(lte, rela_sec, &rela_shdr, &v); 467 if (ret >= 0 468 && VECT_EACH(&v, GElf_Rela, NULL, 469 reloc_copy_if_irelative, lte) != NULL) 470 ret = -1; 471 472 VECT_DESTROY(&v, GElf_Rela, NULL, NULL); 473 474 if (ret < 0) 475 return ret; 476 } 477 478 if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) { 479 GElf_Addr ppcgot; 480 if (elf_load_dynamic_entry(lte, DT_PPC_GOT, &ppcgot) < 0) { 481 fprintf(stderr, "couldn't find DT_PPC_GOT\n"); 482 return -1; 483 } 484 GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data); 485 486 size_t count = vect_size(<e->plt_relocs); 487 lte->arch.plt_stub_vma = glink_vma 488 - (GElf_Addr) count * PPC_PLT_STUB_SIZE; 489 debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma); 490 491 } else if (lte->ehdr.e_machine == EM_PPC64) { 492 GElf_Addr glink_vma; 493 if (elf_load_dynamic_entry(lte, DT_PPC64_GLINK, 494 &glink_vma) < 0) { 495 fprintf(stderr, "couldn't find DT_PPC64_GLINK\n"); 496 return -1; 497 } 498 499 /* The first glink stub starts at offset 32. */ 500 lte->arch.plt_stub_vma = glink_vma + 32; 501 502 } else { 503 /* By exhaustion--PPC32 BSS. */ 504 if (elf_load_dynamic_entry(lte, DT_PLTGOT, 505 &lib->arch.pltgot_addr) < 0) { 506 fprintf(stderr, "couldn't find DT_PLTGOT\n"); 507 return -1; 508 } 509 } 510 511 /* On PPC64, look for stub symbols in symbol table. These are 512 * called: xxxxxxxx.plt_call.callee_name@version+addend. */ 513 if (lte->ehdr.e_machine == EM_PPC64 514 && lte->symtab != NULL && lte->strtab != NULL) { 515 516 /* N.B. We can't simply skip the symbols that we fail 517 * to read or malloc. There may be more than one stub 518 * per symbol name, and if we failed in one but 519 * succeeded in another, the PLT enabling code would 520 * have no way to tell that something is missing. We 521 * could work around that, of course, but it doesn't 522 * seem worth the trouble. So if anything fails, we 523 * just pretend that we don't have stub symbols at 524 * all, as if the binary is stripped. */ 525 526 size_t i; 527 for (i = 0; i < lte->symtab_count; ++i) { 528 GElf_Sym sym; 529 if (gelf_getsym(lte->symtab, i, &sym) == NULL) { 530 struct library_symbol *sym, *next; 531 fail: 532 for (sym = lte->arch.stubs; sym != NULL; ) { 533 next = sym->next; 534 library_symbol_destroy(sym); 535 free(sym); 536 sym = next; 537 } 538 lte->arch.stubs = NULL; 539 break; 540 } 541 542 const char *name = lte->strtab + sym.st_name; 543 544 #define STUBN ".plt_call." 545 if ((name = strstr(name, STUBN)) == NULL) 546 continue; 547 name += sizeof(STUBN) - 1; 548 #undef STUBN 549 550 size_t len; 551 const char *ver = strchr(name, '@'); 552 if (ver != NULL) { 553 len = ver - name; 554 555 } else { 556 /* If there is "+" at all, check that 557 * the symbol name ends in "+0". */ 558 const char *add = strrchr(name, '+'); 559 if (add != NULL) { 560 assert(strcmp(add, "+0") == 0); 561 len = add - name; 562 } else { 563 len = strlen(name); 564 } 565 } 566 567 char *sym_name = strndup(name, len); 568 struct library_symbol *libsym = malloc(sizeof(*libsym)); 569 if (sym_name == NULL || libsym == NULL) { 570 fail2: 571 free(sym_name); 572 free(libsym); 573 goto fail; 574 } 575 576 /* XXX The double cast should be removed when 577 * arch_addr_t becomes integral type. */ 578 arch_addr_t addr = (arch_addr_t) 579 (uintptr_t)sym.st_value + lte->bias; 580 if (library_symbol_init(libsym, addr, sym_name, 1, 581 LS_TOPLT_EXEC) < 0) 582 goto fail2; 583 libsym->arch.type = PPC64_PLT_STUB; 584 libsym->next = lte->arch.stubs; 585 lte->arch.stubs = libsym; 586 } 587 } 588 589 return 0; 590 } 591 592 static int 593 read_plt_slot_value(struct process *proc, GElf_Addr addr, GElf_Addr *valp) 594 { 595 /* On PPC64, we read from .plt, which contains 8 byte 596 * addresses. On PPC32 we read from .plt, which contains 4 597 * byte instructions, but the PLT is two instructions, and 598 * either can change. */ 599 uint64_t l; 600 /* XXX double cast. */ 601 if (proc_read_64(proc, (arch_addr_t)(uintptr_t)addr, &l) < 0) { 602 fprintf(stderr, "ptrace .plt slot value @%#" PRIx64": %s\n", 603 addr, strerror(errno)); 604 return -1; 605 } 606 607 *valp = (GElf_Addr)l; 608 return 0; 609 } 610 611 static int 612 unresolve_plt_slot(struct process *proc, GElf_Addr addr, GElf_Addr value) 613 { 614 /* We only modify plt_entry[0], which holds the resolved 615 * address of the routine. We keep the TOC and environment 616 * pointers intact. Hence the only adjustment that we need to 617 * do is to IP. */ 618 if (ptrace(PTRACE_POKETEXT, proc->pid, addr, value) < 0) { 619 fprintf(stderr, "failed to unresolve .plt slot: %s\n", 620 strerror(errno)); 621 return -1; 622 } 623 return 0; 624 } 625 626 enum plt_status 627 arch_elf_add_func_entry(struct process *proc, struct ltelf *lte, 628 const GElf_Sym *sym, 629 arch_addr_t addr, const char *name, 630 struct library_symbol **ret) 631 { 632 if (lte->ehdr.e_machine != EM_PPC || lte->ehdr.e_type == ET_DYN) 633 return PLT_DEFAULT; 634 635 bool ifunc = false; 636 #ifdef STT_GNU_IFUNC 637 ifunc = GELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC; 638 #endif 639 if (! ifunc) 640 return PLT_DEFAULT; 641 642 size_t len = vect_size(<e->plt_relocs); 643 size_t i; 644 for (i = 0; i < len; ++i) { 645 GElf_Rela *rela = VECT_ELEMENT(<e->plt_relocs, GElf_Rela, i); 646 if (sym->st_value == arch_plt_sym_val(lte, i, rela)) { 647 648 char *tmp_name = linux_append_IFUNC_to_name(name); 649 struct library_symbol *libsym = malloc(sizeof *libsym); 650 651 /* XXX double cast. */ 652 arch_addr_t resolver_addr 653 = (arch_addr_t) (uintptr_t) rela->r_addend; 654 655 if (tmp_name == NULL || libsym == NULL 656 || library_symbol_init(libsym, resolver_addr, 657 tmp_name, 1, 658 LS_TOPLT_EXEC) < 0) { 659 fail: 660 free(tmp_name); 661 free(libsym); 662 return PLT_FAIL; 663 } 664 665 if (elf_add_plt_entry(proc, lte, name, rela, 666 i, ret) < 0) { 667 library_symbol_destroy(libsym); 668 goto fail; 669 } 670 671 libsym->proto = linux_IFUNC_prototype(); 672 libsym->next = *ret; 673 *ret = libsym; 674 return PLT_OK; 675 } 676 } 677 678 *ret = NULL; 679 return PLT_OK; 680 } 681 682 struct ppc_unresolve_data { 683 struct ppc_unresolve_data *self; /* A canary. */ 684 GElf_Addr plt_entry_addr; 685 GElf_Addr plt_slot_addr; 686 GElf_Addr plt_slot_value; 687 bool is_irelative; 688 }; 689 690 enum plt_status 691 arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte, 692 const char *a_name, GElf_Rela *rela, size_t ndx, 693 struct library_symbol **ret) 694 { 695 bool is_irelative = reloc_is_irelative(lte->ehdr.e_machine, rela); 696 char *name; 697 if (! is_irelative) { 698 name = strdup(a_name); 699 } else { 700 GElf_Addr addr = lte->ehdr.e_machine == EM_PPC64 701 ? (GElf_Addr) rela->r_addend 702 : arch_plt_sym_val(lte, ndx, rela); 703 name = linux_elf_find_irelative_name(lte, addr); 704 } 705 706 if (name == NULL) { 707 fail: 708 free(name); 709 return PLT_FAIL; 710 } 711 712 struct library_symbol *chain = NULL; 713 if (lte->ehdr.e_machine == EM_PPC) { 714 if (default_elf_add_plt_entry(proc, lte, name, rela, ndx, 715 &chain) < 0) 716 goto fail; 717 718 if (! lte->arch.secure_plt) { 719 /* On PPC32 with BSS PLT, delay the symbol 720 * until dynamic linker is done. */ 721 assert(!chain->delayed); 722 chain->delayed = 1; 723 } 724 725 ok: 726 *ret = chain; 727 free(name); 728 return PLT_OK; 729 } 730 731 /* PPC64. If we have stubs, we return a chain of breakpoint 732 * sites, one for each stub that corresponds to this PLT 733 * entry. */ 734 struct library_symbol **symp; 735 for (symp = <e->arch.stubs; *symp != NULL; ) { 736 struct library_symbol *sym = *symp; 737 if (strcmp(sym->name, name) != 0) { 738 symp = &(*symp)->next; 739 continue; 740 } 741 742 /* Re-chain the symbol from stubs to CHAIN. */ 743 *symp = sym->next; 744 sym->next = chain; 745 chain = sym; 746 } 747 748 if (chain != NULL) 749 goto ok; 750 751 /* We don't have stub symbols. Find corresponding .plt slot, 752 * and check whether it contains the corresponding PLT address 753 * (or 0 if the dynamic linker hasn't run yet). N.B. we don't 754 * want read this from ELF file, but from process image. That 755 * makes a difference if we are attaching to a running 756 * process. */ 757 758 GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela); 759 GElf_Addr plt_slot_addr = rela->r_offset; 760 761 assert(plt_slot_addr >= lte->plt_addr 762 || plt_slot_addr < lte->plt_addr + lte->plt_size); 763 764 GElf_Addr plt_slot_value; 765 if (read_plt_slot_value(proc, plt_slot_addr, &plt_slot_value) < 0) 766 goto fail; 767 768 struct library_symbol *libsym = malloc(sizeof(*libsym)); 769 if (libsym == NULL) { 770 fprintf(stderr, "allocation for .plt slot: %s\n", 771 strerror(errno)); 772 fail2: 773 free(libsym); 774 goto fail; 775 } 776 777 /* XXX The double cast should be removed when 778 * arch_addr_t becomes integral type. */ 779 if (library_symbol_init(libsym, 780 (arch_addr_t) (uintptr_t) plt_entry_addr, 781 name, 1, LS_TOPLT_EXEC) < 0) 782 goto fail2; 783 libsym->arch.plt_slot_addr = plt_slot_addr; 784 785 if (! is_irelative 786 && (plt_slot_value == plt_entry_addr || plt_slot_value == 0)) { 787 libsym->arch.type = PPC_PLT_UNRESOLVED; 788 libsym->arch.resolved_value = plt_entry_addr; 789 } else { 790 /* Mark the symbol for later unresolving. We may not 791 * do this right away, as this is called by ltrace 792 * core for all symbols, and only later filtered. We 793 * only unresolve the symbol before the breakpoint is 794 * enabled. */ 795 796 libsym->arch.type = PPC_PLT_NEED_UNRESOLVE; 797 libsym->arch.data = malloc(sizeof *libsym->arch.data); 798 if (libsym->arch.data == NULL) 799 goto fail2; 800 801 libsym->arch.data->self = libsym->arch.data; 802 libsym->arch.data->plt_entry_addr = plt_entry_addr; 803 libsym->arch.data->plt_slot_addr = plt_slot_addr; 804 libsym->arch.data->plt_slot_value = plt_slot_value; 805 libsym->arch.data->is_irelative = is_irelative; 806 } 807 808 *ret = libsym; 809 return PLT_OK; 810 } 811 812 void 813 arch_elf_destroy(struct ltelf *lte) 814 { 815 struct library_symbol *sym; 816 for (sym = lte->arch.stubs; sym != NULL; ) { 817 struct library_symbol *next = sym->next; 818 library_symbol_destroy(sym); 819 free(sym); 820 sym = next; 821 } 822 } 823 824 static void 825 dl_plt_update_bp_on_hit(struct breakpoint *bp, struct process *proc) 826 { 827 debug(DEBUG_PROCESS, "pid=%d dl_plt_update_bp_on_hit %s(%p)", 828 proc->pid, breakpoint_name(bp), bp->addr); 829 struct process_stopping_handler *self = proc->arch.handler; 830 assert(self != NULL); 831 832 struct library_symbol *libsym = self->breakpoint_being_enabled->libsym; 833 GElf_Addr value; 834 if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0) 835 return; 836 837 /* On PPC64, we rewrite the slot value. */ 838 if (proc->e_machine == EM_PPC64) 839 unresolve_plt_slot(proc, libsym->arch.plt_slot_addr, 840 libsym->arch.resolved_value); 841 /* We mark the breakpoint as resolved on both arches. */ 842 mark_as_resolved(libsym, value); 843 844 /* cb_on_all_stopped looks if HANDLER is set to NULL as a way 845 * to check that this was run. It's an error if it 846 * wasn't. */ 847 proc->arch.handler = NULL; 848 849 breakpoint_turn_off(bp, proc); 850 } 851 852 static void 853 cb_on_all_stopped(struct process_stopping_handler *self) 854 { 855 /* Put that in for dl_plt_update_bp_on_hit to see. */ 856 assert(self->task_enabling_breakpoint->arch.handler == NULL); 857 self->task_enabling_breakpoint->arch.handler = self; 858 859 linux_ptrace_disable_and_continue(self); 860 } 861 862 static enum callback_status 863 cb_keep_stepping_p(struct process_stopping_handler *self) 864 { 865 struct process *proc = self->task_enabling_breakpoint; 866 struct library_symbol *libsym = self->breakpoint_being_enabled->libsym; 867 868 GElf_Addr value; 869 if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0) 870 return CBS_FAIL; 871 872 /* In UNRESOLVED state, the RESOLVED_VALUE in fact contains 873 * the PLT entry value. */ 874 if (value == libsym->arch.resolved_value) 875 return CBS_CONT; 876 877 debug(DEBUG_PROCESS, "pid=%d PLT got resolved to value %#"PRIx64, 878 proc->pid, value); 879 880 /* The .plt slot got resolved! We can migrate the breakpoint 881 * to RESOLVED and stop single-stepping. */ 882 if (proc->e_machine == EM_PPC64 883 && unresolve_plt_slot(proc, libsym->arch.plt_slot_addr, 884 libsym->arch.resolved_value) < 0) 885 return CBS_FAIL; 886 887 /* Resolving on PPC64 consists of overwriting a doubleword in 888 * .plt. That doubleword is than read back by a stub, and 889 * jumped on. Hopefully we can assume that double word update 890 * is done on a single place only, as it contains a final 891 * address. We still need to look around for any sync 892 * instruction, but essentially it is safe to optimize away 893 * the single stepping next time and install a post-update 894 * breakpoint. 895 * 896 * The situation on PPC32 BSS is more complicated. The 897 * dynamic linker here updates potentially several 898 * instructions (XXX currently we assume two) and the rules 899 * are more complicated. Sometimes it's enough to adjust just 900 * one of the addresses--the logic for generating optimal 901 * dispatch depends on relative addresses of the .plt entry 902 * and the jump destination. We can't assume that the some 903 * instruction block does the update every time. So on PPC32, 904 * we turn the optimization off and just step through it each 905 * time. */ 906 if (proc->e_machine == EM_PPC) 907 goto done; 908 909 /* Install breakpoint to the address where the change takes 910 * place. If we fail, then that just means that we'll have to 911 * singlestep the next time around as well. */ 912 struct process *leader = proc->leader; 913 if (leader == NULL || leader->arch.dl_plt_update_bp != NULL) 914 goto done; 915 916 /* We need to install to the next instruction. ADDR points to 917 * a store instruction, so moving the breakpoint one 918 * instruction forward is safe. */ 919 arch_addr_t addr = get_instruction_pointer(proc) + 4; 920 leader->arch.dl_plt_update_bp = insert_breakpoint_at(proc, addr, NULL); 921 if (leader->arch.dl_plt_update_bp == NULL) 922 goto done; 923 924 static struct bp_callbacks dl_plt_update_cbs = { 925 .on_hit = dl_plt_update_bp_on_hit, 926 }; 927 leader->arch.dl_plt_update_bp->cbs = &dl_plt_update_cbs; 928 929 /* Turn it off for now. We will turn it on again when we hit 930 * the PLT entry that needs this. */ 931 breakpoint_turn_off(leader->arch.dl_plt_update_bp, proc); 932 933 done: 934 mark_as_resolved(libsym, value); 935 936 return CBS_STOP; 937 } 938 939 static void 940 jump_to_entry_point(struct process *proc, struct breakpoint *bp) 941 { 942 /* XXX The double cast should be removed when 943 * arch_addr_t becomes integral type. */ 944 arch_addr_t rv = (arch_addr_t) 945 (uintptr_t)bp->libsym->arch.resolved_value; 946 set_instruction_pointer(proc, rv); 947 } 948 949 static void 950 ppc_plt_bp_continue(struct breakpoint *bp, struct process *proc) 951 { 952 /* If this is a first call through IREL breakpoint, enable the 953 * symbol so that it doesn't look like an artificial 954 * breakpoint anymore. */ 955 if (bp->libsym == NULL) { 956 assert(bp->arch.irel_libsym != NULL); 957 bp->libsym = bp->arch.irel_libsym; 958 bp->arch.irel_libsym = NULL; 959 } 960 961 switch (bp->libsym->arch.type) { 962 struct process *leader; 963 void (*on_all_stopped)(struct process_stopping_handler *); 964 enum callback_status (*keep_stepping_p) 965 (struct process_stopping_handler *); 966 967 case PPC_DEFAULT: 968 assert(proc->e_machine == EM_PPC); 969 assert(bp->libsym != NULL); 970 assert(bp->libsym->lib->arch.bss_plt_prelinked == 0); 971 /* Fall through. */ 972 973 case PPC_PLT_IRELATIVE: 974 case PPC_PLT_UNRESOLVED: 975 on_all_stopped = NULL; 976 keep_stepping_p = NULL; 977 leader = proc->leader; 978 979 if (leader != NULL && leader->arch.dl_plt_update_bp != NULL 980 && breakpoint_turn_on(leader->arch.dl_plt_update_bp, 981 proc) >= 0) 982 on_all_stopped = cb_on_all_stopped; 983 else 984 keep_stepping_p = cb_keep_stepping_p; 985 986 if (process_install_stopping_handler 987 (proc, bp, on_all_stopped, keep_stepping_p, NULL) < 0) { 988 fprintf(stderr, "ppc_plt_bp_continue: " 989 "couldn't install event handler\n"); 990 continue_after_breakpoint(proc, bp); 991 } 992 return; 993 994 case PPC_PLT_RESOLVED: 995 if (proc->e_machine == EM_PPC) { 996 continue_after_breakpoint(proc, bp); 997 return; 998 } 999 1000 jump_to_entry_point(proc, bp); 1001 continue_process(proc->pid); 1002 return; 1003 1004 case PPC64_PLT_STUB: 1005 case PPC_PLT_NEED_UNRESOLVE: 1006 /* These should never hit here. */ 1007 break; 1008 } 1009 1010 assert(bp->libsym->arch.type != bp->libsym->arch.type); 1011 abort(); 1012 } 1013 1014 /* When a process is in a PLT stub, it may have already read the data 1015 * in .plt that we changed. If we detach now, it will jump to PLT 1016 * entry and continue to the dynamic linker, where it will SIGSEGV, 1017 * because zeroth .plt slot is not filled in prelinked binaries, and 1018 * the dynamic linker needs that data. Moreover, the process may 1019 * actually have hit the breakpoint already. This functions tries to 1020 * detect both cases and do any fix-ups necessary to mend this 1021 * situation. */ 1022 static enum callback_status 1023 detach_task_cb(struct process *task, void *data) 1024 { 1025 struct breakpoint *bp = data; 1026 1027 if (get_instruction_pointer(task) == bp->addr) { 1028 debug(DEBUG_PROCESS, "%d at %p, which is PLT slot", 1029 task->pid, bp->addr); 1030 jump_to_entry_point(task, bp); 1031 return CBS_CONT; 1032 } 1033 1034 /* XXX There's still a window of several instructions where we 1035 * might catch the task inside a stub such that it has already 1036 * read destination address from .plt, but hasn't jumped yet, 1037 * thus avoiding the breakpoint. */ 1038 1039 return CBS_CONT; 1040 } 1041 1042 static void 1043 ppc_plt_bp_retract(struct breakpoint *bp, struct process *proc) 1044 { 1045 /* On PPC64, we rewrite .plt with PLT entry addresses. This 1046 * needs to be undone. Unfortunately, the program may have 1047 * made decisions based on that value */ 1048 if (proc->e_machine == EM_PPC64 1049 && bp->libsym != NULL 1050 && bp->libsym->arch.type == PPC_PLT_RESOLVED) { 1051 each_task(proc->leader, NULL, detach_task_cb, bp); 1052 unresolve_plt_slot(proc, bp->libsym->arch.plt_slot_addr, 1053 bp->libsym->arch.resolved_value); 1054 } 1055 } 1056 1057 static void 1058 ppc_plt_bp_install(struct breakpoint *bp, struct process *proc) 1059 { 1060 /* This should not be an artificial breakpoint. */ 1061 struct library_symbol *libsym = bp->libsym; 1062 if (libsym == NULL) 1063 libsym = bp->arch.irel_libsym; 1064 assert(libsym != NULL); 1065 1066 if (libsym->arch.type == PPC_PLT_NEED_UNRESOLVE) { 1067 /* Unresolve the .plt slot. If the binary was 1068 * prelinked, this makes the code invalid, because in 1069 * case of prelinked binary, the dynamic linker 1070 * doesn't update .plt[0] and .plt[1] with addresses 1071 * of the resover. But we don't care, we will never 1072 * need to enter the resolver. That just means that 1073 * we have to un-un-resolve this back before we 1074 * detach. */ 1075 1076 struct ppc_unresolve_data *data = libsym->arch.data; 1077 libsym->arch.data = NULL; 1078 assert(data->self == data); 1079 1080 GElf_Addr plt_slot_addr = data->plt_slot_addr; 1081 GElf_Addr plt_slot_value = data->plt_slot_value; 1082 GElf_Addr plt_entry_addr = data->plt_entry_addr; 1083 1084 if (unresolve_plt_slot(proc, plt_slot_addr, 1085 plt_entry_addr) == 0) { 1086 if (! data->is_irelative) { 1087 mark_as_resolved(libsym, plt_slot_value); 1088 } else { 1089 libsym->arch.type = PPC_PLT_IRELATIVE; 1090 libsym->arch.resolved_value = plt_entry_addr; 1091 } 1092 } else { 1093 fprintf(stderr, "Couldn't unresolve %s@%p. Not tracing" 1094 " this symbol.\n", 1095 breakpoint_name(bp), bp->addr); 1096 proc_remove_breakpoint(proc, bp); 1097 } 1098 1099 free(data); 1100 } 1101 } 1102 1103 int 1104 arch_library_init(struct library *lib) 1105 { 1106 return 0; 1107 } 1108 1109 void 1110 arch_library_destroy(struct library *lib) 1111 { 1112 } 1113 1114 int 1115 arch_library_clone(struct library *retp, struct library *lib) 1116 { 1117 return 0; 1118 } 1119 1120 int 1121 arch_library_symbol_init(struct library_symbol *libsym) 1122 { 1123 /* We set type explicitly in the code above, where we have the 1124 * necessary context. This is for calls from ltrace-elf.c and 1125 * such. */ 1126 libsym->arch.type = PPC_DEFAULT; 1127 return 0; 1128 } 1129 1130 void 1131 arch_library_symbol_destroy(struct library_symbol *libsym) 1132 { 1133 if (libsym->arch.type == PPC_PLT_NEED_UNRESOLVE) { 1134 assert(libsym->arch.data->self == libsym->arch.data); 1135 free(libsym->arch.data); 1136 libsym->arch.data = NULL; 1137 } 1138 } 1139 1140 int 1141 arch_library_symbol_clone(struct library_symbol *retp, 1142 struct library_symbol *libsym) 1143 { 1144 retp->arch = libsym->arch; 1145 return 0; 1146 } 1147 1148 /* For some symbol types, we need to set up custom callbacks. XXX we 1149 * don't need PROC here, we can store the data in BP if it is of 1150 * interest to us. */ 1151 int 1152 arch_breakpoint_init(struct process *proc, struct breakpoint *bp) 1153 { 1154 bp->arch.irel_libsym = NULL; 1155 1156 /* Artificial and entry-point breakpoints are plain. */ 1157 if (bp->libsym == NULL || bp->libsym->plt_type != LS_TOPLT_EXEC) 1158 return 0; 1159 1160 /* On PPC, secure PLT and prelinked BSS PLT are plain. */ 1161 if (proc->e_machine == EM_PPC 1162 && bp->libsym->lib->arch.bss_plt_prelinked != 0) 1163 return 0; 1164 1165 /* On PPC64, stub PLT breakpoints are plain. */ 1166 if (proc->e_machine == EM_PPC64 1167 && bp->libsym->arch.type == PPC64_PLT_STUB) 1168 return 0; 1169 1170 static struct bp_callbacks cbs = { 1171 .on_continue = ppc_plt_bp_continue, 1172 .on_retract = ppc_plt_bp_retract, 1173 .on_install = ppc_plt_bp_install, 1174 }; 1175 breakpoint_set_callbacks(bp, &cbs); 1176 1177 /* For JMP_IREL breakpoints, make the breakpoint look 1178 * artificial by hiding the symbol. */ 1179 if (bp->libsym->arch.type == PPC_PLT_IRELATIVE) { 1180 bp->arch.irel_libsym = bp->libsym; 1181 bp->libsym = NULL; 1182 } 1183 1184 return 0; 1185 } 1186 1187 void 1188 arch_breakpoint_destroy(struct breakpoint *bp) 1189 { 1190 } 1191 1192 int 1193 arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp) 1194 { 1195 retp->arch = sbp->arch; 1196 return 0; 1197 } 1198 1199 int 1200 arch_process_init(struct process *proc) 1201 { 1202 proc->arch.dl_plt_update_bp = NULL; 1203 proc->arch.handler = NULL; 1204 return 0; 1205 } 1206 1207 void 1208 arch_process_destroy(struct process *proc) 1209 { 1210 } 1211 1212 int 1213 arch_process_clone(struct process *retp, struct process *proc) 1214 { 1215 retp->arch = proc->arch; 1216 1217 if (retp->arch.dl_plt_update_bp != NULL) { 1218 /* Point it to the corresponding breakpoint in RETP. 1219 * It must be there, this part of PROC has already 1220 * been cloned to RETP. */ 1221 retp->arch.dl_plt_update_bp 1222 = address2bpstruct(retp, 1223 retp->arch.dl_plt_update_bp->addr); 1224 1225 assert(retp->arch.dl_plt_update_bp != NULL); 1226 } 1227 1228 return 0; 1229 } 1230 1231 int 1232 arch_process_exec(struct process *proc) 1233 { 1234 return arch_process_init(proc); 1235 } 1236