/*--------------------------------------------------------------------*/
/*--- User-mode execve() for ELF executables           m_ume_elf.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2012 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGO_linux)

#include "pub_core_basics.h"
#include "pub_core_vki.h"

#include "pub_core_aspacemgr.h"     // various mapping fns
#include "pub_core_debuglog.h"
#include "pub_core_libcassert.h"    // VG_(exit), vg_assert
#include "pub_core_libcbase.h"      // VG_(memcmp), etc
#include "pub_core_libcprint.h"
#include "pub_core_libcfile.h"      // VG_(open) et al
#include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
#include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
#include "pub_core_syscall.h"       // VG_(strerror)
#include "pub_core_ume.h"           // self

#include "priv_ume.h"

/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
/* This is for ELF types etc, and also the AT_ constants. */
#include <elf.h>
/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */


/* ESZ(x) selects the ELF type of the host word size: Elf64_x or
   Elf32_x. */
#if     VG_WORDSIZE == 8
#define ESZ(x)  Elf64_##x
#elif   VG_WORDSIZE == 4
#define ESZ(x)  Elf32_##x
#else
#error VG_WORDSIZE needs to ==4 or ==8
#endif

/* In-memory description of one ELF file, as produced by readelf(). */
struct elfinfo
{
   ESZ(Ehdr)    e;    /* copy of the ELF header */
   ESZ(Phdr)    *p;   /* heap-allocated array of e.e_phnum program headers */
   Int          fd;   /* descriptor the file was read from */
};

/* If RES denotes a failed mapping, report it (BASE and LEN are only
   used for the message) and terminate via VG_(exit)(1).  Does nothing
   when RES is not an error. */
static void check_mmap(SysRes res, Addr base, SizeT len)
{
   if (sr_isError(res)) {
      VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
                  "with error %lu (%s).\n",
                  (ULong)base, (Long)len,
                  sr_Err(res), VG_(strerror)(sr_Err(res)) );
      if (sr_Err(res) == VKI_EINVAL) {
         VG_(printf)("valgrind: this can be caused by executables with "
                     "very large text, data or bss segments.\n");
      }
      VG_(exit)(1);
   }
}

/*------------------------------------------------------------*/
/*--- Loading ELF files                                    ---*/
/*------------------------------------------------------------*/

/* Read the ELF header and the program header table of the file open
   on FD.  FILENAME is used only in diagnostics.

   The header is validated (magic number, class, endianness, type
   ET_EXEC or ET_DYN, machine, program header entry size).  On success
   a freshly allocated elfinfo is returned; both it and its ->p array
   must be released by the caller.  On any failure a message is
   printed and NULL is returned.  FD is never closed here. */
static
struct elfinfo *readelf(Int fd, const char *filename)
{
   SysRes sres;
   struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
   Int phsz;

   vg_assert(e);
   e->fd = fd;

   sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
   if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
      VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
                  filename, VG_(strerror)(sr_Err(sres)));
      goto bad;
   }

   if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
      VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
      goto bad;
   }
   if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
      VG_(printf)("valgrind: wrong ELF executable class "
                  "(eg. 32-bit instead of 64-bit)\n");
      goto bad;
   }
   if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
      VG_(printf)("valgrind: executable has wrong endian-ness\n");
      goto bad;
   }
   if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
      VG_(printf)("valgrind: this is not an executable\n");
      goto bad;
   }

   if (e->e.e_machine != VG_ELF_MACHINE) {
      VG_(printf)("valgrind: executable is not for "
                  "this architecture\n");
      goto bad;
   }

   if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
      VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
      goto bad;
   }

   phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
   e->p = VG_(malloc)("ume.re.2", phsz);
   vg_assert(e->p);

   sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
   if (sr_isError(sres) || sr_Res(sres) != phsz) {
      VG_(printf)("valgrind: can't read phdr: %s\n",
                  VG_(strerror)(sr_Err(sres)));
      VG_(free)(e->p);
      goto bad;
   }

   return e;

  bad:
   VG_(free)(e);
   return NULL;
}

/* Release an elfinfo returned by readelf(), including its program
   header array.  The file descriptor is left open.  NULL is
   tolerated. */
static void free_elfinfo(struct elfinfo *e)
{
   if (e == NULL)
      return;
   VG_(free)(e->p);
   VG_(free)(e);
}

/* Map an ELF file.  Returns the brk address.

   Every PT_LOAD segment of E is mapped at p_vaddr + BASE: the
   file-backed part with a fixed client file mapping, and any part
   beyond p_filesz with fixed anonymous pages.  A failed mapping
   terminates the process (see check_mmap).  The value returned is the
   highest p_vaddr + BASE + p_memsz over all PT_LOAD segments, or 0 if
   there are none.

   Segment sizes are held in SizeT rather than ESZ(Word): Elf64_Word
   is only 32 bits wide whereas p_filesz/p_memsz are 64-bit fields in
   an Elf64 program header, so the narrower type would truncate
   segments of 4GB or more. */
static
ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
{
   Int    i;
   SysRes res;
   ESZ(Addr) elfbrk = 0;

   /* First pass: find the end of the furthest-along segment. */
   for (i = 0; i < e->e.e_phnum; i++) {
      ESZ(Phdr) *ph = &e->p[i];
      ESZ(Addr) addr, brkaddr;
      SizeT     memsz;

      if (ph->p_type != PT_LOAD)
         continue;

      addr    = ph->p_vaddr+base;
      memsz   = ph->p_memsz;
      brkaddr = addr+memsz;

      if (brkaddr > elfbrk)
         elfbrk = brkaddr;
   }

   /* Second pass: create the mappings. */
   for (i = 0; i < e->e.e_phnum; i++) {
      ESZ(Phdr) *ph = &e->p[i];
      ESZ(Addr) addr, bss, brkaddr;
      ESZ(Off)  off;
      SizeT     filesz;
      SizeT     memsz;
      unsigned  prot = 0;

      if (ph->p_type != PT_LOAD)
         continue;

      if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
      if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
      if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;

      addr    = ph->p_vaddr+base;
      off     = ph->p_offset;
      filesz  = ph->p_filesz;
      bss     = addr+filesz;    /* first byte not backed by the file */
      memsz   = ph->p_memsz;
      brkaddr = addr+memsz;

      // Tom says: In the following, do what the Linux kernel does and only
      // map the pages that are required instead of rounding everything to
      // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
      // use ph->p_align -- part of stage2's memory gets trashed somehow.)
      //
      // The condition handles the case of a zero-length segment.
      if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
         if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
         res = VG_(am_mmap_file_fixed_client)(
                  VG_PGROUNDDN(addr),
                  VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
                  prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
                  e->fd, VG_PGROUNDDN(off)
               );
         if (0) VG_(am_show_nsegments)(0,"after #1");
         check_mmap(res, VG_PGROUNDDN(addr),
                         VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
      }

      // if memsz > filesz, fill the remainder with zeroed pages
      if (memsz > filesz) {
         SizeT bytes;   /* was UInt, which truncated a bss of 4GB or more */

         bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
         if (bytes > 0) {
            if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
            res = VG_(am_mmap_anon_fixed_client)(
                     VG_PGROUNDUP(bss), bytes,
                     prot
                  );
            if (0) VG_(am_show_nsegments)(0,"after #2");
            check_mmap(res, VG_PGROUNDUP(bss), bytes);
         }

         /* Zero the tail of the last file-backed page, from bss up to
            the next page boundary. */
         bytes = bss & (VKI_PAGE_SIZE - 1);

         // The 'prot' condition allows for a read-only bss
         if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
            bytes = VKI_PAGE_SIZE - bytes;
            VG_(memset)((char *)bss, 0, bytes);
         }
      }
   }

   return elfbrk;
}

/* Returns True if the LEN bytes at HDR are longer than an ELF header
   and begin with the ELF magic number.  A negative LEN never matches
   (it is checked explicitly, since comparing it directly against
   sizeof would convert it to a huge unsigned value). */
Bool VG_(match_ELF)(Char *hdr, Int len)
{
   ESZ(Ehdr) *e = (ESZ(Ehdr) *)hdr;

   if (len < 0 || (SizeT)len <= sizeof(*e))
      return False;
   return VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
}


/* load_ELF pulls an ELF executable into the address space, prepares
   it for execution, and writes info about it into INFO.  In
   particular it fills in .init_ip, which is the starting point.

   Returns zero on success, non-zero (a VKI_E.. value) on failure.
   One exception: if the interpreter's ELF headers cannot be read, the
   value returned is 1.  Failure to read the interpreter's name, to
   open the interpreter, or to create a mapping terminates the process
   instead of returning.

   FD remains owned by the caller and is never closed here.  All
   memory allocated here, and the descriptor opened for the
   interpreter, are released before returning, on every path.

   The sequence of activities is roughly as follows:

   - use readelf() to extract program header info from the exe file.

   - scan the program header, collecting info (not sure what all those
     info-> fields are, or whether they are used, but still) and in
     particular looking out for the PT_INTERP header, which describes
     the interpreter.  If such a field is found, the space needed to
     hold the interpreter is computed into interp_size.

   - map the executable in, by calling mapelf().  This maps in all
     loadable sections, and I _think_ also creates any .bss areas
     required.  mapelf() returns the address just beyond the end of
     the furthest-along mapping it creates.  The executable is mapped
     starting at EBASE, which is usually read from it (eg, 0x8048000
     etc) except if it's a PIE, in which case I'm not sure what
     happens.

     The returned address is recorded in info->brkbase as the start
     point of the brk (data) segment, as it is traditional to place
     the data segment just after the executable.  Neither load_ELF nor
     mapelf creates the brk segment, though: that is for the caller of
     load_ELF to attend to.

   - If the initial phdr scan didn't find any mention of an
     interpreter (interp == NULL), this must be a statically linked
     executable, and we're pretty much done.

   - Otherwise, we need to use mapelf() a second time to load the
     interpreter.  The interpreter can go anywhere, but mapelf() wants
     to be told a specific address to put it at.  So an advisory query
     is passed to aspacem, asking where it would put an anonymous
     client mapping of size INTERP_SIZE.  That address is then used
     as the mapping address for the interpreter.

   - The entry point in INFO is set to the interpreter's entry point,
     and we're done.  */
Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
{
   SysRes sres;
   struct elfinfo *e;
   struct elfinfo *interp = NULL;
   ESZ(Addr) minaddr = ~0;      /* lowest mapped address */
   ESZ(Addr) maxaddr = 0;       /* highest mapped address */
   ESZ(Addr) interp_addr = 0;   /* interpreter (ld.so) address */
   SizeT interp_size = 0;       /* interpreter size */
   Int i;
   Int ret = 0;                 /* value returned via the 'out' label */
   void *entry;
   ESZ(Addr) ebase = 0;

   /* The difference between where the interpreter got mapped and
      where it asked to be mapped.  Needed for computing the ppc64 ELF
      entry point and initial tocptr (R2) value.  Held in an Addr so
      the full difference is kept on 64-bit targets (ESZ(Word) is only
      32 bits wide there). */
   Addr interp_offset = 0;

#ifdef HAVE_PIE
   ebase = info->exe_base;
#endif

   e = readelf(fd, name);

   if (e == NULL)
      return VKI_ENOEXEC;

   /* The kernel maps position-independent executables at TASK_SIZE*2/3;
      duplicate this behavior as close as we can. */
   if (e->e.e_type == ET_DYN && ebase == 0) {
      ebase = VG_PGROUNDDN(info->exe_base
                           + (info->exe_end - info->exe_base) * 2 / 3);
      /* We really don't want to load PIEs at zero or too close.  It
         works, but it's unrobust (NULL pointer reads and writes
         become legit, which is really bad) and causes problems for
         exp-ptrcheck, which assumes all numbers below 1MB are
         nonpointers.  So, hackily, move it above 1MB. */
      /* Later .. it appears ppc32-linux tries to put [vdso] at 1MB,
         which totally screws things up, because nothing else can go
         there.  So bump the hacky load address along by 0x8000, to
         0x108000. */
      if (ebase < 0x108000)
         ebase = 0x108000;
   }

   info->phnum = e->e.e_phnum;
   info->entry = e->e.e_entry + ebase;
   info->phdr = 0;

   for (i = 0; i < e->e.e_phnum; i++) {
      ESZ(Phdr) *ph = &e->p[i];

      switch(ph->p_type) {
      case PT_PHDR:
         info->phdr = ph->p_vaddr + ebase;
         break;

      case PT_LOAD:
         if (ph->p_vaddr < minaddr)
            minaddr = ph->p_vaddr;
         if (ph->p_vaddr+ph->p_memsz > maxaddr)
            maxaddr = ph->p_vaddr+ph->p_memsz;
         break;

      case PT_INTERP: {
         HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
         Int j;
         Int intfd;
         Int baseaddr_set;

         vg_assert(buf);

         /* Fetch the interpreter's path.  A failed or short read would
            leave buf (partly) uninitialised, so don't go on to use it
            as a file name. */
         sres = VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
         if (sr_isError(sres) || sr_Res(sres) != ph->p_filesz) {
            VG_(printf)("valgrind: m_ume.c: can't read interpreter name\n");
            VG_(exit)(1);
         }
         buf[ph->p_filesz] = '\0';

         sres = VG_(open)(buf, VKI_O_RDONLY, 0);
         if (sr_isError(sres)) {
            VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
            VG_(exit)(1);
         }
         intfd = sr_Res(sres);

         interp = readelf(intfd, buf);
         VG_(free)(buf);   /* only needed for readelf's diagnostics */
         if (interp == NULL) {
            VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
            /* No elfinfo owns intfd in this case; close it here. */
            VG_(close)(intfd);
            ret = 1;
            goto out;
         }

         /* Compute the extent of the interpreter, measured from the
            p_vaddr of its first non-empty PT_LOAD segment. */
         baseaddr_set = 0;
         for (j = 0; j < interp->e.e_phnum; j++) {
            ESZ(Phdr) *iph = &interp->p[j];
            ESZ(Addr) end;

            if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
               continue;

            if (!baseaddr_set) {
               interp_addr  = iph->p_vaddr;
               baseaddr_set = 1;
            }

            /* assumes that all segments in the interp are close */
            end = (iph->p_vaddr - interp_addr) + iph->p_memsz;

            if (end > interp_size)
               interp_size = end;
         }
         break;
      }

      default:
         // do nothing
         break;
      }
   }

   /* No PT_PHDR entry: derive the program header address from the
      lowest PT_LOAD address and e_phoff. */
   if (info->phdr == 0)
      info->phdr = minaddr + ebase + e->e.e_phoff;

   if (info->exe_base != info->exe_end) {
      if (minaddr >= maxaddr ||
          (minaddr + ebase < info->exe_base ||
           maxaddr + ebase > info->exe_end)) {
         VG_(printf)("Executable range %p-%p is outside the\n"
                     "acceptable range %p-%p\n",
                     (char *)minaddr + ebase, (char *)maxaddr + ebase,
                     (char *)info->exe_base,  (char *)info->exe_end);
         ret = VKI_ENOMEM;
         goto out;
      }
   }

   info->brkbase = mapelf(e, ebase);    /* map the executable */

   if (info->brkbase == 0) {
      ret = VKI_ENOMEM;
      goto out;
   }

   if (interp != NULL) {
      /* reserve a chunk of address space for interpreter */
      MapRequest mreq;
      Addr       advised;
      Bool       ok;

      /* Don't actually reserve the space.  Just get an advisory
         indicating where it would be allocated, and pass that to
         mapelf(), which in turn asks aspacem to do some fixed maps at
         the specified address.  This is a bit of hack, but it should
         work because there should be no intervening transactions with
         aspacem which could cause those fixed maps to fail.

         Placement policy is:

         if the interpreter asks to be loaded at zero
            ignore that and put it wherever we like (mappings at zero
            are bad news)
         else
            try and put it where it asks for, but if that doesn't work,
            just put it anywhere.
      */
      if (interp_addr == 0) {
         mreq.rkind = MAny;
         mreq.start = 0;
         mreq.len   = interp_size;
      } else {
         mreq.rkind = MHint;
         mreq.start = interp_addr;
         mreq.len   = interp_size;
      }

      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );

      if (!ok) {
         /* bomb out */
         SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
         if (0) VG_(printf)("reserve for interp: failed\n");
         check_mmap(res, (Addr)interp_addr, interp_size);
         /*NOTREACHED*/
      }

      (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);

      entry = (void *)(advised - interp_addr + interp->e.e_entry);
      info->interp_base = (ESZ(Addr))advised;
      interp_offset = advised - interp_addr;
   } else
      entry = (void *)(ebase + e->e.e_entry);

   info->exe_base = minaddr + ebase;
   info->exe_end  = maxaddr + ebase;

#if defined(VGP_ppc64_linux)
   /* On PPC64, a func ptr is represented by a TOC entry ptr.  This
      TOC entry contains three words; the first word is the function
      address, the second word is the TOC ptr (r2), and the third word
      is the static chain value. */
   info->init_ip  = ((ULong*)entry)[0];
   info->init_toc = ((ULong*)entry)[1];
   info->init_ip  += interp_offset;
   info->init_toc += interp_offset;
#else
   info->init_ip  = (Addr)entry;
   info->init_toc = 0; /* meaningless on this platform */
   (void) interp_offset; /* stop gcc complaining it is unused */
#endif

  out:
   /* Common exit: release everything acquired above.  The caller's
      fd is deliberately left open. */
   if (interp != NULL) {
      VG_(close)(interp->fd);
      free_elfinfo(interp);
   }
   free_elfinfo(e);

   return ret;
}

#endif // defined(VGO_linux)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/