Home | History | Annotate | Download | only in m_ume
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- User-mode execve() for ELF executables           m_ume_elf.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2012 Julian Seward
     11       jseward (at) acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #if defined(VGO_linux)
     32 
     33 #include "pub_core_basics.h"
     34 #include "pub_core_vki.h"
     35 
     36 #include "pub_core_aspacemgr.h"     // various mapping fns
     37 #include "pub_core_debuglog.h"
     38 #include "pub_core_libcassert.h"    // VG_(exit), vg_assert
     39 #include "pub_core_libcbase.h"      // VG_(memcmp), etc
     40 #include "pub_core_libcprint.h"
     41 #include "pub_core_libcfile.h"      // VG_(open) et al
     42 #include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
     43 #include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
     44 #include "pub_core_syscall.h"       // VG_(strerror)
     45 #include "pub_core_ume.h"           // self
     46 
     47 #include "priv_ume.h"
     48 
     49 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
     50 #define _GNU_SOURCE
     51 #define _FILE_OFFSET_BITS 64
     52 /* This is for ELF types etc, and also the AT_ constants. */
     53 #include <elf.h>
     54 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
     55 
     56 
     57 #if     VG_WORDSIZE == 8
     58 #define ESZ(x)  Elf64_##x
     59 #elif   VG_WORDSIZE == 4
     60 #define ESZ(x)  Elf32_##x
     61 #else
     62 #error VG_WORDSIZE needs to ==4 or ==8
     63 #endif
     64 
     65 struct elfinfo
     66 {
     67    ESZ(Ehdr)    e;
     68    ESZ(Phdr)    *p;
     69    Int          fd;
     70 };
     71 
     72 static void check_mmap(SysRes res, Addr base, SizeT len)
     73 {
     74    if (sr_isError(res)) {
     75       VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
     76                   "with error %lu (%s).\n",
     77                   (ULong)base, (Long)len,
     78                   sr_Err(res), VG_(strerror)(sr_Err(res)) );
     79       if (sr_Err(res) == VKI_EINVAL) {
     80          VG_(printf)("valgrind: this can be caused by executables with "
     81                      "very large text, data or bss segments.\n");
     82       }
     83       VG_(exit)(1);
     84    }
     85 }
     86 
     87 /*------------------------------------------------------------*/
     88 /*--- Loading ELF files                                    ---*/
     89 /*------------------------------------------------------------*/
     90 
     91 static
     92 struct elfinfo *readelf(Int fd, const char *filename)
     93 {
     94    SysRes sres;
     95    struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
     96    Int phsz;
     97 
     98    vg_assert(e);
     99    e->fd = fd;
    100 
    101    sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
    102    if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
    103       VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
    104                   filename, VG_(strerror)(sr_Err(sres)));
    105       goto bad;
    106    }
    107 
    108    if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
    109       VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
    110       goto bad;
    111    }
    112    if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
    113       VG_(printf)("valgrind: wrong ELF executable class "
    114                   "(eg. 32-bit instead of 64-bit)\n");
    115       goto bad;
    116    }
    117    if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
    118       VG_(printf)("valgrind: executable has wrong endian-ness\n");
    119       goto bad;
    120    }
    121    if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
    122       VG_(printf)("valgrind: this is not an executable\n");
    123       goto bad;
    124    }
    125 
    126    if (e->e.e_machine != VG_ELF_MACHINE) {
    127       VG_(printf)("valgrind: executable is not for "
    128                   "this architecture\n");
    129       goto bad;
    130    }
    131 
    132    if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
    133       VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
    134       goto bad;
    135    }
    136 
    137    phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
    138    e->p = VG_(malloc)("ume.re.2", phsz);
    139    vg_assert(e->p);
    140 
    141    sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
    142    if (sr_isError(sres) || sr_Res(sres) != phsz) {
    143       VG_(printf)("valgrind: can't read phdr: %s\n",
    144                   VG_(strerror)(sr_Err(sres)));
    145       VG_(free)(e->p);
    146       goto bad;
    147    }
    148 
    149    return e;
    150 
    151   bad:
    152    VG_(free)(e);
    153    return NULL;
    154 }
    155 
    156 /* Map an ELF file.  Returns the brk address. */
    157 static
    158 ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
    159 {
    160    Int    i;
    161    SysRes res;
    162    ESZ(Addr) elfbrk = 0;
    163 
    164    for (i = 0; i < e->e.e_phnum; i++) {
    165       ESZ(Phdr) *ph = &e->p[i];
    166       ESZ(Addr) addr, brkaddr;
    167       ESZ(Word) memsz;
    168 
    169       if (ph->p_type != PT_LOAD)
    170          continue;
    171 
    172       addr    = ph->p_vaddr+base;
    173       memsz   = ph->p_memsz;
    174       brkaddr = addr+memsz;
    175 
    176       if (brkaddr > elfbrk)
    177          elfbrk = brkaddr;
    178    }
    179 
    180    for (i = 0; i < e->e.e_phnum; i++) {
    181       ESZ(Phdr) *ph = &e->p[i];
    182       ESZ(Addr) addr, bss, brkaddr;
    183       ESZ(Off) off;
    184       ESZ(Word) filesz;
    185       ESZ(Word) memsz;
    186       unsigned prot = 0;
    187 
    188       if (ph->p_type != PT_LOAD)
    189          continue;
    190 
    191       if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
    192       if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
    193       if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
    194 
    195       addr    = ph->p_vaddr+base;
    196       off     = ph->p_offset;
    197       filesz  = ph->p_filesz;
    198       bss     = addr+filesz;
    199       memsz   = ph->p_memsz;
    200       brkaddr = addr+memsz;
    201 
    202       // Tom says: In the following, do what the Linux kernel does and only
    203       // map the pages that are required instead of rounding everything to
    204       // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
    205       // use ph->p_align -- part of stage2's memory gets trashed somehow.)
    206       //
    207       // The condition handles the case of a zero-length segment.
    208       if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
    209          if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
    210          res = VG_(am_mmap_file_fixed_client)(
    211                   VG_PGROUNDDN(addr),
    212                   VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
    213                   prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
    214                   e->fd, VG_PGROUNDDN(off)
    215                );
    216          if (0) VG_(am_show_nsegments)(0,"after #1");
    217          check_mmap(res, VG_PGROUNDDN(addr),
    218                          VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
    219       }
    220 
    221       // if memsz > filesz, fill the remainder with zeroed pages
    222       if (memsz > filesz) {
    223          UInt bytes;
    224 
    225          bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
    226          if (bytes > 0) {
    227             if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
    228             res = VG_(am_mmap_anon_fixed_client)(
    229                      VG_PGROUNDUP(bss), bytes,
    230                      prot
    231                   );
    232             if (0) VG_(am_show_nsegments)(0,"after #2");
    233             check_mmap(res, VG_PGROUNDUP(bss), bytes);
    234          }
    235 
    236          bytes = bss & (VKI_PAGE_SIZE - 1);
    237 
    238          // The 'prot' condition allows for a read-only bss
    239          if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
    240             bytes = VKI_PAGE_SIZE - bytes;
    241             VG_(memset)((char *)bss, 0, bytes);
    242          }
    243       }
    244    }
    245 
    246    return elfbrk;
    247 }
    248 
    249 Bool VG_(match_ELF)(Char *hdr, Int len)
    250 {
    251    ESZ(Ehdr) *e = (ESZ(Ehdr) *)hdr;
    252    return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
    253 }
    254 
    255 
    256 /* load_ELF pulls an ELF executable into the address space, prepares
    257    it for execution, and writes info about it into INFO.  In
    258    particular it fills in .init_eip, which is the starting point.
    259 
    260    Returns zero on success, non-zero (a VKI_E.. value) on failure.
    261 
    262    The sequence of activities is roughly as follows:
    263 
    264    - use readelf() to extract program header info from the exe file.
    265 
    266    - scan the program header, collecting info (not sure what all those
    267      info-> fields are, or whether they are used, but still) and in
    268      particular looking out fo the PT_INTERP header, which describes
    269      the interpreter.  If such a field is found, the space needed to
    270      hold the interpreter is computed into interp_size.
    271 
    272    - map the executable in, by calling mapelf().  This maps in all
    273      loadable sections, and I _think_ also creates any .bss areas
    274      required.  mapelf() returns the address just beyond the end of
    275      the furthest-along mapping it creates.  The executable is mapped
    276      starting at EBASE, which is usually read from it (eg, 0x8048000
    277      etc) except if it's a PIE, in which case I'm not sure what
    278      happens.
    279 
    280      The returned address is recorded in info->brkbase as the start
    281      point of the brk (data) segment, as it is traditional to place
    282      the data segment just after the executable.  Neither load_ELF nor
    283      mapelf creates the brk segment, though: that is for the caller of
    284      load_ELF to attend to.
    285 
    286    - If the initial phdr scan didn't find any mention of an
    287      interpreter (interp == NULL), this must be a statically linked
    288      executable, and we're pretty much done.
    289 
    290    - Otherwise, we need to use mapelf() a second time to load the
    291      interpreter.  The interpreter can go anywhere, but mapelf() wants
    292      to be told a specific address to put it at.  So an advisory query
    293      is passed to aspacem, asking where it would put an anonymous
    294      client mapping of size INTERP_SIZE.  That address is then used
    295      as the mapping address for the interpreter.
    296 
    297    - The entry point in INFO is set to the interpreter's entry point,
    298      and we're done.  */
    299 Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
    300 {
    301    SysRes sres;
    302    struct elfinfo *e;
    303    struct elfinfo *interp = NULL;
    304    ESZ(Addr) minaddr = ~0;      /* lowest mapped address */
    305    ESZ(Addr) maxaddr = 0;       /* highest mapped address */
    306    ESZ(Addr) interp_addr = 0;   /* interpreter (ld.so) address */
    307    ESZ(Word) interp_size = 0;   /* interpreter size */
    308    /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
    309    Int i;
    310    void *entry;
    311    ESZ(Addr) ebase = 0;
    312 
    313    /* The difference between where the interpreter got mapped and
    314       where it asked to be mapped.  Needed for computing the ppc64 ELF
    315       entry point and initial tocptr (R2) value. */
    316    ESZ(Word) interp_offset = 0;
    317 
    318 #ifdef HAVE_PIE
    319    ebase = info->exe_base;
    320 #endif
    321 
    322    e = readelf(fd, name);
    323 
    324    if (e == NULL)
    325       return VKI_ENOEXEC;
    326 
    327    /* The kernel maps position-independent executables at TASK_SIZE*2/3;
    328       duplicate this behavior as close as we can. */
    329    if (e->e.e_type == ET_DYN && ebase == 0) {
    330       ebase = VG_PGROUNDDN(info->exe_base
    331                            + (info->exe_end - info->exe_base) * 2 / 3);
    332       /* We really don't want to load PIEs at zero or too close.  It
    333          works, but it's unrobust (NULL pointer reads and writes
    334          become legit, which is really bad) and causes problems for
    335          exp-ptrcheck, which assumes all numbers below 1MB are
    336          nonpointers.  So, hackily, move it above 1MB. */
    337       /* Later .. is appears ppc32-linux tries to put [vdso] at 1MB,
    338          which totally screws things up, because nothing else can go
    339          there.  So bump the hacky load addess along by 0x8000, to
    340          0x108000. */
    341       if (ebase < 0x108000)
    342          ebase = 0x108000;
    343    }
    344 
    345    info->phnum = e->e.e_phnum;
    346    info->entry = e->e.e_entry + ebase;
    347    info->phdr = 0;
    348 
    349    for (i = 0; i < e->e.e_phnum; i++) {
    350       ESZ(Phdr) *ph = &e->p[i];
    351 
    352       switch(ph->p_type) {
    353       case PT_PHDR:
    354          info->phdr = ph->p_vaddr + ebase;
    355          break;
    356 
    357       case PT_LOAD:
    358          if (ph->p_vaddr < minaddr)
    359             minaddr = ph->p_vaddr;
    360          if (ph->p_vaddr+ph->p_memsz > maxaddr)
    361             maxaddr = ph->p_vaddr+ph->p_memsz;
    362          break;
    363 
    364       case PT_INTERP: {
    365          HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
    366          Int j;
    367          Int intfd;
    368          Int baseaddr_set;
    369 
    370          vg_assert(buf);
    371          VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
    372          buf[ph->p_filesz] = '\0';
    373 
    374          sres = VG_(open)(buf, VKI_O_RDONLY, 0);
    375          if (sr_isError(sres)) {
    376             VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
    377             VG_(exit)(1);
    378          }
    379          intfd = sr_Res(sres);
    380 
    381          interp = readelf(intfd, buf);
    382          if (interp == NULL) {
    383             VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
    384             return 1;
    385          }
    386          VG_(free)(buf);
    387 
    388          baseaddr_set = 0;
    389          for (j = 0; j < interp->e.e_phnum; j++) {
    390             ESZ(Phdr) *iph = &interp->p[j];
    391             ESZ(Addr) end;
    392 
    393             if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
    394                continue;
    395 
    396             if (!baseaddr_set) {
    397                interp_addr  = iph->p_vaddr;
    398                /* interp_align = iph->p_align; */ /* UNUSED */
    399                baseaddr_set = 1;
    400             }
    401 
    402             /* assumes that all segments in the interp are close */
    403             end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
    404 
    405             if (end > interp_size)
    406                interp_size = end;
    407          }
    408          break;
    409 
    410       default:
    411          // do nothing
    412          break;
    413       }
    414       }
    415    }
    416 
    417    if (info->phdr == 0)
    418       info->phdr = minaddr + ebase + e->e.e_phoff;
    419 
    420    if (info->exe_base != info->exe_end) {
    421       if (minaddr >= maxaddr ||
    422           (minaddr + ebase < info->exe_base ||
    423            maxaddr + ebase > info->exe_end)) {
    424          VG_(printf)("Executable range %p-%p is outside the\n"
    425                      "acceptable range %p-%p\n",
    426                      (char *)minaddr + ebase, (char *)maxaddr + ebase,
    427                      (char *)info->exe_base,  (char *)info->exe_end);
    428          return VKI_ENOMEM;
    429       }
    430    }
    431 
    432    info->brkbase = mapelf(e, ebase);    /* map the executable */
    433 
    434    if (info->brkbase == 0)
    435       return VKI_ENOMEM;
    436 
    437    if (interp != NULL) {
    438       /* reserve a chunk of address space for interpreter */
    439       MapRequest mreq;
    440       Addr       advised;
    441       Bool       ok;
    442 
    443       /* Don't actually reserve the space.  Just get an advisory
    444          indicating where it would be allocated, and pass that to
    445          mapelf(), which in turn asks aspacem to do some fixed maps at
    446          the specified address.  This is a bit of hack, but it should
    447          work because there should be no intervening transactions with
    448          aspacem which could cause those fixed maps to fail.
    449 
    450          Placement policy is:
    451 
    452          if the interpreter asks to be loaded at zero
    453             ignore that and put it wherever we like (mappings at zero
    454             are bad news)
    455          else
    456             try and put it where it asks for, but if that doesn't work,
    457             just put it anywhere.
    458       */
    459       if (interp_addr == 0) {
    460          mreq.rkind = MAny;
    461          mreq.start = 0;
    462          mreq.len   = interp_size;
    463       } else {
    464          mreq.rkind = MHint;
    465          mreq.start = interp_addr;
    466          mreq.len   = interp_size;
    467       }
    468 
    469       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
    470 
    471       if (!ok) {
    472          /* bomb out */
    473          SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
    474          if (0) VG_(printf)("reserve for interp: failed\n");
    475          check_mmap(res, (Addr)interp_addr, interp_size);
    476          /*NOTREACHED*/
    477       }
    478 
    479       (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
    480 
    481       VG_(close)(interp->fd);
    482 
    483       entry = (void *)(advised - interp_addr + interp->e.e_entry);
    484       info->interp_base = (ESZ(Addr))advised;
    485       interp_offset = advised - interp_addr;
    486 
    487       VG_(free)(interp->p);
    488       VG_(free)(interp);
    489    } else
    490       entry = (void *)(ebase + e->e.e_entry);
    491 
    492    info->exe_base = minaddr + ebase;
    493    info->exe_end  = maxaddr + ebase;
    494 
    495 #if defined(VGP_ppc64_linux)
    496    /* On PPC64, a func ptr is represented by a TOC entry ptr.  This
    497       TOC entry contains three words; the first word is the function
    498       address, the second word is the TOC ptr (r2), and the third word
    499       is the static chain value. */
    500    info->init_ip  = ((ULong*)entry)[0];
    501    info->init_toc = ((ULong*)entry)[1];
    502    info->init_ip  += interp_offset;
    503    info->init_toc += interp_offset;
    504 #else
    505    info->init_ip  = (Addr)entry;
    506    info->init_toc = 0; /* meaningless on this platform */
    507    (void) interp_offset; /* stop gcc complaining it is unused */
    508 #endif
    509    VG_(free)(e->p);
    510    VG_(free)(e);
    511 
    512    return 0;
    513 }
    514 
    515 #endif // defined(VGO_linux)
    516 
    517 /*--------------------------------------------------------------------*/
    518 /*--- end                                                          ---*/
    519 /*--------------------------------------------------------------------*/
    520