Home | History | Annotate | Download | only in m_ume
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- User-mode execve() for ELF executables           m_ume_elf.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2013 Julian Seward
     11       jseward (at) acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #if defined(VGO_linux)
     32 
     33 #include "pub_core_basics.h"
     34 #include "pub_core_vki.h"
     35 
     36 #include "pub_core_aspacemgr.h"     // various mapping fns
     37 #include "pub_core_debuglog.h"
     38 #include "pub_core_libcassert.h"    // VG_(exit), vg_assert
     39 #include "pub_core_libcbase.h"      // VG_(memcmp), etc
     40 #include "pub_core_libcprint.h"
     41 #include "pub_core_libcfile.h"      // VG_(open) et al
     42 #include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
     43 #include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
     44 #include "pub_core_syscall.h"       // VG_(strerror)
     45 #include "pub_core_ume.h"           // self
     46 
     47 #include "priv_ume.h"
     48 
     49 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
     50 #define _GNU_SOURCE
     51 #define _FILE_OFFSET_BITS 64
     52 /* This is for ELF types etc, and also the AT_ constants. */
     53 #include <elf.h>
     54 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
     55 
     56 
     57 #if     VG_WORDSIZE == 8
     58 #define ESZ(x)  Elf64_##x
     59 #elif   VG_WORDSIZE == 4
     60 #define ESZ(x)  Elf32_##x
     61 #else
     62 #error VG_WORDSIZE needs to ==4 or ==8
     63 #endif
     64 
     65 struct elfinfo
     66 {
     67    ESZ(Ehdr)    e;
     68    ESZ(Phdr)    *p;
     69    Int          fd;
     70 };
     71 
     72 static void check_mmap(SysRes res, Addr base, SizeT len)
     73 {
     74    if (sr_isError(res)) {
     75       VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
     76                   "with error %lu (%s).\n",
     77                   (ULong)base, (Long)len,
     78                   sr_Err(res), VG_(strerror)(sr_Err(res)) );
     79       if (sr_Err(res) == VKI_EINVAL) {
     80          VG_(printf)("valgrind: this can be caused by executables with "
     81                      "very large text, data or bss segments.\n");
     82       }
     83       VG_(exit)(1);
     84    }
     85 }
     86 
     87 /*------------------------------------------------------------*/
     88 /*--- Loading ELF files                                    ---*/
     89 /*------------------------------------------------------------*/
     90 
     91 static
     92 struct elfinfo *readelf(Int fd, const HChar *filename)
     93 {
     94    SysRes sres;
     95    struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
     96    Int phsz;
     97 
     98    vg_assert(e);
     99    e->fd = fd;
    100 
    101    sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
    102    if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
    103       VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
    104                   filename, VG_(strerror)(sr_Err(sres)));
    105       goto bad;
    106    }
    107 
    108    if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
    109       VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
    110       goto bad;
    111    }
    112    if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
    113       VG_(printf)("valgrind: wrong ELF executable class "
    114                   "(eg. 32-bit instead of 64-bit)\n");
    115       goto bad;
    116    }
    117    if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
    118       VG_(printf)("valgrind: executable has wrong endian-ness\n");
    119       goto bad;
    120    }
    121    if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
    122       VG_(printf)("valgrind: this is not an executable\n");
    123       goto bad;
    124    }
    125 
    126    if (e->e.e_machine != VG_ELF_MACHINE) {
    127       VG_(printf)("valgrind: executable is not for "
    128                   "this architecture\n");
    129       goto bad;
    130    }
    131 
    132    if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
    133       VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
    134       goto bad;
    135    }
    136 
    137    phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
    138    e->p = VG_(malloc)("ume.re.2", phsz);
    139    vg_assert(e->p);
    140 
    141    sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
    142    if (sr_isError(sres) || sr_Res(sres) != phsz) {
    143       VG_(printf)("valgrind: can't read phdr: %s\n",
    144                   VG_(strerror)(sr_Err(sres)));
    145       VG_(free)(e->p);
    146       goto bad;
    147    }
    148 
    149    return e;
    150 
    151   bad:
    152    VG_(free)(e);
    153    return NULL;
    154 }
    155 
    156 /* Map an ELF file.  Returns the brk address. */
    157 static
    158 ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
    159 {
    160    Int    i;
    161    SysRes res;
    162    ESZ(Addr) elfbrk = 0;
    163 
    164    for (i = 0; i < e->e.e_phnum; i++) {
    165       ESZ(Phdr) *ph = &e->p[i];
    166       ESZ(Addr) addr, brkaddr;
    167       ESZ(Word) memsz;
    168 
    169       if (ph->p_type != PT_LOAD)
    170          continue;
    171 
    172       addr    = ph->p_vaddr+base;
    173       memsz   = ph->p_memsz;
    174       brkaddr = addr+memsz;
    175 
    176       if (brkaddr > elfbrk)
    177          elfbrk = brkaddr;
    178    }
    179 
    180    for (i = 0; i < e->e.e_phnum; i++) {
    181       ESZ(Phdr) *ph = &e->p[i];
    182       ESZ(Addr) addr, bss, brkaddr;
    183       ESZ(Off) off;
    184       ESZ(Word) filesz;
    185       ESZ(Word) memsz;
    186       unsigned prot = 0;
    187 
    188       if (ph->p_type != PT_LOAD)
    189          continue;
    190 
    191       if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
    192       if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
    193       if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
    194 
    195       addr    = ph->p_vaddr+base;
    196       off     = ph->p_offset;
    197       filesz  = ph->p_filesz;
    198       bss     = addr+filesz;
    199       memsz   = ph->p_memsz;
    200       brkaddr = addr+memsz;
    201 
    202       // Tom says: In the following, do what the Linux kernel does and only
    203       // map the pages that are required instead of rounding everything to
    204       // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
    205       // use ph->p_align -- part of stage2's memory gets trashed somehow.)
    206       //
    207       // The condition handles the case of a zero-length segment.
    208       if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
    209          if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
    210          res = VG_(am_mmap_file_fixed_client)(
    211                   VG_PGROUNDDN(addr),
    212                   VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
    213                   prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
    214                   e->fd, VG_PGROUNDDN(off)
    215                );
    216          if (0) VG_(am_show_nsegments)(0,"after #1");
    217          check_mmap(res, VG_PGROUNDDN(addr),
    218                          VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
    219       }
    220 
    221       // if memsz > filesz, fill the remainder with zeroed pages
    222       if (memsz > filesz) {
    223          UInt bytes;
    224 
    225          bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
    226          if (bytes > 0) {
    227             if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
    228             res = VG_(am_mmap_anon_fixed_client)(
    229                      VG_PGROUNDUP(bss), bytes,
    230                      prot
    231                   );
    232             if (0) VG_(am_show_nsegments)(0,"after #2");
    233             check_mmap(res, VG_PGROUNDUP(bss), bytes);
    234          }
    235 
    236          bytes = bss & (VKI_PAGE_SIZE - 1);
    237 
    238          // The 'prot' condition allows for a read-only bss
    239          if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
    240             bytes = VKI_PAGE_SIZE - bytes;
    241             VG_(memset)((void *)bss, 0, bytes);
    242          }
    243       }
    244    }
    245 
    246    return elfbrk;
    247 }
    248 
    249 Bool VG_(match_ELF)(const void *hdr, Int len)
    250 {
    251    const ESZ(Ehdr) *e = hdr;
    252    return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
    253 }
    254 
    255 
    256 /* load_ELF pulls an ELF executable into the address space, prepares
    257    it for execution, and writes info about it into INFO.  In
    258    particular it fills in .init_eip, which is the starting point.
    259 
    260    Returns zero on success, non-zero (a VKI_E.. value) on failure.
    261 
    262    The sequence of activities is roughly as follows:
    263 
    264    - use readelf() to extract program header info from the exe file.
    265 
    266    - scan the program header, collecting info (not sure what all those
    267      info-> fields are, or whether they are used, but still) and in
    268      particular looking out fo the PT_INTERP header, which describes
    269      the interpreter.  If such a field is found, the space needed to
    270      hold the interpreter is computed into interp_size.
    271 
    272    - map the executable in, by calling mapelf().  This maps in all
    273      loadable sections, and I _think_ also creates any .bss areas
    274      required.  mapelf() returns the address just beyond the end of
    275      the furthest-along mapping it creates.  The executable is mapped
    276      starting at EBASE, which is usually read from it (eg, 0x8048000
    277      etc) except if it's a PIE, in which case I'm not sure what
    278      happens.
    279 
    280      The returned address is recorded in info->brkbase as the start
    281      point of the brk (data) segment, as it is traditional to place
    282      the data segment just after the executable.  Neither load_ELF nor
    283      mapelf creates the brk segment, though: that is for the caller of
    284      load_ELF to attend to.
    285 
    286    - If the initial phdr scan didn't find any mention of an
    287      interpreter (interp == NULL), this must be a statically linked
    288      executable, and we're pretty much done.
    289 
    290    - Otherwise, we need to use mapelf() a second time to load the
    291      interpreter.  The interpreter can go anywhere, but mapelf() wants
    292      to be told a specific address to put it at.  So an advisory query
    293      is passed to aspacem, asking where it would put an anonymous
    294      client mapping of size INTERP_SIZE.  That address is then used
    295      as the mapping address for the interpreter.
    296 
    297    - The entry point in INFO is set to the interpreter's entry point,
    298      and we're done.  */
    299 Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
    300 {
    301    SysRes sres;
    302    struct elfinfo *e;
    303    struct elfinfo *interp = NULL;
    304    ESZ(Addr) minaddr = ~0;      /* lowest mapped address */
    305    ESZ(Addr) maxaddr = 0;       /* highest mapped address */
    306    ESZ(Addr) interp_addr = 0;   /* interpreter (ld.so) address */
    307    ESZ(Word) interp_size = 0;   /* interpreter size */
    308    /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
    309    Int i;
    310    void *entry;
    311    ESZ(Addr) ebase = 0;
    312 
    313 #  if defined(HAVE_PIE)
    314    ebase = info->exe_base;
    315 #  endif
    316 
    317    e = readelf(fd, name);
    318 
    319    if (e == NULL)
    320       return VKI_ENOEXEC;
    321 
    322    /* The kernel maps position-independent executables at TASK_SIZE*2/3;
    323       duplicate this behavior as close as we can. */
    324    if (e->e.e_type == ET_DYN && ebase == 0) {
    325       ebase = VG_PGROUNDDN(info->exe_base
    326                            + (info->exe_end - info->exe_base) * 2 / 3);
    327       /* We really don't want to load PIEs at zero or too close.  It
    328          works, but it's unrobust (NULL pointer reads and writes
    329          become legit, which is really bad) and causes problems for
    330          exp-ptrcheck, which assumes all numbers below 1MB are
    331          nonpointers.  So, hackily, move it above 1MB. */
    332       /* Later .. it appears ppc32-linux tries to put [vdso] at 1MB,
    333          which totally screws things up, because nothing else can go
    334          there.  The size of [vdso] is around 2 or 3 pages, so bump
    335          the hacky load addess along by 8 * VKI_PAGE_SIZE to be safe. */
    336       /* Later .. on mips64 we can't use 0x108000, because mapelf will
    337          fail. */
    338 #     if defined(VGP_mips64_linux)
    339       if (ebase < 0x100000)
    340          ebase = 0x100000;
    341 #     else
    342       vg_assert(VKI_PAGE_SIZE >= 4096); /* stay sane */
    343       ESZ(Addr) hacky_load_address = 0x100000 + 8 * VKI_PAGE_SIZE;
    344       if (ebase < hacky_load_address)
    345          ebase = hacky_load_address;
    346 #     endif
    347    }
    348 
    349    info->phnum = e->e.e_phnum;
    350    info->entry = e->e.e_entry + ebase;
    351    info->phdr = 0;
    352    info->stack_prot = VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC;
    353 
    354    for (i = 0; i < e->e.e_phnum; i++) {
    355       ESZ(Phdr) *ph = &e->p[i];
    356 
    357       switch(ph->p_type) {
    358       case PT_PHDR:
    359          info->phdr = ph->p_vaddr + ebase;
    360          break;
    361 
    362       case PT_LOAD:
    363          if (ph->p_vaddr < minaddr)
    364             minaddr = ph->p_vaddr;
    365          if (ph->p_vaddr+ph->p_memsz > maxaddr)
    366             maxaddr = ph->p_vaddr+ph->p_memsz;
    367          break;
    368 
    369       case PT_INTERP: {
    370          HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
    371          Int j;
    372          Int intfd;
    373          Int baseaddr_set;
    374 
    375          vg_assert(buf);
    376          VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
    377          buf[ph->p_filesz] = '\0';
    378 
    379          sres = VG_(open)(buf, VKI_O_RDONLY, 0);
    380          if (sr_isError(sres)) {
    381             VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
    382             VG_(exit)(1);
    383          }
    384          intfd = sr_Res(sres);
    385 
    386          interp = readelf(intfd, buf);
    387          if (interp == NULL) {
    388             VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
    389             return 1;
    390          }
    391          VG_(free)(buf);
    392 
    393          baseaddr_set = 0;
    394          for (j = 0; j < interp->e.e_phnum; j++) {
    395             ESZ(Phdr) *iph = &interp->p[j];
    396             ESZ(Addr) end;
    397 
    398             if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
    399                continue;
    400 
    401             if (!baseaddr_set) {
    402                interp_addr  = iph->p_vaddr;
    403                /* interp_align = iph->p_align; */ /* UNUSED */
    404                baseaddr_set = 1;
    405             }
    406 
    407             /* assumes that all segments in the interp are close */
    408             end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
    409 
    410             if (end > interp_size)
    411                interp_size = end;
    412          }
    413          break;
    414 
    415 #     if defined(PT_GNU_STACK)
    416       /* Android's elf.h doesn't appear to have PT_GNU_STACK. */
    417       case PT_GNU_STACK:
    418          if ((ph->p_flags & PF_X) == 0) info->stack_prot &= ~VKI_PROT_EXEC;
    419          if ((ph->p_flags & PF_W) == 0) info->stack_prot &= ~VKI_PROT_WRITE;
    420          if ((ph->p_flags & PF_R) == 0) info->stack_prot &= ~VKI_PROT_READ;
    421          break;
    422 #     endif
    423 
    424       default:
    425          // do nothing
    426          break;
    427       }
    428       }
    429    }
    430 
    431    if (info->phdr == 0)
    432       info->phdr = minaddr + ebase + e->e.e_phoff;
    433 
    434    if (info->exe_base != info->exe_end) {
    435       if (minaddr >= maxaddr ||
    436           (minaddr + ebase < info->exe_base ||
    437            maxaddr + ebase > info->exe_end)) {
    438          VG_(printf)("Executable range %p-%p is outside the\n"
    439                      "acceptable range %p-%p\n",
    440                      (char *)minaddr + ebase, (char *)maxaddr + ebase,
    441                      (char *)info->exe_base,  (char *)info->exe_end);
    442          return VKI_ENOMEM;
    443       }
    444    }
    445 
    446    info->brkbase = mapelf(e, ebase);    /* map the executable */
    447 
    448    if (info->brkbase == 0)
    449       return VKI_ENOMEM;
    450 
    451    if (interp != NULL) {
    452       /* reserve a chunk of address space for interpreter */
    453       MapRequest mreq;
    454       Addr       advised;
    455       Bool       ok;
    456 
    457       /* Don't actually reserve the space.  Just get an advisory
    458          indicating where it would be allocated, and pass that to
    459          mapelf(), which in turn asks aspacem to do some fixed maps at
    460          the specified address.  This is a bit of hack, but it should
    461          work because there should be no intervening transactions with
    462          aspacem which could cause those fixed maps to fail.
    463 
    464          Placement policy is:
    465 
    466          if the interpreter asks to be loaded at zero
    467             ignore that and put it wherever we like (mappings at zero
    468             are bad news)
    469          else
    470             try and put it where it asks for, but if that doesn't work,
    471             just put it anywhere.
    472       */
    473       if (interp_addr == 0) {
    474          mreq.rkind = MAny;
    475          mreq.start = 0;
    476          mreq.len   = interp_size;
    477       } else {
    478          mreq.rkind = MHint;
    479          mreq.start = interp_addr;
    480          mreq.len   = interp_size;
    481       }
    482 
    483       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
    484 
    485       if (!ok) {
    486          /* bomb out */
    487          SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
    488          if (0) VG_(printf)("reserve for interp: failed\n");
    489          check_mmap(res, (Addr)interp_addr, interp_size);
    490          /*NOTREACHED*/
    491       }
    492 
    493       (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
    494 
    495       VG_(close)(interp->fd);
    496 
    497       entry = (void *)(advised - interp_addr + interp->e.e_entry);
    498       info->interp_offset = advised - interp_addr;
    499 
    500       VG_(free)(interp->p);
    501       VG_(free)(interp);
    502    } else
    503       entry = (void *)(ebase + e->e.e_entry);
    504 
    505    info->exe_base = minaddr + ebase;
    506    info->exe_end  = maxaddr + ebase;
    507 
    508 #if defined(VGP_ppc64_linux)
    509    /* On PPC64, a func ptr is represented by a TOC entry ptr.  This
    510       TOC entry contains three words; the first word is the function
    511       address, the second word is the TOC ptr (r2), and the third word
    512       is the static chain value. */
    513    info->init_ip  = ((ULong*)entry)[0];
    514    info->init_toc = ((ULong*)entry)[1];
    515    info->init_ip  += info->interp_offset;
    516    info->init_toc += info->interp_offset;
    517 #else
    518    info->init_ip  = (Addr)entry;
    519    info->init_toc = 0; /* meaningless on this platform */
    520 #endif
    521    VG_(free)(e->p);
    522    VG_(free)(e);
    523 
    524    return 0;
    525 }
    526 
    527 #endif // defined(VGO_linux)
    528 
    529 /*--------------------------------------------------------------------*/
    530 /*--- end                                                          ---*/
    531 /*--------------------------------------------------------------------*/
    532