Home | History | Annotate | Download | only in m_ume
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- User-mode execve() for ELF executables           m_ume_elf.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2011 Julian Seward
     11       jseward (at) acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #if defined(VGO_linux)
     32 
     33 #include "pub_core_basics.h"
     34 #include "pub_core_vki.h"
     35 
     36 #include "pub_core_aspacemgr.h"     // various mapping fns
     37 #include "pub_core_debuglog.h"
     38 #include "pub_core_libcassert.h"    // VG_(exit), vg_assert
     39 #include "pub_core_libcbase.h"      // VG_(memcmp), etc
     40 #include "pub_core_libcprint.h"
     41 #include "pub_core_libcfile.h"      // VG_(open) et al
     42 #include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
     43 #include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
     44 #include "pub_core_syscall.h"       // VG_(strerror)
     45 #include "pub_core_ume.h"           // self
     46 #include "pub_tool_libcproc.h"      // VG_(getenv)
     47 
     48 #include "priv_ume.h"
     49 
     50 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
     51 #define _GNU_SOURCE
     52 #define _FILE_OFFSET_BITS 64
     53 /* This is for ELF types etc, and also the AT_ constants. */
     54 #include <elf.h>
     55 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
     56 
     57 
     58 #if     VG_WORDSIZE == 8     /* pick the ELF type family matching the word size */
     59 #define ESZ(x)  Elf64_##x    /* ESZ(Foo) expands to Elf64_Foo */
     60 #elif   VG_WORDSIZE == 4
     61 #define ESZ(x)  Elf32_##x    /* ESZ(Foo) expands to Elf32_Foo */
     62 #else
     63 #error VG_WORDSIZE needs to ==4 or ==8
     64 #endif
     65 
     66 struct elfinfo               /* header info for one ELF file; filled in by readelf() */
     67 {
     68    ESZ(Ehdr)    e;           /* ELF file header, read from file offset 0 */
     69    ESZ(Phdr)    *p;          /* malloc'd array of e.e_phnum program headers */
     70    Int          fd;          /* descriptor the headers were read from */
     71 };
     72 
     73 static void check_mmap(SysRes res, Addr base, SizeT len)
     74 {
     75    if (sr_isError(res)) {
     76       VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
     77                   "with error %lu (%s).\n",
     78                   (ULong)base, (Long)len,
     79                   sr_Err(res), VG_(strerror)(sr_Err(res)) );
     80       if (sr_Err(res) == VKI_EINVAL) {
     81          VG_(printf)("valgrind: this can be caused by executables with "
     82                      "very large text, data or bss segments.\n");
     83       }
     84       VG_(exit)(1);
     85    }
     86 }
     87 
     88 /*------------------------------------------------------------*/
     89 /*--- Loading ELF files                                    ---*/
     90 /*------------------------------------------------------------*/
     91 
     92 static
     93 struct elfinfo *readelf(Int fd, const char *filename)
     94 {
     95    SysRes sres;
     96    struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
     97    Int phsz;
     98 
     99    vg_assert(e);
    100    e->fd = fd;
    101 
    102    sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
    103    if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
    104       VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
    105                   filename, VG_(strerror)(sr_Err(sres)));
    106       goto bad;
    107    }
    108 
    109    if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
    110       VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
    111       goto bad;
    112    }
    113    if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
    114       VG_(printf)("valgrind: wrong ELF executable class "
    115                   "(eg. 32-bit instead of 64-bit)\n");
    116       goto bad;
    117    }
    118    if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
    119       VG_(printf)("valgrind: executable has wrong endian-ness\n");
    120       goto bad;
    121    }
    122    if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
    123       VG_(printf)("valgrind: this is not an executable\n");
    124       goto bad;
    125    }
    126 
    127    if (e->e.e_machine != VG_ELF_MACHINE) {
    128       VG_(printf)("valgrind: executable is not for "
    129                   "this architecture\n");
    130       goto bad;
    131    }
    132 
    133    if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
    134       VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
    135       goto bad;
    136    }
    137 
    138    phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
    139    e->p = VG_(malloc)("ume.re.2", phsz);
    140    vg_assert(e->p);
    141 
    142    sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
    143    if (sr_isError(sres) || sr_Res(sres) != phsz) {
    144       VG_(printf)("valgrind: can't read phdr: %s\n",
    145                   VG_(strerror)(sr_Err(sres)));
    146       VG_(free)(e->p);
    147       goto bad;
    148    }
    149 
    150    return e;
    151 
    152   bad:
    153    VG_(free)(e);
    154    return NULL;
    155 }
    156 
    157 /* Map an ELF file.  Returns the brk address. */
    158 static
    159 ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
    160 {
    161    Int    i;
    162    SysRes res;
    163    ESZ(Addr) elfbrk = 0;
    164 
    165    for (i = 0; i < e->e.e_phnum; i++) {
    166       ESZ(Phdr) *ph = &e->p[i];
    167       ESZ(Addr) addr, brkaddr;
    168       ESZ(Word) memsz;
    169 
    170       if (ph->p_type != PT_LOAD)
    171          continue;
    172 
    173       addr    = ph->p_vaddr+base;
    174       memsz   = ph->p_memsz;
    175       brkaddr = addr+memsz;
    176 
    177       if (brkaddr > elfbrk)
    178          elfbrk = brkaddr;
    179    }
    180 
    181    for (i = 0; i < e->e.e_phnum; i++) {
    182       ESZ(Phdr) *ph = &e->p[i];
    183       ESZ(Addr) addr, bss, brkaddr;
    184       ESZ(Off) off;
    185       ESZ(Word) filesz;
    186       ESZ(Word) memsz;
    187       unsigned prot = 0;
    188 
    189       if (ph->p_type != PT_LOAD)
    190          continue;
    191 
    192       if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
    193       if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
    194       if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
    195 
    196       addr    = ph->p_vaddr+base;
    197       off     = ph->p_offset;
    198       filesz  = ph->p_filesz;
    199       bss     = addr+filesz;
    200       memsz   = ph->p_memsz;
    201       brkaddr = addr+memsz;
    202 
    203       // Tom says: In the following, do what the Linux kernel does and only
    204       // map the pages that are required instead of rounding everything to
    205       // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
    206       // use ph->p_align -- part of stage2's memory gets trashed somehow.)
    207       //
    208       // The condition handles the case of a zero-length segment.
    209       if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
    210          if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
    211          res = VG_(am_mmap_file_fixed_client)(
    212                   VG_PGROUNDDN(addr),
    213                   VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
    214                   prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
    215                   e->fd, VG_PGROUNDDN(off)
    216                );
    217          if (0) VG_(am_show_nsegments)(0,"after #1");
    218          check_mmap(res, VG_PGROUNDDN(addr),
    219                          VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
    220       }
    221 
    222       // if memsz > filesz, fill the remainder with zeroed pages
    223       if (memsz > filesz) {
    224          UInt bytes;
    225 
    226          bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
    227          if (bytes > 0) {
    228             if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
    229             res = VG_(am_mmap_anon_fixed_client)(
    230                      VG_PGROUNDUP(bss), bytes,
    231                      prot
    232                   );
    233             if (0) VG_(am_show_nsegments)(0,"after #2");
    234             check_mmap(res, VG_PGROUNDUP(bss), bytes);
    235          }
    236 
    237          bytes = bss & (VKI_PAGE_SIZE - 1);
    238 
    239          // The 'prot' condition allows for a read-only bss
    240          if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
    241             bytes = VKI_PAGE_SIZE - bytes;
    242             VG_(memset)((char *)bss, 0, bytes);
    243          }
    244       }
    245    }
    246 
    247    return elfbrk;
    248 }
    249 
    250 Bool VG_(match_ELF)(Char *hdr, Int len)
    251 {
    252    ESZ(Ehdr) *e = (ESZ(Ehdr) *)hdr;
    253    return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
    254 }
    255 
    256 
    257 /* load_ELF pulls an ELF executable into the address space, prepares
    258    it for execution, and writes info about it into INFO.  In
    259    particular it fills in .init_eip, which is the starting point.
    260 
    261    Returns zero on success, non-zero (a VKI_E.. value) on failure.
    262 
    263    The sequence of activities is roughly as follows:
    264 
    265    - use readelf() to extract program header info from the exe file.
    266 
    267    - scan the program header, collecting info (not sure what all those
    268      info-> fields are, or whether they are used, but still) and in
    269      particular looking out fo the PT_INTERP header, which describes
    270      the interpreter.  If such a field is found, the space needed to
    271      hold the interpreter is computed into interp_size.
    272 
    273    - map the executable in, by calling mapelf().  This maps in all
    274      loadable sections, and I _think_ also creates any .bss areas
    275      required.  mapelf() returns the address just beyond the end of
    276      the furthest-along mapping it creates.  The executable is mapped
    277      starting at EBASE, which is usually read from it (eg, 0x8048000
    278      etc) except if it's a PIE, in which case I'm not sure what
    279      happens.
    280 
    281      The returned address is recorded in info->brkbase as the start
    282      point of the brk (data) segment, as it is traditional to place
    283      the data segment just after the executable.  Neither load_ELF nor
    284      mapelf creates the brk segment, though: that is for the caller of
    285      load_ELF to attend to.
    286 
    287    - If the initial phdr scan didn't find any mention of an
    288      interpreter (interp == NULL), this must be a statically linked
    289      executable, and we're pretty much done.
    290 
    291    - Otherwise, we need to use mapelf() a second time to load the
    292      interpreter.  The interpreter can go anywhere, but mapelf() wants
    293      to be told a specific address to put it at.  So an advisory query
    294      is passed to aspacem, asking where it would put an anonymous
    295      client mapping of size INTERP_SIZE.  That address is then used
    296      as the mapping address for the interpreter.
    297 
    298    - The entry point in INFO is set to the interpreter's entry point,
    299      and we're done.  */
    300 Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
    301 {
    302    SysRes sres;
    303    struct elfinfo *e;
    304    struct elfinfo *interp = NULL;
    305    ESZ(Addr) minaddr = ~0;      /* lowest mapped address */
    306    ESZ(Addr) maxaddr = 0;       /* highest mapped address */
    307    ESZ(Addr) interp_addr = 0;   /* interpreter (ld.so) address */
    308    ESZ(Word) interp_size = 0;   /* interpreter size */
    309    /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
    310    Int i;
    311    void *entry;
    312    ESZ(Addr) ebase = 0;
    313 
    314    /* The difference between where the interpreter got mapped and
    315       where it asked to be mapped.  Needed for computing the ppc64 ELF
    316       entry point and initial tocptr (R2) value. */
    317    ESZ(Word) interp_offset = 0;
    318 
    319 #ifdef HAVE_PIE
    320    ebase = info->exe_base;
    321 #endif
    322 
    323    e = readelf(fd, name);
    324 
    325    if (e == NULL)
    326       return VKI_ENOEXEC;
    327 
    328    /* The kernel maps position-independent executables at TASK_SIZE*2/3;
    329       duplicate this behavior as close as we can. */
    330    if (e->e.e_type == ET_DYN && ebase == 0) {
    331       ebase = VG_PGROUNDDN(info->exe_base
    332                            + (info->exe_end - info->exe_base) * 2 / 3);
    333       /* We really don't want to load PIEs at zero or too close.  It
    334          works, but it's unrobust (NULL pointer reads and writes
    335          become legit, which is really bad) and causes problems for
    336          exp-ptrcheck, which assumes all numbers below 1MB are
    337          nonpointers.  So, hackily, move it above 1MB. */
    338       /* Later .. is appears ppc32-linux tries to put [vdso] at 1MB,
    339          which totally screws things up, because nothing else can go
    340          there.  So bump the hacky load addess along by 0x8000, to
    341          0x108000. */
    342       if (ebase < 0x108000)
    343          ebase = 0x108000;
    344    }
    345 
    346    info->phnum = e->e.e_phnum;
    347    info->entry = e->e.e_entry + ebase;
    348    info->phdr = 0;
    349 
    350    for (i = 0; i < e->e.e_phnum; i++) {
    351       ESZ(Phdr) *ph = &e->p[i];
    352 
    353       switch(ph->p_type) {
    354       case PT_PHDR:
    355          info->phdr = ph->p_vaddr + ebase;
    356          break;
    357 
    358       case PT_LOAD:
    359          if (ph->p_vaddr < minaddr)
    360             minaddr = ph->p_vaddr;
    361          if (ph->p_vaddr+ph->p_memsz > maxaddr)
    362             maxaddr = ph->p_vaddr+ph->p_memsz;
    363          break;
    364 
    365       case PT_INTERP: {
    366          HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
    367          Int j;
    368          Int intfd;
    369          Int baseaddr_set;
    370 
    371          vg_assert(buf);
    372          VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
    373          buf[ph->p_filesz] = '\0';
    374 
    375          sres = VG_(open)(buf, VKI_O_RDONLY, 0);
    376          if (sr_isError(sres)) {
    377             VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
    378             VG_(exit)(1);
    379          }
    380          intfd = sr_Res(sres);
    381 
    382          interp = readelf(intfd, buf);
    383          if (interp == NULL) {
    384             VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
    385             return 1;
    386          }
    387          VG_(free)(buf);
    388 
    389          baseaddr_set = 0;
    390          for (j = 0; j < interp->e.e_phnum; j++) {
    391             ESZ(Phdr) *iph = &interp->p[j];
    392             ESZ(Addr) end;
    393 
    394             if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
    395                continue;
    396 
    397 #ifdef ANDROID
    398             // On older versions of Android, the first LOAD segment of
    399             // /system/bin/linker has vaddr=0, memsz=0, but subsequent
    400             // segments start at 0xb0001000.
    401             //
    402             // On newer versions of Android, the linker is ET_DYN and
    403             // we don't have to worry about iph->p_vaddr
    404             if (!baseaddr_set
    405                 && (iph->p_vaddr || (interp->e.e_type == ET_DYN))) {
    406 #else
    407             if (!baseaddr_set) {
    408 #endif
    409                interp_addr  = iph->p_vaddr;
    410                /* interp_align = iph->p_align; */ /* UNUSED */
    411                baseaddr_set = 1;
    412             }
    413 
    414             /* assumes that all segments in the interp are close */
    415             end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
    416 
    417             if (end > interp_size)
    418                interp_size = end;
    419          }
    420          break;
    421 
    422       default:
    423          // do nothing
    424          break;
    425       }
    426       }
    427    }
    428 
    429    if (info->phdr == 0)
    430       info->phdr = minaddr + ebase + e->e.e_phoff;
    431 
    432    if (info->exe_base != info->exe_end) {
    433       if (minaddr >= maxaddr ||
    434           (minaddr + ebase < info->exe_base ||
    435            maxaddr + ebase > info->exe_end)) {
    436          VG_(printf)("Executable range %p-%p is outside the\n"
    437                      "acceptable range %p-%p\n",
    438                      (char *)minaddr + ebase, (char *)maxaddr + ebase,
    439                      (char *)info->exe_base,  (char *)info->exe_end);
    440          return VKI_ENOMEM;
    441       }
    442    }
    443 
    444    info->brkbase = mapelf(e, ebase);    /* map the executable */
    445 
    446    if (info->brkbase == 0)
    447       return VKI_ENOMEM;
    448 
    449    if (interp != NULL) {
    450       /* reserve a chunk of address space for interpreter */
    451       MapRequest mreq;
    452       Addr       advised;
    453       Bool       ok;
    454 
    455       /* Don't actually reserve the space.  Just get an advisory
    456          indicating where it would be allocated, and pass that to
    457          mapelf(), which in turn asks aspacem to do some fixed maps at
    458          the specified address.  This is a bit of hack, but it should
    459          work because there should be no intervening transactions with
    460          aspacem which could cause those fixed maps to fail.
    461 
    462          Placement policy is:
    463 
    464          if the interpreter asks to be loaded at zero
    465             ignore that and put it wherever we like (mappings at zero
    466             are bad news)
    467          else
    468             try and put it where it asks for, but if that doesn't work,
    469             just put it anywhere.
    470       */
    471       if (interp_addr == 0) {
    472          mreq.rkind = MAny;
    473          mreq.start = 0;
    474          mreq.len   = interp_size;
    475       } else {
    476          mreq.rkind = MHint;
    477          mreq.start = interp_addr;
    478          mreq.len   = interp_size;
    479       }
    480 
    481       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
    482 
    483       if (!ok) {
    484          /* bomb out */
    485          SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
    486          if (0) VG_(printf)("reserve for interp: failed\n");
    487          check_mmap(res, (Addr)interp_addr, interp_size);
    488          /*NOTREACHED*/
    489       }
    490 
    491       (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
    492 
    493       VG_(close)(interp->fd);
    494 
    495       entry = (void *)(advised - interp_addr + interp->e.e_entry);
    496       info->interp_base = (ESZ(Addr))advised;
    497       interp_offset = advised - interp_addr;
    498 
    499       VG_(free)(interp->p);
    500       VG_(free)(interp);
    501    } else {
    502       Char *exit_if_static = VG_(getenv)("VALGRIND_EXIT_IF_STATIC");
    503       if (exit_if_static && VG_(strcmp)(exit_if_static, "0") != 0) {
    504         VG_(printf)("******* You are running Valgrind on a static binary: %s\n",
    505                     name);
    506         VG_(printf)("******* This is not supported, exiting\n");
    507         VG_(exit)(1);
    508       }
    509       entry = (void *)(ebase + e->e.e_entry);
    510    }
    511 
    512    info->exe_base = minaddr + ebase;
    513    info->exe_end  = maxaddr + ebase;
    514 
    515 #if defined(VGP_ppc64_linux)
    516    /* On PPC64, a func ptr is represented by a TOC entry ptr.  This
    517       TOC entry contains three words; the first word is the function
    518       address, the second word is the TOC ptr (r2), and the third word
    519       is the static chain value. */
    520    info->init_ip  = ((ULong*)entry)[0];
    521    info->init_toc = ((ULong*)entry)[1];
    522    info->init_ip  += interp_offset;
    523    info->init_toc += interp_offset;
    524 #else
    525    info->init_ip  = (Addr)entry;
    526    info->init_toc = 0; /* meaningless on this platform */
    527    (void) interp_offset; /* stop gcc complaining it is unused */
    528 #endif
    529    VG_(free)(e->p);
    530    VG_(free)(e);
    531 
    532    return 0;
    533 }
    534 
    535 #endif // defined(VGO_linux)
    536 
    537 /*--------------------------------------------------------------------*/
    538 /*--- end                                                          ---*/
    539 /*--------------------------------------------------------------------*/
    540