Home | History | Annotate | Download | only in m_syswrap
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Wrappers for generic Unix system calls                       ---*/
      4 /*---                                            syswrap-generic.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2000-2013 Julian Seward
     12       jseward (at) acm.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #if defined(VGO_linux) || defined(VGO_darwin)
     33 
     34 #include "pub_core_basics.h"
     35 #include "pub_core_vki.h"
     36 #include "pub_core_vkiscnums.h"
     37 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
     38 #include "pub_core_threadstate.h"
     39 #include "pub_core_debuginfo.h"     // VG_(di_notify_*)
     40 #include "pub_core_aspacemgr.h"
     41 #include "pub_core_transtab.h"      // VG_(discard_translations)
     42 #include "pub_core_xarray.h"
     43 #include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
     44 #include "pub_core_debuglog.h"
     45 #include "pub_core_errormgr.h"
     46 #include "pub_core_gdbserver.h"     // VG_(gdbserver)
     47 #include "pub_core_libcbase.h"
     48 #include "pub_core_libcassert.h"
     49 #include "pub_core_libcfile.h"
     50 #include "pub_core_libcprint.h"
     51 #include "pub_core_libcproc.h"
     52 #include "pub_core_libcsignal.h"
     53 #include "pub_core_machine.h"       // VG_(get_SP)
     54 #include "pub_core_mallocfree.h"
     55 #include "pub_core_options.h"
     56 #include "pub_core_scheduler.h"
     57 #include "pub_core_signals.h"
     58 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     59 #include "pub_core_syscall.h"
     60 #include "pub_core_syswrap.h"
     61 #include "pub_core_tooliface.h"
     62 #include "pub_core_ume.h"
     63 
     64 #include "priv_types_n_macros.h"
     65 #include "priv_syswrap-generic.h"
     66 
     67 #include "config.h"
     68 
     69 
     70 /* Returns True iff address range is something the client can
     71    plausibly mess with: all of it is either already belongs to the
     72    client or is free or a reservation. */
     73 
     74 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
     75                                    const HChar *syscallname)
     76 {
     77    Bool ret;
     78 
     79    if (size == 0)
     80       return True;
     81 
     82    ret = VG_(am_is_valid_for_client_or_free_or_resvn)
     83             (start,size,VKI_PROT_NONE);
     84 
     85    if (0)
     86       VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
     87 		  syscallname, start, start+size-1, (Int)ret);
     88 
     89    if (!ret && syscallname != NULL) {
     90       VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
     91                                "to modify addresses %#lx-%#lx\n",
     92                                syscallname, start, start+size-1);
     93       if (VG_(clo_verbosity) > 1) {
     94          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
     95       }
     96    }
     97 
     98    return ret;
     99 }
    100 
    101 
    102 Bool ML_(client_signal_OK)(Int sigNo)
    103 {
    104    /* signal 0 is OK for kill */
    105    Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
    106 
    107    //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
    108 
    109    return ret;
    110 }
    111 
    112 
    113 /* Handy small function to help stop wrappers from segfaulting when
    114    presented with bogus client addresses.  Is not used for generating
    115    user-visible errors. */
    116 
    117 Bool ML_(safe_to_deref) ( void* start, SizeT size )
    118 {
    119    return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
    120 }
    121 
    122 
    123 /* ---------------------------------------------------------------------
    124    Doing mmap, mremap
    125    ------------------------------------------------------------------ */
    126 
    127 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
    128    munmap, mprotect (and mremap??) work at the page level.  So addresses
    129    and lengths must be adjusted for this. */
    130 
    131 /* Mash around start and length so that the area exactly covers
    132    an integral number of pages.  If we don't do that, memcheck's
    133    idea of addressible memory diverges from that of the
    134    kernel's, which causes the leak detector to crash. */
    135 static
    136 void page_align_addr_and_len( Addr* a, SizeT* len)
    137 {
    138    Addr ra;
    139 
    140    ra = VG_PGROUNDDN(*a);
    141    *len = VG_PGROUNDUP(*a + *len) - ra;
    142    *a = ra;
    143 }
    144 
    145 static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
    146                                 UInt flags, Int fd, Off64T offset)
    147 {
    148    Bool d;
    149 
    150    /* 'a' is the return value from a real kernel mmap, hence: */
    151    vg_assert(VG_IS_PAGE_ALIGNED(a));
    152    /* whereas len is whatever the syscall supplied.  So: */
    153    len = VG_PGROUNDUP(len);
    154 
    155    d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
    156 
    157    if (d)
    158       VG_(discard_translations)( (Addr64)a, (ULong)len,
    159                                  "notify_core_of_mmap" );
    160 }
    161 
    162 static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
    163 {
    164    Bool rr, ww, xx;
    165 
    166    /* 'a' is the return value from a real kernel mmap, hence: */
    167    vg_assert(VG_IS_PAGE_ALIGNED(a));
    168    /* whereas len is whatever the syscall supplied.  So: */
    169    len = VG_PGROUNDUP(len);
    170 
    171    rr = toBool(prot & VKI_PROT_READ);
    172    ww = toBool(prot & VKI_PROT_WRITE);
    173    xx = toBool(prot & VKI_PROT_EXEC);
    174 
    175    VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
    176 }
    177 
    178 
    179 /* When a client mmap has been successfully done, this function must
    180    be called.  It notifies both aspacem and the tool of the new
    181    mapping.
    182 
    183    JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
    184    it is called from is POST(sys_io_setup).  In particular,
    185    ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
    186    client mmap.  But it doesn't call this function; instead it does the
    187    relevant notifications itself.  Here, we just pass di_handle=0 to
    188    notify_tool_of_mmap as we have no better information.  But really this
    189    function should be done away with; problem is I don't understand what
    190    POST(sys_io_setup) does or how it works.
    191 
    192    [However, this function is used lots for Darwin, because
    193     ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
    194  */
    195 void
    196 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
    197                                     UInt flags, Int fd, Off64T offset )
    198 {
    199    // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
    200    // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
    201    // Should it?  --njn
    202    notify_core_of_mmap(a, len, prot, flags, fd, offset);
    203    notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
    204 }
    205 
    206 void
    207 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
    208 {
    209    Bool d;
    210 
    211    page_align_addr_and_len(&a, &len);
    212    d = VG_(am_notify_munmap)(a, len);
    213    VG_TRACK( die_mem_munmap, a, len );
    214    VG_(di_notify_munmap)( a, len );
    215    if (d)
    216       VG_(discard_translations)( (Addr64)a, (ULong)len,
    217                                  "ML_(notify_core_and_tool_of_munmap)" );
    218 }
    219 
    220 void
    221 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
    222 {
    223    Bool rr = toBool(prot & VKI_PROT_READ);
    224    Bool ww = toBool(prot & VKI_PROT_WRITE);
    225    Bool xx = toBool(prot & VKI_PROT_EXEC);
    226    Bool d;
    227 
    228    page_align_addr_and_len(&a, &len);
    229    d = VG_(am_notify_mprotect)(a, len, prot);
    230    VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
    231    VG_(di_notify_mprotect)( a, len, prot );
    232    if (d)
    233       VG_(discard_translations)( (Addr64)a, (ULong)len,
    234                                  "ML_(notify_core_and_tool_of_mprotect)" );
    235 }
    236 
    237 
    238 
    239 #if HAVE_MREMAP
    240 /* Expand (or shrink) an existing mapping, potentially moving it at
    241    the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
    242 */
    243 static
    244 SysRes do_mremap( Addr old_addr, SizeT old_len,
    245                   Addr new_addr, SizeT new_len,
    246                   UWord flags, ThreadId tid )
    247 {
    248 #  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
    249 
    250    Bool      ok, d;
    251    NSegment const* old_seg;
    252    Addr      advised;
    253    Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
    254    Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
    255 
    256    if (0)
    257       VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
    258                   old_addr,old_len,new_addr,new_len,
    259                   flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
    260                   flags & VKI_MREMAP_FIXED ? "FIXED" : "");
    261    if (0)
    262       VG_(am_show_nsegments)(0, "do_remap: before");
    263 
    264    if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
    265       goto eINVAL;
    266 
    267    if (!VG_IS_PAGE_ALIGNED(old_addr))
    268       goto eINVAL;
    269 
    270    old_len = VG_PGROUNDUP(old_len);
    271    new_len = VG_PGROUNDUP(new_len);
    272 
    273    if (new_len == 0)
    274       goto eINVAL;
    275 
    276    /* kernel doesn't reject this, but we do. */
    277    if (old_len == 0)
    278       goto eINVAL;
    279 
    280    /* reject wraparounds */
    281    if (old_addr + old_len < old_addr)
    282       goto eINVAL;
    283    if (f_fixed == True && new_addr + new_len < new_len)
    284       goto eINVAL;
    285 
    286    /* kernel rejects all fixed, no-move requests (which are
    287       meaningless). */
    288    if (f_fixed == True && f_maymove == False)
    289       goto eINVAL;
    290 
    291    /* Stay away from non-client areas. */
    292    if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
    293       goto eINVAL;
    294 
    295    /* In all remaining cases, if the old range does not fall within a
    296       single segment, fail. */
    297    old_seg = VG_(am_find_nsegment)( old_addr );
    298    if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
    299       goto eINVAL;
    300    if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC)
    301       goto eINVAL;
    302 
    303    vg_assert(old_len > 0);
    304    vg_assert(new_len > 0);
    305    vg_assert(VG_IS_PAGE_ALIGNED(old_len));
    306    vg_assert(VG_IS_PAGE_ALIGNED(new_len));
    307    vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
    308 
    309    /* There are 3 remaining cases:
    310 
    311       * maymove == False
    312 
    313         new space has to be at old address, so:
    314             - shrink    -> unmap end
    315             - same size -> do nothing
    316             - grow      -> if can grow in-place, do so, else fail
    317 
    318       * maymove == True, fixed == False
    319 
    320         new space can be anywhere, so:
    321             - shrink    -> unmap end
    322             - same size -> do nothing
    323             - grow      -> if can grow in-place, do so, else
    324                            move to anywhere large enough, else fail
    325 
    326       * maymove == True, fixed == True
    327 
    328         new space must be at new address, so:
    329 
    330             - if new address is not page aligned, fail
    331             - if new address range overlaps old one, fail
    332             - if new address range cannot be allocated, fail
    333             - else move to new address range with new size
    334             - else fail
    335    */
    336 
    337    if (f_maymove == False) {
    338       /* new space has to be at old address */
    339       if (new_len < old_len)
    340          goto shrink_in_place;
    341       if (new_len > old_len)
    342          goto grow_in_place_or_fail;
    343       goto same_in_place;
    344    }
    345 
    346    if (f_maymove == True && f_fixed == False) {
    347       /* new space can be anywhere */
    348       if (new_len < old_len)
    349          goto shrink_in_place;
    350       if (new_len > old_len)
    351          goto grow_in_place_or_move_anywhere_or_fail;
    352       goto same_in_place;
    353    }
    354 
    355    if (f_maymove == True && f_fixed == True) {
    356       /* new space can only be at the new address */
    357       if (!VG_IS_PAGE_ALIGNED(new_addr))
    358          goto eINVAL;
    359       if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
    360          /* no overlap */
    361       } else {
    362          goto eINVAL;
    363       }
    364       if (new_addr == 0)
    365          goto eINVAL;
    366          /* VG_(am_get_advisory_client_simple) interprets zero to mean
    367             non-fixed, which is not what we want */
    368       advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
    369       if (!ok || advised != new_addr)
    370          goto eNOMEM;
    371       ok = VG_(am_relocate_nooverlap_client)
    372               ( &d, old_addr, old_len, new_addr, new_len );
    373       if (ok) {
    374          VG_TRACK( copy_mem_remap, old_addr, new_addr,
    375                                    MIN_SIZET(old_len,new_len) );
    376          if (new_len > old_len)
    377             VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
    378                       old_seg->hasR, old_seg->hasW, old_seg->hasX,
    379                       0/*di_handle*/ );
    380          VG_TRACK(die_mem_munmap, old_addr, old_len);
    381          if (d) {
    382             VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
    383             VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
    384          }
    385          return VG_(mk_SysRes_Success)( new_addr );
    386       }
    387       goto eNOMEM;
    388    }
    389 
    390    /* end of the 3 cases */
    391    /*NOTREACHED*/ vg_assert(0);
    392 
    393   grow_in_place_or_move_anywhere_or_fail:
    394    {
    395    /* try growing it in-place */
    396    Addr   needA = old_addr + old_len;
    397    SSizeT needL = new_len - old_len;
    398 
    399    vg_assert(needL > 0);
    400    if (needA == 0)
    401       goto eINVAL;
    402       /* VG_(am_get_advisory_client_simple) interprets zero to mean
    403          non-fixed, which is not what we want */
    404    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
    405    if (ok) {
    406       /* Fixes bug #129866. */
    407       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
    408    }
    409    if (ok && advised == needA) {
    410       ok = VG_(am_extend_map_client)( &d, old_seg, needL );
    411       if (ok) {
    412          VG_TRACK( new_mem_mmap, needA, needL,
    413                                  old_seg->hasR,
    414                                  old_seg->hasW, old_seg->hasX,
    415                                  0/*di_handle*/ );
    416          if (d)
    417             VG_(discard_translations)( needA, needL, "do_remap(3)" );
    418          return VG_(mk_SysRes_Success)( old_addr );
    419       }
    420    }
    421 
    422    /* that failed.  Look elsewhere. */
    423    advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
    424    if (ok) {
    425       Bool oldR = old_seg->hasR;
    426       Bool oldW = old_seg->hasW;
    427       Bool oldX = old_seg->hasX;
    428       /* assert new area does not overlap old */
    429       vg_assert(advised+new_len-1 < old_addr
    430                 || advised > old_addr+old_len-1);
    431       ok = VG_(am_relocate_nooverlap_client)
    432               ( &d, old_addr, old_len, advised, new_len );
    433       if (ok) {
    434          VG_TRACK( copy_mem_remap, old_addr, advised,
    435                                    MIN_SIZET(old_len,new_len) );
    436          if (new_len > old_len)
    437             VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
    438                       oldR, oldW, oldX, 0/*di_handle*/ );
    439          VG_TRACK(die_mem_munmap, old_addr, old_len);
    440          if (d) {
    441             VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
    442             VG_(discard_translations)( advised, new_len, "do_remap(5)" );
    443          }
    444          return VG_(mk_SysRes_Success)( advised );
    445       }
    446    }
    447    goto eNOMEM;
    448    }
    449    /*NOTREACHED*/ vg_assert(0);
    450 
    451   grow_in_place_or_fail:
    452    {
    453    Addr  needA = old_addr + old_len;
    454    SizeT needL = new_len - old_len;
    455    if (needA == 0)
    456       goto eINVAL;
    457       /* VG_(am_get_advisory_client_simple) interprets zero to mean
    458          non-fixed, which is not what we want */
    459    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
    460    if (ok) {
    461       /* Fixes bug #129866. */
    462       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
    463    }
    464    if (!ok || advised != needA)
    465       goto eNOMEM;
    466    ok = VG_(am_extend_map_client)( &d, old_seg, needL );
    467    if (!ok)
    468       goto eNOMEM;
    469    VG_TRACK( new_mem_mmap, needA, needL,
    470                            old_seg->hasR, old_seg->hasW, old_seg->hasX,
    471                            0/*di_handle*/ );
    472    if (d)
    473       VG_(discard_translations)( needA, needL, "do_remap(6)" );
    474    return VG_(mk_SysRes_Success)( old_addr );
    475    }
    476    /*NOTREACHED*/ vg_assert(0);
    477 
    478   shrink_in_place:
    479    {
    480    SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
    481    if (sr_isError(sres))
    482       return sres;
    483    VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
    484    if (d)
    485       VG_(discard_translations)( old_addr+new_len, old_len-new_len,
    486                                  "do_remap(7)" );
    487    return VG_(mk_SysRes_Success)( old_addr );
    488    }
    489    /*NOTREACHED*/ vg_assert(0);
    490 
    491   same_in_place:
    492    return VG_(mk_SysRes_Success)( old_addr );
    493    /*NOTREACHED*/ vg_assert(0);
    494 
    495   eINVAL:
    496    return VG_(mk_SysRes_Error)( VKI_EINVAL );
    497   eNOMEM:
    498    return VG_(mk_SysRes_Error)( VKI_ENOMEM );
    499 
    500 #  undef MIN_SIZET
    501 }
    502 #endif /* HAVE_MREMAP */
    503 
    504 
    505 /* ---------------------------------------------------------------------
    506    File-descriptor tracking
    507    ------------------------------------------------------------------ */
    508 
    509 /* One of these is allocated for each open file descriptor.  */
    510 typedef struct OpenFd
    511 {
    512    Int fd;                        /* The file descriptor */
    513    HChar *pathname;               /* NULL if not a regular file or unknown */
    514    ExeContext *where;             /* NULL if inherited from parent */
    515    struct OpenFd *next, *prev;
    516 } OpenFd;
    517 
    518 /* List of allocated file descriptors. */
    519 static OpenFd *allocated_fds = NULL;
    520 
    521 /* Count of open file descriptors. */
    522 static Int fd_count = 0;
    523 
    524 
    525 /* Note the fact that a file descriptor was just closed. */
    526 static
    527 void record_fd_close(Int fd)
    528 {
    529    OpenFd *i = allocated_fds;
    530 
    531    if (fd >= VG_(fd_hard_limit))
    532       return;			/* Valgrind internal */
    533 
    534    while(i) {
    535       if(i->fd == fd) {
    536          if(i->prev)
    537             i->prev->next = i->next;
    538          else
    539             allocated_fds = i->next;
    540          if(i->next)
    541             i->next->prev = i->prev;
    542          if(i->pathname)
    543             VG_(arena_free) (VG_AR_CORE, i->pathname);
    544          VG_(arena_free) (VG_AR_CORE, i);
    545          fd_count--;
    546          break;
    547       }
    548       i = i->next;
    549    }
    550 }
    551 
    552 /* Note the fact that a file descriptor was just opened.  If the
    553    tid is -1, this indicates an inherited fd.  If the pathname is NULL,
    554    this either indicates a non-standard file (i.e. a pipe or socket or
    555    some such thing) or that we don't know the filename.  If the fd is
    556    already open, then we're probably doing a dup2() to an existing fd,
    557    so just overwrite the existing one. */
    558 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd, char *pathname)
    559 {
    560    OpenFd *i;
    561 
    562    if (fd >= VG_(fd_hard_limit))
    563       return;			/* Valgrind internal */
    564 
    565    /* Check to see if this fd is already open. */
    566    i = allocated_fds;
    567    while (i) {
    568       if (i->fd == fd) {
    569          if (i->pathname) VG_(arena_free)(VG_AR_CORE, i->pathname);
    570          break;
    571       }
    572       i = i->next;
    573    }
    574 
    575    /* Not already one: allocate an OpenFd */
    576    if (i == NULL) {
    577       i = VG_(arena_malloc)(VG_AR_CORE, "syswrap.rfdowgn.1", sizeof(OpenFd));
    578 
    579       i->prev = NULL;
    580       i->next = allocated_fds;
    581       if(allocated_fds) allocated_fds->prev = i;
    582       allocated_fds = i;
    583       fd_count++;
    584    }
    585 
    586    i->fd = fd;
    587    i->pathname = VG_(arena_strdup)(VG_AR_CORE, "syswrap.rfdowgn.2", pathname);
    588    i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
    589 }
    590 
    591 // Record opening of an fd, and find its name.
    592 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
    593 {
    594    static HChar buf[VKI_PATH_MAX];
    595    HChar* name;
    596    if (VG_(resolve_filename)(fd, buf, VKI_PATH_MAX))
    597       name = buf;
    598    else
    599       name = NULL;
    600 
    601    ML_(record_fd_open_with_given_name)(tid, fd, name);
    602 }
    603 
    604 // Record opening of a nameless fd.
    605 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
    606 {
    607    ML_(record_fd_open_with_given_name)(tid, fd, NULL);
    608 }
    609 
    610 static
    611 HChar *unix_to_name(struct vki_sockaddr_un *sa, UInt len, HChar *name)
    612 {
    613    if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
    614       VG_(sprintf)(name, "<unknown>");
    615    } else {
    616       VG_(sprintf)(name, "%s", sa->sun_path);
    617    }
    618 
    619    return name;
    620 }
    621 
    622 static
    623 HChar *inet_to_name(struct vki_sockaddr_in *sa, UInt len, HChar *name)
    624 {
    625    if (sa == NULL || len == 0) {
    626       VG_(sprintf)(name, "<unknown>");
    627    } else if (sa->sin_port == 0) {
    628       VG_(sprintf)(name, "<unbound>");
    629    } else {
    630       UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
    631       VG_(sprintf)(name, "%u.%u.%u.%u:%u",
    632                    (addr>>24) & 0xFF, (addr>>16) & 0xFF,
    633                    (addr>>8) & 0xFF, addr & 0xFF,
    634                    VG_(ntohs)(sa->sin_port));
    635    }
    636 
    637    return name;
    638 }
    639 
    640 static
    641 void inet6_format(HChar *s, const UChar ip[16])
    642 {
    643    static const unsigned char V4mappedprefix[12] = {0,0,0,0,0,0,0,0,0,0,0xff,0xff};
    644 
    645    if (!VG_(memcmp)(ip, V4mappedprefix, 12)) {
    646       struct vki_in_addr *sin_addr = (struct vki_in_addr *)(ip + 12);
    647       UInt addr = VG_(ntohl)(sin_addr->s_addr);
    648 
    649       VG_(sprintf)(s, "::ffff:%u.%u.%u.%u",
    650                    (addr>>24) & 0xFF, (addr>>16) & 0xFF,
    651                    (addr>>8) & 0xFF, addr & 0xFF);
    652    } else {
    653       Bool compressing = False;
    654       Bool compressed = False;
    655       Int len = 0;
    656       Int i;
    657 
    658       for (i = 0; i < 16; i += 2) {
    659          UInt word = ((UInt)ip[i] << 8) | (UInt)ip[i+1];
    660          if (word == 0 && !compressed) {
    661             compressing = True;
    662          } else {
    663             if (compressing) {
    664                compressing = False;
    665                compressed = True;
    666                s[len++] = ':';
    667             }
    668             if (i > 0) {
    669                s[len++] = ':';
    670             }
    671             len += VG_(sprintf)(s + len, "%x", word);
    672          }
    673       }
    674 
    675       if (compressing) {
    676          s[len++] = ':';
    677          s[len++] = ':';
    678       }
    679 
    680       s[len++] = 0;
    681    }
    682 
    683    return;
    684 }
    685 
    686 static
    687 HChar *inet6_to_name(struct vki_sockaddr_in6 *sa, UInt len, HChar *name)
    688 {
    689    if (sa == NULL || len == 0) {
    690       VG_(sprintf)(name, "<unknown>");
    691    } else if (sa->sin6_port == 0) {
    692       VG_(sprintf)(name, "<unbound>");
    693    } else {
    694       char addr[128];
    695       inet6_format(addr, (void *)&(sa->sin6_addr));
    696       VG_(sprintf)(name, "[%s]:%u", addr, VG_(ntohs)(sa->sin6_port));
    697    }
    698 
    699    return name;
    700 }
    701 
    702 /*
    703  * Try get some details about a socket.
    704  */
    705 static void
    706 getsockdetails(Int fd)
    707 {
    708    union u {
    709       struct vki_sockaddr a;
    710       struct vki_sockaddr_in in;
    711       struct vki_sockaddr_in6 in6;
    712       struct vki_sockaddr_un un;
    713    } laddr;
    714    Int llen;
    715 
    716    llen = sizeof(laddr);
    717    VG_(memset)(&laddr, 0, llen);
    718 
    719    if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
    720       switch(laddr.a.sa_family) {
    721       case VKI_AF_INET: {
    722          static char lname[32];
    723          static char pname[32];
    724          struct vki_sockaddr_in paddr;
    725          Int plen = sizeof(struct vki_sockaddr_in);
    726 
    727          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
    728             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
    729                          inet_to_name(&(laddr.in), llen, lname),
    730                          inet_to_name(&paddr, plen, pname));
    731          } else {
    732             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
    733                          fd, inet_to_name(&(laddr.in), llen, lname));
    734          }
    735          return;
    736          }
    737       case VKI_AF_INET6: {
    738          static char lname[128];
    739          static char pname[128];
    740          struct vki_sockaddr_in6 paddr;
    741          Int plen = sizeof(struct vki_sockaddr_in6);
    742 
    743          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
    744             VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> %s\n", fd,
    745                          inet6_to_name(&(laddr.in6), llen, lname),
    746                          inet6_to_name(&paddr, plen, pname));
    747          } else {
    748             VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> unbound\n",
    749                          fd, inet6_to_name(&(laddr.in6), llen, lname));
    750          }
    751          return;
    752          }
    753       case VKI_AF_UNIX: {
    754          static char lname[256];
    755          VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
    756                       unix_to_name(&(laddr.un), llen, lname));
    757          return;
    758          }
    759       default:
    760          VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
    761                       laddr.a.sa_family, fd);
    762          return;
    763       }
    764    }
    765 
    766    VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
    767 }
    768 
    769 
    770 /* Dump out a summary, and a more detailed list, of open file descriptors. */
    771 void VG_(show_open_fds) (const HChar* when)
    772 {
    773    OpenFd *i = allocated_fds;
    774 
    775    VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open %s.\n", fd_count, when);
    776 
    777    while (i) {
    778       if (i->pathname) {
    779          VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
    780                       i->pathname);
    781       } else {
    782          Int val;
    783          Int len = sizeof(val);
    784 
    785          if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
    786              == -1) {
    787             VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
    788          } else {
    789             getsockdetails(i->fd);
    790          }
    791       }
    792 
    793       if(i->where) {
    794          VG_(pp_ExeContext)(i->where);
    795          VG_(message)(Vg_UserMsg, "\n");
    796       } else {
    797          VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
    798          VG_(message)(Vg_UserMsg, "\n");
    799       }
    800 
    801       i = i->next;
    802    }
    803 
    804    VG_(message)(Vg_UserMsg, "\n");
    805 }
    806 
    807 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
    808    have /proc support compiled in, or a non-Linux kernel), then we need to
    809    find out what file descriptors we inherited from our parent process the
    810    hard way - by checking each fd in turn. */
    811 static
    812 void init_preopened_fds_without_proc_self_fd(void)
    813 {
    814    struct vki_rlimit lim;
    815    UInt count;
    816    Int i;
    817 
    818    if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
    819       /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
    820          an arbitrarily high number.  1024 happens to be the limit in
    821          the 2.4 Linux kernels. */
    822       count = 1024;
    823    } else {
    824       count = lim.rlim_cur;
    825    }
    826 
    827    for (i = 0; i < count; i++)
    828       if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
    829          ML_(record_fd_open_named)(-1, i);
    830 }
    831 
    832 /* Initialize the list of open file descriptors with the file descriptors
    833    we inherited from out parent process. */
    834 
    835 void VG_(init_preopened_fds)(void)
    836 {
    837 // DDD: should probably use HAVE_PROC here or similar, instead.
    838 #if defined(VGO_linux)
    839    Int ret;
    840    struct vki_dirent d;
    841    SysRes f;
    842 
    843    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
    844    if (sr_isError(f)) {
    845       init_preopened_fds_without_proc_self_fd();
    846       return;
    847    }
    848 
    849    while ((ret = VG_(getdents)(sr_Res(f), &d, sizeof(d))) != 0) {
    850       if (ret == -1)
    851          goto out;
    852 
    853       if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
    854          HChar* s;
    855          Int fno = VG_(strtoll10)(d.d_name, &s);
    856          if (*s == '\0') {
    857             if (fno != sr_Res(f))
    858                if (VG_(clo_track_fds))
    859                   ML_(record_fd_open_named)(-1, fno);
    860          } else {
    861             VG_(message)(Vg_DebugMsg,
    862                "Warning: invalid file name in /proc/self/fd: %s\n",
    863                d.d_name);
    864          }
    865       }
    866 
    867       VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
    868    }
    869 
    870   out:
    871    VG_(close)(sr_Res(f));
    872 
    873 #elif defined(VGO_darwin)
    874    init_preopened_fds_without_proc_self_fd();
    875 
    876 #else
    877 #  error Unknown OS
    878 #endif
    879 }
    880 
    881 static
    882 HChar *strdupcat ( const HChar* cc, const HChar *s1, const HChar *s2,
    883                    ArenaId aid )
    884 {
    885    UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
    886    HChar *result = VG_(arena_malloc) ( aid, cc, len );
    887    VG_(strcpy) ( result, s1 );
    888    VG_(strcat) ( result, s2 );
    889    return result;
    890 }
    891 
    892 static
    893 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
    894                             const HChar *msg, Addr base, SizeT size )
    895 {
    896    HChar *outmsg = strdupcat ( "di.syswrap.pmrs.1",
    897                                "sendmsg", msg, VG_AR_CORE );
    898    PRE_MEM_READ( outmsg, base, size );
    899    VG_(arena_free) ( VG_AR_CORE, outmsg );
    900 }
    901 
    902 static
    903 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
    904                              const HChar *msg, Addr base, SizeT size )
    905 {
    906    HChar *outmsg = strdupcat ( "di.syswrap.pmwr.1",
    907                                "recvmsg", msg, VG_AR_CORE );
    908    if ( read )
    909       PRE_MEM_READ( outmsg, base, size );
    910    else
    911       PRE_MEM_WRITE( outmsg, base, size );
    912    VG_(arena_free) ( VG_AR_CORE, outmsg );
    913 }
    914 
    915 static
    916 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
    917                               const HChar *fieldName, Addr base, SizeT size )
    918 {
    919    if ( !read )
    920       POST_MEM_WRITE( base, size );
    921 }
    922 
    923 static
    924 void msghdr_foreachfield (
    925         ThreadId tid,
    926         const HChar *name,
    927         struct vki_msghdr *msg,
    928         UInt length,
    929         void (*foreach_func)( ThreadId, Bool, const HChar *, Addr, SizeT ),
    930         Bool recv
    931      )
    932 {
    933    HChar *fieldName;
    934 
    935    if ( !msg )
    936       return;
    937 
    938    fieldName = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.mfef", VG_(strlen)(name) + 32 );
    939 
    940    VG_(sprintf) ( fieldName, "(%s)", name );
    941 
    942    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
    943    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
    944    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
    945    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
    946    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
    947    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
    948 
    949    /* msg_flags is completely ignored for send_mesg, recv_mesg doesn't read
    950       the field, but does write to it. */
    951    if ( recv )
    952       foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );
    953 
    954    if ( ML_(safe_to_deref)(&msg->msg_name, sizeof (void *))
    955         && msg->msg_name ) {
    956       VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
    957       foreach_func ( tid, False, fieldName,
    958                      (Addr)msg->msg_name, msg->msg_namelen );
    959    }
    960 
    961    if ( ML_(safe_to_deref)(&msg->msg_iov, sizeof (void *))
    962         && msg->msg_iov ) {
    963       struct vki_iovec *iov = msg->msg_iov;
    964       UInt i;
    965 
    966       VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );
    967 
    968       foreach_func ( tid, True, fieldName,
    969                      (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );
    970 
    971       for ( i = 0; i < msg->msg_iovlen; ++i, ++iov ) {
    972          UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
    973          VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
    974          foreach_func ( tid, False, fieldName,
    975                         (Addr)iov->iov_base, iov_len );
    976          length = length - iov_len;
    977       }
    978    }
    979 
    980    if ( ML_(safe_to_deref) (&msg->msg_control, sizeof (void *))
    981         && msg->msg_control )
    982    {
    983       VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
    984       foreach_func ( tid, False, fieldName,
    985                      (Addr)msg->msg_control, msg->msg_controllen );
    986    }
    987 
    988    VG_(arena_free) ( VG_AR_CORE, fieldName );
    989 }
    990 
    991 static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
    992 {
    993    struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);
    994 
    995    while (cm) {
    996       if (cm->cmsg_level == VKI_SOL_SOCKET &&
    997           cm->cmsg_type == VKI_SCM_RIGHTS ) {
    998          Int *fds = (Int *) VKI_CMSG_DATA(cm);
    999          Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
   1000                          / sizeof(int);
   1001          Int i;
   1002 
   1003          for (i = 0; i < fdc; i++)
   1004             if(VG_(clo_track_fds))
   1005                // XXX: must we check the range on these fds with
   1006                //      ML_(fd_allowed)()?
   1007                ML_(record_fd_open_named)(tid, fds[i]);
   1008       }
   1009 
   1010       cm = VKI_CMSG_NXTHDR(msg, cm);
   1011    }
   1012 }
   1013 
   1014 /* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
   1015 static
   1016 void pre_mem_read_sockaddr ( ThreadId tid,
   1017                              const HChar *description,
   1018                              struct vki_sockaddr *sa, UInt salen )
   1019 {
   1020    HChar *outmsg;
   1021    struct vki_sockaddr_un*  sun  = (struct vki_sockaddr_un *)sa;
   1022    struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
   1023    struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
   1024 #ifdef VKI_AF_BLUETOOTH
   1025    struct vki_sockaddr_rc*  rc   = (struct vki_sockaddr_rc *)sa;
   1026 #endif
   1027 
   1028    /* NULL/zero-length sockaddrs are legal */
   1029    if ( sa == NULL || salen == 0 ) return;
   1030 
   1031    outmsg = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.pmr_sockaddr.1",
   1032                                 VG_(strlen)( description ) + 30 );
   1033 
   1034    VG_(sprintf) ( outmsg, description, "sa_family" );
   1035    PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
   1036 
   1037    switch (sa->sa_family) {
   1038 
   1039       case VKI_AF_UNIX:
   1040          VG_(sprintf) ( outmsg, description, "sun_path" );
   1041          PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
   1042          // GrP fixme max of sun_len-2? what about nul char?
   1043          break;
   1044 
   1045       case VKI_AF_INET:
   1046          VG_(sprintf) ( outmsg, description, "sin_port" );
   1047          PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
   1048          VG_(sprintf) ( outmsg, description, "sin_addr" );
   1049          PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
   1050          break;
   1051 
   1052       case VKI_AF_INET6:
   1053          VG_(sprintf) ( outmsg, description, "sin6_port" );
   1054          PRE_MEM_READ( outmsg,
   1055             (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
   1056          VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
   1057          PRE_MEM_READ( outmsg,
   1058             (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
   1059          VG_(sprintf) ( outmsg, description, "sin6_addr" );
   1060          PRE_MEM_READ( outmsg,
   1061             (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
   1062          VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
   1063          PRE_MEM_READ( outmsg,
   1064             (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
   1065          break;
   1066 
   1067 #ifdef VKI_AF_BLUETOOTH
   1068       case VKI_AF_BLUETOOTH:
   1069          VG_(sprintf) ( outmsg, description, "rc_bdaddr" );
   1070          PRE_MEM_READ( outmsg, (Addr) &rc->rc_bdaddr, sizeof (rc->rc_bdaddr) );
   1071          VG_(sprintf) ( outmsg, description, "rc_channel" );
   1072          PRE_MEM_READ( outmsg, (Addr) &rc->rc_channel, sizeof (rc->rc_channel) );
   1073          break;
   1074 #endif
   1075 
   1076       default:
   1077          VG_(sprintf) ( outmsg, description, "" );
   1078          PRE_MEM_READ( outmsg, (Addr) sa, salen );
   1079          break;
   1080    }
   1081 
   1082    VG_(arena_free) ( VG_AR_CORE, outmsg );
   1083 }
   1084 
   1085 /* Dereference a pointer to a UInt. */
   1086 static UInt deref_UInt ( ThreadId tid, Addr a, const HChar* s )
   1087 {
   1088    UInt* a_p = (UInt*)a;
   1089    PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
   1090    if (a_p == NULL)
   1091       return 0;
   1092    else
   1093       return *a_p;
   1094 }
   1095 
   1096 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
   1097                                   const HChar* buf_s, const HChar* buflen_s )
   1098 {
   1099    if (VG_(tdict).track_pre_mem_write) {
   1100       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
   1101       if (buflen_in > 0) {
   1102          VG_(tdict).track_pre_mem_write(
   1103             Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
   1104       }
   1105    }
   1106 }
   1107 
   1108 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
   1109                                    Addr buf_p, Addr buflen_p, const HChar* s )
   1110 {
   1111    if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
   1112       UInt buflen_out = deref_UInt( tid, buflen_p, s);
   1113       if (buflen_out > 0 && buf_p != (Addr)NULL) {
   1114          VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
   1115       }
   1116    }
   1117 }
   1118 
   1119 /* ---------------------------------------------------------------------
   1120    Data seg end, for brk()
   1121    ------------------------------------------------------------------ */
   1122 
   1123 /*   +--------+------------+
   1124      | anon   |    resvn   |
   1125      +--------+------------+
   1126 
   1127      ^     ^  ^
   1128      |     |  boundary is page aligned
   1129      |     VG_(brk_limit) -- no alignment constraint
   1130      VG_(brk_base) -- page aligned -- does not move
   1131 
   1132      Both the anon part and the reservation part are always at least
   1133      one page.
   1134 */
   1135 
   1136 /* Set the new data segment end to NEWBRK.  If this succeeds, return
   1137    NEWBRK, else return the current data segment end. */
   1138 
   1139 static Addr do_brk ( Addr newbrk )
   1140 {
   1141    NSegment const* aseg;
   1142    NSegment const* rseg;
   1143    Addr newbrkP;
   1144    SizeT delta;
   1145    Bool ok;
   1146    Bool debug = False;
   1147 
   1148    if (debug)
   1149       VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
   1150 		  VG_(brk_base), VG_(brk_limit), newbrk);
   1151 
   1152 #  if 0
   1153    if (0) show_segments("in_brk");
   1154 #  endif
   1155 
   1156    if (newbrk < VG_(brk_base))
   1157       /* Clearly impossible. */
   1158       goto bad;
   1159 
   1160    if (newbrk >= VG_(brk_base) && newbrk < VG_(brk_limit)) {
   1161       /* shrinking the data segment.  Be lazy and don't munmap the
   1162          excess area. */
   1163       NSegment const * seg = VG_(am_find_nsegment)(newbrk);
   1164       if (seg && seg->hasT)
   1165          VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
   1166                                     "do_brk(shrink)" );
   1167       /* Since we're being lazy and not unmapping pages, we have to
   1168          zero out the area, so that if the area later comes back into
   1169          circulation, it will be filled with zeroes, as if it really
   1170          had been unmapped and later remapped.  Be a bit paranoid and
   1171          try hard to ensure we're not going to segfault by doing the
   1172          write - check both ends of the range are in the same segment
   1173          and that segment is writable. */
   1174       if (seg) {
   1175          /* pre: newbrk < VG_(brk_limit)
   1176               => newbrk <= VG_(brk_limit)-1 */
   1177          NSegment const * seg2;
   1178          vg_assert(newbrk < VG_(brk_limit));
   1179          seg2 = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   1180          if (seg2 && seg == seg2 && seg->hasW)
   1181             VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
   1182       }
   1183 
   1184       VG_(brk_limit) = newbrk;
   1185       return newbrk;
   1186    }
   1187 
   1188    /* otherwise we're expanding the brk segment. */
   1189    if (VG_(brk_limit) > VG_(brk_base))
   1190       aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   1191    else
   1192       aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
   1193    rseg = VG_(am_next_nsegment)( aseg, True/*forwards*/ );
   1194 
   1195    /* These should be assured by setup_client_dataseg in m_main. */
   1196    vg_assert(aseg);
   1197    vg_assert(rseg);
   1198    vg_assert(aseg->kind == SkAnonC);
   1199    vg_assert(rseg->kind == SkResvn);
   1200    vg_assert(aseg->end+1 == rseg->start);
   1201 
   1202    vg_assert(newbrk >= VG_(brk_base));
   1203    if (newbrk <= rseg->start) {
   1204       /* still fits within the anon segment. */
   1205       VG_(brk_limit) = newbrk;
   1206       return newbrk;
   1207    }
   1208 
   1209    if (newbrk > rseg->end+1 - VKI_PAGE_SIZE) {
   1210       /* request is too large -- the resvn would fall below 1 page,
   1211          which isn't allowed. */
   1212       goto bad;
   1213    }
   1214 
   1215    newbrkP = VG_PGROUNDUP(newbrk);
   1216    vg_assert(newbrkP > rseg->start && newbrkP <= rseg->end+1 - VKI_PAGE_SIZE);
   1217    delta = newbrkP - rseg->start;
   1218    vg_assert(delta > 0);
   1219    vg_assert(VG_IS_PAGE_ALIGNED(delta));
   1220 
   1221    ok = VG_(am_extend_into_adjacent_reservation_client)( aseg, delta );
   1222    if (!ok) goto bad;
   1223 
   1224    VG_(brk_limit) = newbrk;
   1225    return newbrk;
   1226 
   1227   bad:
   1228    return VG_(brk_limit);
   1229 }
   1230 
   1231 
   1232 /* ---------------------------------------------------------------------
   1233    Vet file descriptors for sanity
   1234    ------------------------------------------------------------------ */
   1235 /*
   1236 > - what does the "Bool soft" parameter mean?
   1237 
   1238 (Tom Hughes, 3 Oct 05):
   1239 
   1240 Whether or not to consider a file descriptor invalid if it is above
   1241 the current soft limit.
   1242 
   1243 Basically if we are testing whether a newly created file descriptor is
   1244 valid (in a post handler) then we set soft to true, and if we are
   1245 testing whether a file descriptor that is about to be used (in a pre
   1246 handler) is valid [viz, an already-existing fd] then we set it to false.
   1247 
   1248 The point is that if the (virtual) soft limit is lowered then any
   1249 existing descriptors can still be read/written/closed etc (so long as
   1250 they are below the valgrind reserved descriptors) but no new
   1251 descriptors can be created above the new soft limit.
   1252 
   1253 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
   1254 */
   1255 
   1256 /* Return true if we're allowed to use or create this fd */
   1257 Bool ML_(fd_allowed)(Int fd, const HChar *syscallname, ThreadId tid,
   1258                      Bool isNewFd)
   1259 {
   1260    Bool allowed = True;
   1261 
   1262    /* hard limits always apply */
   1263    if (fd < 0 || fd >= VG_(fd_hard_limit))
   1264       allowed = False;
   1265 
   1266    /* hijacking the output fds is never allowed */
   1267    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
   1268       allowed = False;
   1269 
   1270    /* if creating a new fd (rather than using an existing one), the
   1271       soft limit must also be observed */
   1272    if (isNewFd && fd >= VG_(fd_soft_limit))
   1273       allowed = False;
   1274 
   1275    /* this looks like it ought to be included, but causes problems: */
   1276    /*
   1277    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
   1278       allowed = False;
   1279    */
   1280    /* The difficulty is as follows: consider a program P which expects
   1281       to be able to mess with (redirect) its own stderr (fd 2).
   1282       Usually to deal with P we would issue command line flags to send
   1283       logging somewhere other than stderr, so as not to disrupt P.
   1284       The problem is that -d unilaterally hijacks stderr with no
   1285       consultation with P.  And so, if this check is enabled, P will
   1286       work OK normally but fail if -d is issued.
   1287 
   1288       Basically -d is a hack and you take your chances when using it.
   1289       It's very useful for low level debugging -- particularly at
   1290       startup -- and having its presence change the behaviour of the
   1291       client is exactly what we don't want.  */
   1292 
   1293    /* croak? */
   1294    if ((!allowed) && VG_(showing_core_errors)() ) {
   1295       VG_(message)(Vg_UserMsg,
   1296          "Warning: invalid file descriptor %d in syscall %s()\n",
   1297          fd, syscallname);
   1298       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
   1299 	 VG_(message)(Vg_UserMsg,
   1300             "   Use --log-fd=<number> to select an alternative log fd.\n");
   1301       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
   1302 	 VG_(message)(Vg_UserMsg,
   1303             "   Use --xml-fd=<number> to select an alternative XML "
   1304             "output fd.\n");
   1305       // DDD: consider always printing this stack trace, it's useful.
   1306       // Also consider also making this a proper core error, ie.
   1307       // suppressible and all that.
   1308       if (VG_(clo_verbosity) > 1) {
   1309          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   1310       }
   1311    }
   1312 
   1313    return allowed;
   1314 }
   1315 
   1316 
   1317 /* ---------------------------------------------------------------------
   1318    Deal with a bunch of socket-related syscalls
   1319    ------------------------------------------------------------------ */
   1320 
   1321 /* ------ */
   1322 
   1323 void
   1324 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
   1325                                   UWord arg0, UWord arg1,
   1326                                   UWord arg2, UWord arg3 )
   1327 {
   1328    /* int socketpair(int d, int type, int protocol, int sv[2]); */
   1329    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
   1330                   arg3, 2*sizeof(int) );
   1331 }
   1332 
   1333 SysRes
   1334 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
   1335                                    SysRes res,
   1336                                    UWord arg0, UWord arg1,
   1337                                    UWord arg2, UWord arg3 )
   1338 {
   1339    SysRes r = res;
   1340    Int fd1 = ((Int*)arg3)[0];
   1341    Int fd2 = ((Int*)arg3)[1];
   1342    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1343    POST_MEM_WRITE( arg3, 2*sizeof(int) );
   1344    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
   1345        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
   1346       VG_(close)(fd1);
   1347       VG_(close)(fd2);
   1348       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1349    } else {
   1350       POST_MEM_WRITE( arg3, 2*sizeof(int) );
   1351       if (VG_(clo_track_fds)) {
   1352          ML_(record_fd_open_nameless)(tid, fd1);
   1353          ML_(record_fd_open_nameless)(tid, fd2);
   1354       }
   1355    }
   1356    return r;
   1357 }
   1358 
   1359 /* ------ */
   1360 
   1361 SysRes
   1362 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
   1363 {
   1364    SysRes r = res;
   1365    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1366    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
   1367       VG_(close)(sr_Res(res));
   1368       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1369    } else {
   1370       if (VG_(clo_track_fds))
   1371          ML_(record_fd_open_nameless)(tid, sr_Res(res));
   1372    }
   1373    return r;
   1374 }
   1375 
   1376 /* ------ */
   1377 
   1378 void
   1379 ML_(generic_PRE_sys_bind) ( ThreadId tid,
   1380                             UWord arg0, UWord arg1, UWord arg2 )
   1381 {
   1382    /* int bind(int sockfd, struct sockaddr *my_addr,
   1383                int addrlen); */
   1384    pre_mem_read_sockaddr(
   1385       tid, "socketcall.bind(my_addr.%s)",
   1386       (struct vki_sockaddr *) arg1, arg2
   1387    );
   1388 }
   1389 
   1390 /* ------ */
   1391 
   1392 void
   1393 ML_(generic_PRE_sys_accept) ( ThreadId tid,
   1394                               UWord arg0, UWord arg1, UWord arg2 )
   1395 {
   1396    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
   1397    Addr addr_p     = arg1;
   1398    Addr addrlen_p  = arg2;
   1399    if (addr_p != (Addr)NULL)
   1400       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
   1401                                    "socketcall.accept(addr)",
   1402                                    "socketcall.accept(addrlen_in)" );
   1403 }
   1404 
   1405 SysRes
   1406 ML_(generic_POST_sys_accept) ( ThreadId tid,
   1407                                SysRes res,
   1408                                UWord arg0, UWord arg1, UWord arg2 )
   1409 {
   1410    SysRes r = res;
   1411    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1412    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
   1413       VG_(close)(sr_Res(res));
   1414       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1415    } else {
   1416       Addr addr_p     = arg1;
   1417       Addr addrlen_p  = arg2;
   1418       if (addr_p != (Addr)NULL)
   1419          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
   1420                                        "socketcall.accept(addrlen_out)" );
   1421       if (VG_(clo_track_fds))
   1422           ML_(record_fd_open_nameless)(tid, sr_Res(res));
   1423    }
   1424    return r;
   1425 }
   1426 
   1427 /* ------ */
   1428 
   1429 void
   1430 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
   1431                               UWord arg0, UWord arg1, UWord arg2,
   1432                               UWord arg3, UWord arg4, UWord arg5 )
   1433 {
   1434    /* int sendto(int s, const void *msg, int len,
   1435                  unsigned int flags,
   1436                  const struct sockaddr *to, int tolen); */
   1437    PRE_MEM_READ( "socketcall.sendto(msg)",
   1438                  arg1, /* msg */
   1439                  arg2  /* len */ );
   1440    pre_mem_read_sockaddr(
   1441       tid, "socketcall.sendto(to.%s)",
   1442       (struct vki_sockaddr *) arg4, arg5
   1443    );
   1444 }
   1445 
   1446 /* ------ */
   1447 
   1448 void
   1449 ML_(generic_PRE_sys_send) ( ThreadId tid,
   1450                             UWord arg0, UWord arg1, UWord arg2 )
   1451 {
   1452    /* int send(int s, const void *msg, size_t len, int flags); */
   1453    PRE_MEM_READ( "socketcall.send(msg)",
   1454                   arg1, /* msg */
   1455                   arg2  /* len */ );
   1456 
   1457 }
   1458 
   1459 /* ------ */
   1460 
   1461 void
   1462 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
   1463                                 UWord arg0, UWord arg1, UWord arg2,
   1464                                 UWord arg3, UWord arg4, UWord arg5 )
   1465 {
   1466    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
   1467                    struct sockaddr *from, int *fromlen); */
   1468    Addr buf_p      = arg1;
   1469    Int  len        = arg2;
   1470    Addr from_p     = arg4;
   1471    Addr fromlen_p  = arg5;
   1472    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
   1473    if (from_p != (Addr)NULL)
   1474       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
   1475                                    "socketcall.recvfrom(from)",
   1476                                    "socketcall.recvfrom(fromlen_in)" );
   1477 }
   1478 
   1479 void
   1480 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
   1481                                  SysRes res,
   1482                                  UWord arg0, UWord arg1, UWord arg2,
   1483                                  UWord arg3, UWord arg4, UWord arg5 )
   1484 {
   1485    Addr buf_p      = arg1;
   1486    Int  len        = arg2;
   1487    Addr from_p     = arg4;
   1488    Addr fromlen_p  = arg5;
   1489 
   1490    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1491    if (from_p != (Addr)NULL)
   1492       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
   1493                                     "socketcall.recvfrom(fromlen_out)" );
   1494    POST_MEM_WRITE( buf_p, len );
   1495 }
   1496 
   1497 /* ------ */
   1498 
   1499 void
   1500 ML_(generic_PRE_sys_recv) ( ThreadId tid,
   1501                             UWord arg0, UWord arg1, UWord arg2 )
   1502 {
   1503    /* int recv(int s, void *buf, int len, unsigned int flags); */
   1504    /* man 2 recv says:
   1505       The  recv call is normally used only on a connected socket
   1506       (see connect(2)) and is identical to recvfrom with a  NULL
   1507       from parameter.
   1508    */
   1509    PRE_MEM_WRITE( "socketcall.recv(buf)",
   1510                   arg1, /* buf */
   1511                   arg2  /* len */ );
   1512 }
   1513 
   1514 void
   1515 ML_(generic_POST_sys_recv) ( ThreadId tid,
   1516                              UWord res,
   1517                              UWord arg0, UWord arg1, UWord arg2 )
   1518 {
   1519    if (res >= 0 && arg1 != 0) {
   1520       POST_MEM_WRITE( arg1, /* buf */
   1521                       arg2  /* len */ );
   1522    }
   1523 }
   1524 
   1525 /* ------ */
   1526 
   1527 void
   1528 ML_(generic_PRE_sys_connect) ( ThreadId tid,
   1529                                UWord arg0, UWord arg1, UWord arg2 )
   1530 {
   1531    /* int connect(int sockfd,
   1532                   struct sockaddr *serv_addr, int addrlen ); */
   1533    pre_mem_read_sockaddr( tid,
   1534                           "socketcall.connect(serv_addr.%s)",
   1535                           (struct vki_sockaddr *) arg1, arg2);
   1536 }
   1537 
   1538 /* ------ */
   1539 
   1540 void
   1541 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
   1542                                   UWord arg0, UWord arg1, UWord arg2,
   1543                                   UWord arg3, UWord arg4 )
   1544 {
   1545    /* int setsockopt(int s, int level, int optname,
   1546                      const void *optval, int optlen); */
   1547    PRE_MEM_READ( "socketcall.setsockopt(optval)",
   1548                  arg3, /* optval */
   1549                  arg4  /* optlen */ );
   1550 }
   1551 
   1552 /* ------ */
   1553 
   1554 void
   1555 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
   1556                                    UWord arg0, UWord arg1, UWord arg2 )
   1557 {
   1558    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
   1559    Addr name_p     = arg1;
   1560    Addr namelen_p  = arg2;
   1561    /* Nb: name_p cannot be NULL */
   1562    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
   1563                                 "socketcall.getsockname(name)",
   1564                                 "socketcall.getsockname(namelen_in)" );
   1565 }
   1566 
   1567 void
   1568 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
   1569                                     SysRes res,
   1570                                     UWord arg0, UWord arg1, UWord arg2 )
   1571 {
   1572    Addr name_p     = arg1;
   1573    Addr namelen_p  = arg2;
   1574    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1575    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
   1576                                  "socketcall.getsockname(namelen_out)" );
   1577 }
   1578 
   1579 /* ------ */
   1580 
   1581 void
   1582 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
   1583                                    UWord arg0, UWord arg1, UWord arg2 )
   1584 {
   1585    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
   1586    Addr name_p     = arg1;
   1587    Addr namelen_p  = arg2;
   1588    /* Nb: name_p cannot be NULL */
   1589    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
   1590                                 "socketcall.getpeername(name)",
   1591                                 "socketcall.getpeername(namelen_in)" );
   1592 }
   1593 
   1594 void
   1595 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
   1596                                     SysRes res,
   1597                                     UWord arg0, UWord arg1, UWord arg2 )
   1598 {
   1599    Addr name_p     = arg1;
   1600    Addr namelen_p  = arg2;
   1601    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1602    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
   1603                                  "socketcall.getpeername(namelen_out)" );
   1604 }
   1605 
   1606 /* ------ */
   1607 
/* PRE handler for sendmsg().  Walks the fields of 'msg' with
   msghdr_foreachfield, applying pre_mem_read_sendmsg to each one.
   'name' is the syscall name forwarded to the walker; ~0 is passed as
   the length argument and False as the final flag.
   NOTE(review): ~0 presumably means "no length limit" and the final
   flag presumably selects recvmsg-style handling -- confirm against
   msghdr_foreachfield. */
void
ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, const HChar *name,
                               struct vki_msghdr *msg )
{
   msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg, False );
}
   1614 
   1615 /* ------ */
   1616 
/* PRE handler for recvmsg().  Walks the fields of 'msg' with
   msghdr_foreachfield, applying pre_mem_write_recvmsg to each one.
   'name' is the syscall name forwarded to the walker; ~0 is passed as
   the length argument and True as the final flag.
   NOTE(review): ~0 presumably means "no length limit" -- confirm
   against msghdr_foreachfield. */
void
ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, const HChar *name,
                               struct vki_msghdr *msg )
{
   msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg, True );
}
   1623 
/* POST handler for recvmsg().  'length' is forwarded to
   msghdr_foreachfield together with post_mem_write_recvmsg, after
   which the control messages of 'msg' are passed to
   check_cmsg_for_fds.
   NOTE(review): 'length' is presumably the number of bytes the kernel
   reported as received -- confirm against the callers. */
void
ML_(generic_POST_sys_recvmsg) ( ThreadId tid, const HChar *name,
                                struct vki_msghdr *msg, UInt length )
{
   msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg, True );
   check_cmsg_for_fds( tid, msg );
}
   1631 
   1632 
   1633 /* ---------------------------------------------------------------------
   1634    Deal with a bunch of IPC related syscalls
   1635    ------------------------------------------------------------------ */
   1636 
   1637 /* ------ */
   1638 
/* PRE handler for semop().  arg1 is the 'sops' array and arg2 the
   number of entries in it; the whole array is declared as read by the
   syscall.  arg0 (semid) is not used here. */
void
ML_(generic_PRE_sys_semop) ( ThreadId tid,
                             UWord arg0, UWord arg1, UWord arg2 )
{
   /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
   PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
}
   1646 
   1647 /* ------ */
   1648 
   1649 void
   1650 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
   1651                                   UWord arg0, UWord arg1,
   1652                                   UWord arg2, UWord arg3 )
   1653 {
   1654    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
   1655                      struct timespec *timeout); */
   1656    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
   1657    if (arg3 != 0)
   1658       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
   1659 }
   1660 
   1661 /* ------ */
   1662 
   1663 static
   1664 UInt get_sem_count( Int semid )
   1665 {
   1666    struct vki_semid_ds buf;
   1667    union vki_semun arg;
   1668    SysRes res;
   1669 
   1670    /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
   1671       (experimental) otherwise complains that the use in the return
   1672       statement below is uninitialised. */
   1673    buf.sem_nsems = 0;
   1674 
   1675    arg.buf = &buf;
   1676 
   1677 #  ifdef __NR_semctl
   1678    res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
   1679 #  else
   1680    res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
   1681                           VKI_IPC_STAT, (UWord)&arg);
   1682 #  endif
   1683    if (sr_isError(res))
   1684       return 0;
   1685 
   1686    return buf.sem_nsems;
   1687 }
   1688 
   1689 void
   1690 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
   1691                               UWord arg0, UWord arg1,
   1692                               UWord arg2, UWord arg3 )
   1693 {
   1694    /* int semctl(int semid, int semnum, int cmd, ...); */
   1695    union vki_semun arg = *(union vki_semun *)&arg3;
   1696    UInt nsems;
   1697    switch (arg2 /* cmd */) {
   1698 #if defined(VKI_IPC_INFO)
   1699    case VKI_IPC_INFO:
   1700    case VKI_SEM_INFO:
   1701    case VKI_IPC_INFO|VKI_IPC_64:
   1702    case VKI_SEM_INFO|VKI_IPC_64:
   1703       PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
   1704                      (Addr)arg.buf, sizeof(struct vki_seminfo) );
   1705       break;
   1706 #endif
   1707 
   1708    case VKI_IPC_STAT:
   1709 #if defined(VKI_SEM_STAT)
   1710    case VKI_SEM_STAT:
   1711 #endif
   1712       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
   1713                      (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1714       break;
   1715 
   1716 #if defined(VKI_IPC_64)
   1717    case VKI_IPC_STAT|VKI_IPC_64:
   1718 #if defined(VKI_SEM_STAT)
   1719    case VKI_SEM_STAT|VKI_IPC_64:
   1720 #endif
   1721       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
   1722                      (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1723       break;
   1724 #endif
   1725 
   1726    case VKI_IPC_SET:
   1727       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
   1728                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1729       break;
   1730 
   1731 #if defined(VKI_IPC_64)
   1732    case VKI_IPC_SET|VKI_IPC_64:
   1733       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
   1734                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1735       break;
   1736 #endif
   1737 
   1738    case VKI_GETALL:
   1739 #if defined(VKI_IPC_64)
   1740    case VKI_GETALL|VKI_IPC_64:
   1741 #endif
   1742       nsems = get_sem_count( arg0 );
   1743       PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
   1744                      (Addr)arg.array, sizeof(unsigned short) * nsems );
   1745       break;
   1746 
   1747    case VKI_SETALL:
   1748 #if defined(VKI_IPC_64)
   1749    case VKI_SETALL|VKI_IPC_64:
   1750 #endif
   1751       nsems = get_sem_count( arg0 );
   1752       PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
   1753                     (Addr)arg.array, sizeof(unsigned short) * nsems );
   1754       break;
   1755    }
   1756 }
   1757 
   1758 void
   1759 ML_(generic_POST_sys_semctl) ( ThreadId tid,
   1760                                UWord res,
   1761                                UWord arg0, UWord arg1,
   1762                                UWord arg2, UWord arg3 )
   1763 {
   1764    union vki_semun arg = *(union vki_semun *)&arg3;
   1765    UInt nsems;
   1766    switch (arg2 /* cmd */) {
   1767 #if defined(VKI_IPC_INFO)
   1768    case VKI_IPC_INFO:
   1769    case VKI_SEM_INFO:
   1770    case VKI_IPC_INFO|VKI_IPC_64:
   1771    case VKI_SEM_INFO|VKI_IPC_64:
   1772       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
   1773       break;
   1774 #endif
   1775 
   1776    case VKI_IPC_STAT:
   1777 #if defined(VKI_SEM_STAT)
   1778    case VKI_SEM_STAT:
   1779 #endif
   1780       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1781       break;
   1782 
   1783 #if defined(VKI_IPC_64)
   1784    case VKI_IPC_STAT|VKI_IPC_64:
   1785    case VKI_SEM_STAT|VKI_IPC_64:
   1786       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1787       break;
   1788 #endif
   1789 
   1790    case VKI_GETALL:
   1791 #if defined(VKI_IPC_64)
   1792    case VKI_GETALL|VKI_IPC_64:
   1793 #endif
   1794       nsems = get_sem_count( arg0 );
   1795       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
   1796       break;
   1797    }
   1798 }
   1799 
   1800 /* ------ */
   1801 
   1802 /* ------ */
   1803 
   1804 static
   1805 SizeT get_shm_size ( Int shmid )
   1806 {
   1807 #ifdef __NR_shmctl
   1808 #  ifdef VKI_IPC_64
   1809    struct vki_shmid64_ds buf;
   1810 #    ifdef VGP_amd64_linux
   1811      /* See bug 222545 comment 7 */
   1812      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
   1813                                      VKI_IPC_STAT, (UWord)&buf);
   1814 #    else
   1815      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
   1816                                      VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
   1817 #    endif
   1818 #  else /* !def VKI_IPC_64 */
   1819    struct vki_shmid_ds buf;
   1820    SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
   1821 #  endif /* def VKI_IPC_64 */
   1822 #else
   1823    struct vki_shmid_ds buf;
   1824    SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
   1825                                  VKI_IPC_STAT, 0, (UWord)&buf);
   1826 #endif
   1827    if (sr_isError(__res))
   1828       return 0;
   1829 
   1830    return (SizeT) buf.shm_segsz;
   1831 }
   1832 
   1833 UWord
   1834 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
   1835                              UWord arg0, UWord arg1, UWord arg2 )
   1836 {
   1837    /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
   1838    SizeT  segmentSize = get_shm_size ( arg0 );
   1839    UWord tmp;
   1840    Bool  ok;
   1841    if (arg1 == 0) {
   1842       /* arm-linux only: work around the fact that
   1843          VG_(am_get_advisory_client_simple) produces something that is
   1844          VKI_PAGE_SIZE aligned, whereas what we want is something
   1845          VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
   1846          increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
   1847          then round the result up to the next VKI_SHMLBA boundary.
   1848          See bug 222545 comment 15.  So far, arm-linux is the only
   1849          platform where this is known to be necessary. */
   1850       vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
   1851       if (VKI_SHMLBA > VKI_PAGE_SIZE) {
   1852          segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
   1853       }
   1854       tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
   1855       if (ok) {
   1856          if (VKI_SHMLBA > VKI_PAGE_SIZE) {
   1857             arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
   1858          } else {
   1859             arg1 = tmp;
   1860          }
   1861       }
   1862    }
   1863    else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
   1864       arg1 = 0;
   1865    return arg1;
   1866 }
   1867 
   1868 void
   1869 ML_(generic_POST_sys_shmat) ( ThreadId tid,
   1870                               UWord res,
   1871                               UWord arg0, UWord arg1, UWord arg2 )
   1872 {
   1873    SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
   1874    if ( segmentSize > 0 ) {
   1875       UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
   1876       Bool d;
   1877 
   1878       if (arg2 & VKI_SHM_RDONLY)
   1879          prot &= ~VKI_PROT_WRITE;
   1880       /* It isn't exactly correct to pass 0 for the fd and offset
   1881          here.  The kernel seems to think the corresponding section
   1882          does have dev/ino numbers:
   1883 
   1884          04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
   1885 
   1886          However there is no obvious way to find them.  In order to
   1887          cope with the discrepancy, aspacem's sync checker omits the
   1888          dev/ino correspondence check in cases where V does not know
   1889          the dev/ino. */
   1890       d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
   1891 
   1892       /* we don't distinguish whether it's read-only or
   1893        * read-write -- it doesn't matter really. */
   1894       VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
   1895                               0/*di_handle*/ );
   1896       if (d)
   1897          VG_(discard_translations)( (Addr64)res,
   1898                                     (ULong)VG_PGROUNDUP(segmentSize),
   1899                                     "ML_(generic_POST_sys_shmat)" );
   1900    }
   1901 }
   1902 
   1903 /* ------ */
   1904 
/* PRE handler for shmdt().  arg0 is the attach address; returns the
   verdict of ML_(valid_client_addr) on the single byte at that
   address. */
Bool
ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
{
   /* int shmdt(const void *shmaddr); */
   return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
}
   1911 
   1912 void
   1913 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
   1914 {
   1915    NSegment const* s = VG_(am_find_nsegment)(arg0);
   1916 
   1917    if (s != NULL) {
   1918       Addr  s_start = s->start;
   1919       SizeT s_len   = s->end+1 - s->start;
   1920       Bool  d;
   1921 
   1922       vg_assert(s->kind == SkShmC);
   1923       vg_assert(s->start == arg0);
   1924 
   1925       d = VG_(am_notify_munmap)(s_start, s_len);
   1926       s = NULL; /* s is now invalid */
   1927       VG_TRACK( die_mem_munmap, s_start, s_len );
   1928       if (d)
   1929          VG_(discard_translations)( (Addr64)s_start,
   1930                                     (ULong)s_len,
   1931                                     "ML_(generic_POST_sys_shmdt)" );
   1932    }
   1933 }
   1934 /* ------ */
   1935 
   1936 void
   1937 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
   1938                               UWord arg0, UWord arg1, UWord arg2 )
   1939 {
   1940    /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
   1941    switch (arg1 /* cmd */) {
   1942 #if defined(VKI_IPC_INFO)
   1943    case VKI_IPC_INFO:
   1944       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
   1945                      arg2, sizeof(struct vki_shminfo) );
   1946       break;
   1947 #if defined(VKI_IPC_64)
   1948    case VKI_IPC_INFO|VKI_IPC_64:
   1949       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
   1950                      arg2, sizeof(struct vki_shminfo64) );
   1951       break;
   1952 #endif
   1953 #endif
   1954 
   1955 #if defined(VKI_SHM_INFO)
   1956    case VKI_SHM_INFO:
   1957 #if defined(VKI_IPC_64)
   1958    case VKI_SHM_INFO|VKI_IPC_64:
   1959 #endif
   1960       PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
   1961                      arg2, sizeof(struct vki_shm_info) );
   1962       break;
   1963 #endif
   1964 
   1965    case VKI_IPC_STAT:
   1966 #if defined(VKI_SHM_STAT)
   1967    case VKI_SHM_STAT:
   1968 #endif
   1969       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
   1970                      arg2, sizeof(struct vki_shmid_ds) );
   1971       break;
   1972 
   1973 #if defined(VKI_IPC_64)
   1974    case VKI_IPC_STAT|VKI_IPC_64:
   1975    case VKI_SHM_STAT|VKI_IPC_64:
   1976       PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
   1977                      arg2, sizeof(struct vki_shmid64_ds) );
   1978       break;
   1979 #endif
   1980 
   1981    case VKI_IPC_SET:
   1982       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
   1983                     arg2, sizeof(struct vki_shmid_ds) );
   1984       break;
   1985 
   1986 #if defined(VKI_IPC_64)
   1987    case VKI_IPC_SET|VKI_IPC_64:
   1988       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
   1989                     arg2, sizeof(struct vki_shmid64_ds) );
   1990       break;
   1991 #endif
   1992    }
   1993 }
   1994 
   1995 void
   1996 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
   1997                                UWord res,
   1998                                UWord arg0, UWord arg1, UWord arg2 )
   1999 {
   2000    switch (arg1 /* cmd */) {
   2001 #if defined(VKI_IPC_INFO)
   2002    case VKI_IPC_INFO:
   2003       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
   2004       break;
   2005    case VKI_IPC_INFO|VKI_IPC_64:
   2006       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
   2007       break;
   2008 #endif
   2009 
   2010 #if defined(VKI_SHM_INFO)
   2011    case VKI_SHM_INFO:
   2012    case VKI_SHM_INFO|VKI_IPC_64:
   2013       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
   2014       break;
   2015 #endif
   2016 
   2017    case VKI_IPC_STAT:
   2018 #if defined(VKI_SHM_STAT)
   2019    case VKI_SHM_STAT:
   2020 #endif
   2021       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
   2022       break;
   2023 
   2024 #if defined(VKI_IPC_64)
   2025    case VKI_IPC_STAT|VKI_IPC_64:
   2026    case VKI_SHM_STAT|VKI_IPC_64:
   2027       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
   2028       break;
   2029 #endif
   2030 
   2031 
   2032    }
   2033 }
   2034 
   2035 /* ---------------------------------------------------------------------
   2036    Generic handler for mmap
   2037    ------------------------------------------------------------------ */
   2038 
   2039 /*
 * Although mmap is specified by POSIX and the arguments are generally
 * consistent across platforms, the precise details of the low-level
 * argument passing conventions differ. For example:
   2043  *
   2044  * - On x86-linux there is mmap (aka old_mmap) which takes the
   2045  *   arguments in a memory block and the offset in bytes; and
   2046  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
   2047  *   way and the offset in pages.
   2048  *
   2049  * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
   2050  *   arguments in the normal way and the offset in bytes; and
   2051  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
   2052  *   way and the offset in pages.
   2053  *
   2054  * - On amd64-linux everything is simple and there is just the one
   2055  *   call, mmap (aka sys_mmap)  which takes the arguments in the
   2056  *   normal way and the offset in bytes.
   2057  *
   2058  * - On s390x-linux there is mmap (aka old_mmap) which takes the
   2059  *   arguments in a memory block and the offset in bytes. mmap2
   2060  *   is also available (but not exported via unistd.h) with
   2061  *   arguments in a memory block and the offset in pages.
   2062  *
   2063  * To cope with all this we provide a generic handler function here
   2064  * and then each platform implements one or more system call handlers
   2065  * which call this generic routine after extracting and normalising
   2066  * the arguments.
   2067  */
   2068 
/* Generic mmap handler, called by the per-platform wrappers once they
   have normalised their arguments.

   arg1 = requested address, arg2 = length, arg3 = prot, arg4 = flags,
   arg5 = fd, arg6 = file offset (checked for page alignment below).

   Rejects requests with a zero length, a misaligned address or offset,
   or (where it exists) MAP_32BIT; asks aspacem where the mapping should
   go; performs the mmap at that address with MAP_FIXED forced on; and,
   on success, notifies aspacem, the debuginfo reader and the tool.

   Returns the SysRes of the mmap that was performed, or an error
   SysRes (EINVAL or ENOMEM) when the request is refused up front. */
SysRes
ML_(generic_PRE_sys_mmap) ( ThreadId tid,
                            UWord arg1, UWord arg2, UWord arg3,
                            UWord arg4, UWord arg5, Off64T arg6 )
{
   Addr       advised;
   SysRes     sres;
   MapRequest mreq;
   Bool       mreq_ok;

#  if defined(VGO_darwin)
   // Nb: we can't use this on Darwin, it has races:
   // * needs to RETRY if advisory succeeds but map fails
   //   (could have been some other thread in a nonblocking call)
   // * needs to not use fixed-position mmap() on Darwin
   //   (mmap will cheerfully smash whatever's already there, which might
   //   be a new mapping from some other thread in a nonblocking call)
   VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
#  endif

   if (arg2 == 0) {
      /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
         shall be established. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg1)) {
      /* zap any misaligned addresses. */
      /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
         to fail.   Here, we catch them all. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg6)) {
      /* zap any misaligned offsets. */
      /* SuSV3 says: The off argument is constrained to be aligned and
         sized according to the value returned by sysconf() when
         passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

#  if defined(VKI_MAP_32BIT)
   /* We can't support MAP_32BIT (at least, not without significant
      complication), and it's royally unportable, so if the client
      asks for it, just fail it. */
   if (arg4 & VKI_MAP_32BIT) {
      return VG_(mk_SysRes_Error)( VKI_ENOMEM );
   }
#  endif

   /* Figure out what kind of allocation constraints there are
      (fixed/hint/any), and ask aspacem what we should do. */
   mreq.start = arg1;
   mreq.len   = arg2;
   if (arg4 & VKI_MAP_FIXED) {
      mreq.rkind = MFixed;
   } else
   if (arg1 != 0) {
      mreq.rkind = MHint;
   } else {
      mreq.rkind = MAny;
   }

   /* Enquire ... */
   advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   if (!mreq_ok) {
      /* Our request was bounced, so we'd better fail. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   /* Otherwise we're OK (so far).  Install aspacem's choice of
      address, and let the mmap go through.  */
   /* MAP_FIXED is forced on so the kernel uses exactly the advised
      address; aspacem is not told about the mapping by this call. */
   sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                    arg4 | VKI_MAP_FIXED,
                                    arg5, arg6);

   /* A refinement: it may be that the kernel refused aspacem's choice
      of address.  If we were originally asked for a hinted mapping,
      there is still a last chance: try again at any address.
      Hence: */
   if (mreq.rkind == MHint && sr_isError(sres)) {
      mreq.start = 0;
      mreq.len   = arg2;
      mreq.rkind = MAny;
      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
      if (!mreq_ok) {
         /* Our request was bounced, so we'd better fail. */
         return VG_(mk_SysRes_Error)( VKI_EINVAL );
      }
      /* and try again with the kernel */
      sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                       arg4 | VKI_MAP_FIXED,
                                       arg5, arg6);
   }

   if (!sr_isError(sres)) {
      ULong di_handle;
      /* Notify aspacem. */
      notify_core_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         arg4, /* the original flags value */
         arg5, /* fd */
         arg6  /* offset */
      );
      /* Load symbols? */
      di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
                                       False/*allow_SkFileV*/, (Int)arg5 );
      /* Notify the tool. */
      notify_tool_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         di_handle /* so the tool can refer to the read debuginfo later,
                      if it wants. */
      );
   }

   /* Stay sane */
   /* A successful MAP_FIXED request must land exactly where the client
      asked. */
   if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
      vg_assert(sr_Res(sres) == arg1);

   return sres;
}
   2194 
   2195 
   2196 /* ---------------------------------------------------------------------
   2197    The Main Entertainment ... syscall wrappers
   2198    ------------------------------------------------------------------ */
   2199 
   2200 /* Note: the PRE() and POST() wrappers are for the actual functions
   2201    implementing the system calls in the OS kernel.  These mostly have
   2202    names like sys_write();  a few have names like old_mmap().  See the
   2203    comment for ML_(syscall_table)[] for important info about the __NR_foo
   2204    constants and their relationship to the sys_foo() functions.
   2205 
   2206    Some notes about names used for syscalls and args:
   2207    - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
   2208      ambiguity.
   2209 
   2210    - For error messages, we generally use a somewhat generic name
   2211      for the syscall (eg. "write" rather than "sys_write").  This should be
   2212      good enough for the average user to understand what is happening,
   2213      without confusing them with names like "sys_write".
   2214 
   2215    - Also, for error messages the arg names are mostly taken from the man
   2216      pages (even though many of those man pages are really for glibc
   2217      functions of the same name), rather than from the OS kernel source,
   2218      for the same reason -- a user presented with a "bogus foo(bar)" arg
   2219      will most likely look at the "foo" man page to see which is the "bar"
   2220      arg.
   2221 
   2222    Note that we use our own vki_* types.  The one exception is in
   2223    PRE_REG_READn calls, where pointer types haven't been changed, because
   2224    they don't need to be -- eg. for "foo*" to be used, the type foo need not
   2225    be visible.
   2226 
   2227    XXX: some of these are arch-specific, and should be factored out.
   2228 */
   2229 
/* Shorthands for defining the "generic" PRE and POST wrappers below. */
#define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
#define POST(name)     DEFN_POST_TEMPLATE(generic, name)

// Macros to support 64-bit syscall args split into two 32 bit values.
// MERGE64 takes the two halves in the order they are passed (low half
// first on little-endian, high half first on big-endian) and glues
// them into one ULong.  MERGE64_FIRST/MERGE64_SECOND build the names
// of the first and second halves by appending _low/_high accordingly.
#if defined(VG_LITTLEENDIAN)
#define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
#define MERGE64_FIRST(name) name##_low
#define MERGE64_SECOND(name) name##_high
#elif defined(VG_BIGENDIAN)
#define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
#define MERGE64_FIRST(name) name##_high
#define MERGE64_SECOND(name) name##_low
#else
#error Unknown endianness
#endif
   2245 
/* PRE wrapper for exit().  Records on the calling thread's state that
   it is exiting, along with the exit code from ARG1, and reports the
   syscall as having succeeded with result 0. */
PRE(sys_exit)
{
   ThreadState* tst;
   /* simple; just make this thread exit */
   PRINT("exit( %ld )", ARG1);
   PRE_REG_READ1(void, "exit", int, status);
   tst = VG_(get_ThreadState)(tid);
   /* Set the thread's status to be exiting, then claim that the
      syscall succeeded. */
   tst->exitreason = VgSrc_ExitThread;
   tst->os_state.exitcode = ARG1;
   SET_STATUS_Success(0);
}
   2259 
/* PRE wrapper for syscalls the kernel itself does not implement:
   traces the syscall's name and reports failure with ENOSYS. */
PRE(sys_ni_syscall)
{
   PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
      VG_SYSNUM_STRING(SYSNO));
   PRE_REG_READ0(long, "ni_syscall");
   SET_STATUS_Failure( VKI_ENOSYS );
}
   2267 
/* PRE wrapper for iopl(level): traces the call and declares its one
   register argument. */
PRE(sys_iopl)
{
   PRINT("sys_iopl ( %ld )", ARG1);
   PRE_REG_READ1(long, "iopl", unsigned long, level);
}
   2273 
/* PRE wrapper for fsync(fd): flags the syscall as one that may block,
   traces it and declares its one register argument. */
PRE(sys_fsync)
{
   *flags |= SfMayBlock;
   PRINT("sys_fsync ( %ld )", ARG1);
   PRE_REG_READ1(long, "fsync", unsigned int, fd);
}
   2280 
/* PRE wrapper for fdatasync(fd): flags the syscall as one that may
   block, traces it and declares its one register argument. */
PRE(sys_fdatasync)
{
   *flags |= SfMayBlock;
   PRINT("sys_fdatasync ( %ld )", ARG1);
   PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
}
   2287 
/* PRE wrapper for msync(start, length, flags): flags the syscall as
   one that may block, traces it, declares its three register
   arguments, and declares the range [start, start+length) as read. */
PRE(sys_msync)
{
   *flags |= SfMayBlock;
   PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "msync",
                 unsigned long, start, vki_size_t, length, int, flags);
   PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
}
   2296 
// Nb: getpmsg() and putpmsg() are special additional syscalls used in early
// versions of LiS (Linux Streams).  They are not part of the kernel.
// Therefore, we have to provide this type ourselves, rather than getting it
// from the kernel sources.
// It describes one message buffer (control or data part) as exchanged
// by the getpmsg/putpmsg wrappers below.
struct vki_pmsg_strbuf {
   int     maxlen;         /* no. of bytes in buffer */
   int     len;            /* no. of bytes returned */
   vki_caddr_t buf;        /* pointer to data */
};
   2306 PRE(sys_getpmsg)
   2307 {
   2308    /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
   2309    struct vki_pmsg_strbuf *ctrl;
   2310    struct vki_pmsg_strbuf *data;
   2311    *flags |= SfMayBlock;
   2312    PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   2313    PRE_REG_READ5(int, "getpmsg",
   2314                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
   2315                  int *, bandp, int *, flagsp);
   2316    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2317    data = (struct vki_pmsg_strbuf *)ARG3;
   2318    if (ctrl && ctrl->maxlen > 0)
   2319       PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
   2320    if (data && data->maxlen > 0)
   2321       PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
   2322    if (ARG4)
   2323       PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
   2324    if (ARG5)
   2325       PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
   2326 }
   2327 POST(sys_getpmsg)
   2328 {
   2329    struct vki_pmsg_strbuf *ctrl;
   2330    struct vki_pmsg_strbuf *data;
   2331    vg_assert(SUCCESS);
   2332    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2333    data = (struct vki_pmsg_strbuf *)ARG3;
   2334    if (RES == 0 && ctrl && ctrl->len > 0) {
   2335       POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
   2336    }
   2337    if (RES == 0 && data && data->len > 0) {
   2338       POST_MEM_WRITE( (Addr)data->buf, data->len);
   2339    }
   2340 }
   2341 
   2342 PRE(sys_putpmsg)
   2343 {
   2344    /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
   2345    struct vki_pmsg_strbuf *ctrl;
   2346    struct vki_pmsg_strbuf *data;
   2347    *flags |= SfMayBlock;
   2348    PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
   2349    PRE_REG_READ5(int, "putpmsg",
   2350                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
   2351                  int, band, int, flags);
   2352    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2353    data = (struct vki_pmsg_strbuf *)ARG3;
   2354    if (ctrl && ctrl->len > 0)
   2355       PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
   2356    if (data && data->len > 0)
   2357       PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
   2358 }
   2359 
   2360 PRE(sys_getitimer)
   2361 {
   2362    struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2363    PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
   2364    PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
   2365 
   2366    PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
   2367    PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
   2368 }
   2369 
   2370 POST(sys_getitimer)
   2371 {
   2372    if (ARG2 != (Addr)NULL) {
   2373       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2374       POST_timeval_WRITE( &(value->it_interval) );
   2375       POST_timeval_WRITE( &(value->it_value) );
   2376    }
   2377 }
   2378 
   2379 PRE(sys_setitimer)
   2380 {
   2381    PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
   2382    PRE_REG_READ3(long, "setitimer",
   2383                  int, which,
   2384                  struct itimerval *, value, struct itimerval *, ovalue);
   2385    if (ARG2 != (Addr)NULL) {
   2386       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2387       PRE_timeval_READ( "setitimer(&value->it_interval)",
   2388                          &(value->it_interval));
   2389       PRE_timeval_READ( "setitimer(&value->it_value)",
   2390                          &(value->it_value));
   2391    }
   2392    if (ARG3 != (Addr)NULL) {
   2393       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
   2394       PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
   2395                          &(ovalue->it_interval));
   2396       PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
   2397                          &(ovalue->it_value));
   2398    }
   2399 }
   2400 
   2401 POST(sys_setitimer)
   2402 {
   2403    if (ARG3 != (Addr)NULL) {
   2404       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
   2405       POST_timeval_WRITE( &(ovalue->it_interval) );
   2406       POST_timeval_WRITE( &(ovalue->it_value) );
   2407    }
   2408 }
   2409 
/* PRE wrapper for chroot(path): traces the call, declares its one
   register argument, and declares the NUL-terminated string at ARG1
   as read. */
PRE(sys_chroot)
{
   PRINT("sys_chroot ( %#lx )", ARG1);
   PRE_REG_READ1(long, "chroot", const char *, path);
   PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
}
   2416 
   2417 PRE(sys_madvise)
   2418 {
   2419    *flags |= SfMayBlock;
   2420    PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   2421    PRE_REG_READ3(long, "madvise",
   2422                  unsigned long, start, vki_size_t, length, int, advice);
   2423 }
   2424 
   2425 #if HAVE_MREMAP
   2426 PRE(sys_mremap)
   2427 {
   2428    // Nb: this is different to the glibc version described in the man pages,
   2429    // which lacks the fifth 'new_address' argument.
   2430    if (ARG4 & VKI_MREMAP_FIXED) {
   2431       PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
   2432             ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
   2433       PRE_REG_READ5(unsigned long, "mremap",
   2434                     unsigned long, old_addr, unsigned long, old_size,
   2435                     unsigned long, new_size, unsigned long, flags,
   2436                     unsigned long, new_addr);
   2437    } else {
   2438       PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
   2439             ARG1, (ULong)ARG2, ARG3, ARG4);
   2440       PRE_REG_READ4(unsigned long, "mremap",
   2441                     unsigned long, old_addr, unsigned long, old_size,
   2442                     unsigned long, new_size, unsigned long, flags);
   2443    }
   2444    SET_STATUS_from_SysRes(
   2445       do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
   2446    );
   2447 }
   2448 #endif /* HAVE_MREMAP */
   2449 
   2450 PRE(sys_nice)
   2451 {
   2452    PRINT("sys_nice ( %ld )", ARG1);
   2453    PRE_REG_READ1(long, "nice", int, inc);
   2454 }
   2455 
   2456 PRE(sys_mlock)
   2457 {
   2458    *flags |= SfMayBlock;
   2459    PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   2460    PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
   2461 }
   2462 
   2463 PRE(sys_munlock)
   2464 {
   2465    *flags |= SfMayBlock;
   2466    PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   2467    PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
   2468 }
   2469 
   2470 PRE(sys_mlockall)
   2471 {
   2472    *flags |= SfMayBlock;
   2473    PRINT("sys_mlockall ( %lx )", ARG1);
   2474    PRE_REG_READ1(long, "mlockall", int, flags);
   2475 }
   2476 
   2477 PRE(sys_setpriority)
   2478 {
   2479    PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
   2480    PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
   2481 }
   2482 
   2483 PRE(sys_getpriority)
   2484 {
   2485    PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
   2486    PRE_REG_READ2(long, "getpriority", int, which, int, who);
   2487 }
   2488 
   2489 PRE(sys_pwrite64)
   2490 {
   2491    *flags |= SfMayBlock;
   2492 #if VG_WORDSIZE == 4
   2493    PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
   2494          ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   2495    PRE_REG_READ5(ssize_t, "pwrite64",
   2496                  unsigned int, fd, const char *, buf, vki_size_t, count,
   2497                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
   2498 #elif VG_WORDSIZE == 8
   2499    PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
   2500          ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   2501    PRE_REG_READ4(ssize_t, "pwrite64",
   2502                  unsigned int, fd, const char *, buf, vki_size_t, count,
   2503                  Word, offset);
   2504 #else
   2505 #  error Unexpected word size
   2506 #endif
   2507    PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
   2508 }
   2509 
   2510 PRE(sys_sync)
   2511 {
   2512    *flags |= SfMayBlock;
   2513    PRINT("sys_sync ( )");
   2514    PRE_REG_READ0(long, "sync");
   2515 }
   2516 
   2517 PRE(sys_fstatfs)
   2518 {
   2519    FUSE_COMPATIBLE_MAY_BLOCK();
   2520    PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
   2521    PRE_REG_READ2(long, "fstatfs",
   2522                  unsigned int, fd, struct statfs *, buf);
   2523    PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
   2524 }
   2525 
   2526 POST(sys_fstatfs)
   2527 {
   2528    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
   2529 }
   2530 
   2531 PRE(sys_fstatfs64)
   2532 {
   2533    FUSE_COMPATIBLE_MAY_BLOCK();
   2534    PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
   2535    PRE_REG_READ3(long, "fstatfs64",
   2536                  unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
   2537    PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
   2538 }
   2539 POST(sys_fstatfs64)
   2540 {
   2541    POST_MEM_WRITE( ARG3, ARG2 );
   2542 }
   2543 
   2544 PRE(sys_getsid)
   2545 {
   2546    PRINT("sys_getsid ( %ld )", ARG1);
   2547    PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
   2548 }
   2549 
   2550 PRE(sys_pread64)
   2551 {
   2552    *flags |= SfMayBlock;
   2553 #if VG_WORDSIZE == 4
   2554    PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
   2555          ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   2556    PRE_REG_READ5(ssize_t, "pread64",
   2557                  unsigned int, fd, char *, buf, vki_size_t, count,
   2558                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
   2559 #elif VG_WORDSIZE == 8
   2560    PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
   2561          ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   2562    PRE_REG_READ4(ssize_t, "pread64",
   2563                  unsigned int, fd, char *, buf, vki_size_t, count,
   2564                  Word, offset);
   2565 #else
   2566 #  error Unexpected word size
   2567 #endif
   2568    PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
   2569 }
   2570 POST(sys_pread64)
   2571 {
   2572    vg_assert(SUCCESS);
   2573    if (RES > 0) {
   2574       POST_MEM_WRITE( ARG2, RES );
   2575    }
   2576 }
   2577 
   2578 PRE(sys_mknod)
   2579 {
   2580    FUSE_COMPATIBLE_MAY_BLOCK();
   2581    PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
   2582    PRE_REG_READ3(long, "mknod",
   2583                  const char *, pathname, int, mode, unsigned, dev);
   2584    PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
   2585 }
   2586 
   2587 PRE(sys_flock)
   2588 {
   2589    *flags |= SfMayBlock;
   2590    PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
   2591    PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
   2592 }
   2593 
   2594 // Pre_read a char** argument.
   2595 static void pre_argv_envp(Addr a, ThreadId tid, const HChar* s1, const HChar* s2)
   2596 {
   2597    while (True) {
   2598       Addr a_deref;
   2599       Addr* a_p = (Addr*)a;
   2600       PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
   2601       a_deref = *a_p;
   2602       if (0 == a_deref)
   2603          break;
   2604       PRE_MEM_RASCIIZ( s2, a_deref );
   2605       a += sizeof(char*);
   2606    }
   2607 }
   2608 
   2609 static Bool i_am_the_only_thread ( void )
   2610 {
   2611    Int c = VG_(count_living_threads)();
   2612    vg_assert(c >= 1); /* stay sane */
   2613    return c == 1;
   2614 }
   2615 
   2616 /* Wait until all other threads disappear. */
   2617 void VG_(reap_threads)(ThreadId self)
   2618 {
   2619    while (!i_am_the_only_thread()) {
   2620       /* Let other thread(s) run */
   2621       VG_(vg_yield)();
   2622       VG_(poll_signals)(self);
   2623    }
   2624    vg_assert(i_am_the_only_thread());
   2625 }
   2626 
   2627 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
   2628 // but it seems to work nonetheless...
   2629 PRE(sys_execve)
   2630 {
   2631    HChar*       path = NULL;       /* path to executable */
   2632    HChar**      envp = NULL;
   2633    HChar**      argv = NULL;
   2634    HChar**      arg2copy;
   2635    HChar*       launcher_basename = NULL;
   2636    ThreadState* tst;
   2637    Int          i, j, tot_args;
   2638    SysRes       res;
   2639    Bool         setuid_allowed, trace_this_child;
   2640 
   2641    PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
   2642    PRE_REG_READ3(vki_off_t, "execve",
   2643                  char *, filename, char **, argv, char **, envp);
   2644    PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
   2645    if (ARG2 != 0)
   2646       pre_argv_envp( ARG2, tid, "execve(argv)", "execve(argv[i])" );
   2647    if (ARG3 != 0)
   2648       pre_argv_envp( ARG3, tid, "execve(envp)", "execve(envp[i])" );
   2649 
   2650    vg_assert(VG_(is_valid_tid)(tid));
   2651    tst = VG_(get_ThreadState)(tid);
   2652 
   2653    /* Erk.  If the exec fails, then the following will have made a
   2654       mess of things which makes it hard for us to continue.  The
   2655       right thing to do is piece everything together again in
   2656       POST(execve), but that's close to impossible.  Instead, we make
   2657       an effort to check that the execve will work before actually
   2658       doing it. */
   2659 
   2660    /* Check that the name at least begins in client-accessible storage. */
   2661    if (ARG1 == 0 /* obviously bogus */
   2662        || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
   2663       SET_STATUS_Failure( VKI_EFAULT );
   2664       return;
   2665    }
   2666 
   2667    // debug-only printing
   2668    if (0) {
   2669       VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
   2670       if (ARG2) {
   2671          VG_(printf)("ARG2 = ");
   2672          Int q;
   2673          HChar** vec = (HChar**)ARG2;
   2674          for (q = 0; vec[q]; q++)
   2675             VG_(printf)("%p(%s) ", vec[q], vec[q]);
   2676          VG_(printf)("\n");
   2677       } else {
   2678          VG_(printf)("ARG2 = null\n");
   2679       }
   2680    }
   2681 
   2682    // Decide whether or not we want to follow along
   2683    { // Make 'child_argv' be a pointer to the child's arg vector
   2684      // (skipping the exe name)
   2685      HChar** child_argv = (HChar**)ARG2;
   2686      if (child_argv && child_argv[0] == NULL)
   2687         child_argv = NULL;
   2688      trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
   2689    }
   2690 
   2691    // Do the important checks:  it is a file, is executable, permissions are
   2692    // ok, etc.  We allow setuid executables to run only in the case when
   2693    // we are not simulating them, that is, they to be run natively.
   2694    setuid_allowed = trace_this_child  ? False  : True;
   2695    res = VG_(pre_exec_check)((const HChar *)ARG1, NULL, setuid_allowed);
   2696    if (sr_isError(res)) {
   2697       SET_STATUS_Failure( sr_Err(res) );
   2698       return;
   2699    }
   2700 
   2701    /* If we're tracing the child, and the launcher name looks bogus
   2702       (possibly because launcher.c couldn't figure it out, see
   2703       comments therein) then we have no option but to fail. */
   2704    if (trace_this_child
   2705        && (VG_(name_of_launcher) == NULL
   2706            || VG_(name_of_launcher)[0] != '/')) {
   2707       SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
   2708       return;
   2709    }
   2710 
   2711    /* After this point, we can't recover if the execve fails. */
   2712    VG_(debugLog)(1, "syswrap", "Exec of %s\n", (HChar*)ARG1);
   2713 
   2714 
   2715    // Terminate gdbserver if it is active.
   2716    if (VG_(clo_vgdb)  != Vg_VgdbNo) {
   2717       // If the child will not be traced, we need to terminate gdbserver
   2718       // to cleanup the gdbserver resources (e.g. the FIFO files).
   2719       // If child will be traced, we also terminate gdbserver: the new
   2720       // Valgrind will start a fresh gdbserver after exec.
   2721       VG_(gdbserver) (0);
   2722    }
   2723 
   2724    /* Resistance is futile.  Nuke all other threads.  POSIX mandates
   2725       this. (Really, nuke them all, since the new process will make
   2726       its own new thread.) */
   2727    VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
   2728    VG_(reap_threads)(tid);
   2729 
   2730    // Set up the child's exe path.
   2731    //
   2732    if (trace_this_child) {
   2733 
   2734       // We want to exec the launcher.  Get its pre-remembered path.
   2735       path = VG_(name_of_launcher);
   2736       // VG_(name_of_launcher) should have been acquired by m_main at
   2737       // startup.
   2738       vg_assert(path);
   2739 
   2740       launcher_basename = VG_(strrchr)(path, '/');
   2741       if (launcher_basename == NULL || launcher_basename[1] == 0) {
   2742          launcher_basename = path;  // hmm, tres dubious
   2743       } else {
   2744          launcher_basename++;
   2745       }
   2746 
   2747    } else {
   2748       path = (HChar*)ARG1;
   2749    }
   2750 
   2751    // Set up the child's environment.
   2752    //
   2753    // Remove the valgrind-specific stuff from the environment so the
   2754    // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
   2755    // This is done unconditionally, since if we are tracing the child,
   2756    // the child valgrind will set up the appropriate client environment.
   2757    // Nb: we make a copy of the environment before trying to mangle it
   2758    // as it might be in read-only memory (this was bug #101881).
   2759    //
   2760    // Then, if tracing the child, set VALGRIND_LIB for it.
   2761    //
   2762    if (ARG3 == 0) {
   2763       envp = NULL;
   2764    } else {
   2765       envp = VG_(env_clone)( (HChar**)ARG3 );
   2766       if (envp == NULL) goto hosed;
   2767       VG_(env_remove_valgrind_env_stuff)( envp );
   2768    }
   2769 
   2770    if (trace_this_child) {
   2771       // Set VALGRIND_LIB in ARG3 (the environment)
   2772       VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
   2773    }
   2774 
   2775    // Set up the child's args.  If not tracing it, they are
   2776    // simply ARG2.  Otherwise, they are
   2777    //
   2778    // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
   2779    //
   2780    // except that the first VG_(args_for_valgrind_noexecpass) args
   2781    // are omitted.
   2782    //
   2783    if (!trace_this_child) {
   2784       argv = (HChar**)ARG2;
   2785    } else {
   2786       vg_assert( VG_(args_for_valgrind) );
   2787       vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
   2788       vg_assert( VG_(args_for_valgrind_noexecpass)
   2789                    <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
   2790       /* how many args in total will there be? */
   2791       // launcher basename
   2792       tot_args = 1;
   2793       // V's args
   2794       tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
   2795       tot_args -= VG_(args_for_valgrind_noexecpass);
   2796       // name of client exe
   2797       tot_args++;
   2798       // args for client exe, skipping [0]
   2799       arg2copy = (HChar**)ARG2;
   2800       if (arg2copy && arg2copy[0]) {
   2801          for (i = 1; arg2copy[i]; i++)
   2802             tot_args++;
   2803       }
   2804       // allocate
   2805       argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
   2806                           (tot_args+1) * sizeof(HChar*) );
   2807       if (argv == 0) goto hosed;
   2808       // copy
   2809       j = 0;
   2810       argv[j++] = launcher_basename;
   2811       for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
   2812          if (i < VG_(args_for_valgrind_noexecpass))
   2813             continue;
   2814          argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
   2815       }
   2816       argv[j++] = (HChar*)ARG1;
   2817       if (arg2copy && arg2copy[0])
   2818          for (i = 1; arg2copy[i]; i++)
   2819             argv[j++] = arg2copy[i];
   2820       argv[j++] = NULL;
   2821       // check
   2822       vg_assert(j == tot_args+1);
   2823    }
   2824 
   2825    /* restore the DATA rlimit for the child */
   2826    VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
   2827 
   2828    /*
   2829       Set the signal state up for exec.
   2830 
   2831       We need to set the real signal state to make sure the exec'd
   2832       process gets SIG_IGN properly.
   2833 
   2834       Also set our real sigmask to match the client's sigmask so that
   2835       the exec'd child will get the right mask.  First we need to
   2836       clear out any pending signals so they they don't get delivered,
   2837       which would confuse things.
   2838 
   2839       XXX This is a bug - the signals should remain pending, and be
   2840       delivered to the new process after exec.  There's also a
   2841       race-condition, since if someone delivers us a signal between
   2842       the sigprocmask and the execve, we'll still get the signal. Oh
   2843       well.
   2844    */
   2845    {
   2846       vki_sigset_t allsigs;
   2847       vki_siginfo_t info;
   2848 
   2849       /* What this loop does: it queries SCSS (the signal state that
   2850          the client _thinks_ the kernel is in) by calling
   2851          VG_(do_sys_sigaction), and modifies the real kernel signal
   2852          state accordingly. */
   2853       for (i = 1; i < VG_(max_signal); i++) {
   2854          vki_sigaction_fromK_t sa_f;
   2855          vki_sigaction_toK_t   sa_t;
   2856          VG_(do_sys_sigaction)(i, NULL, &sa_f);
   2857          VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
   2858          if (sa_t.ksa_handler == VKI_SIG_IGN)
   2859             VG_(sigaction)(i, &sa_t, NULL);
   2860          else {
   2861             sa_t.ksa_handler = VKI_SIG_DFL;
   2862             VG_(sigaction)(i, &sa_t, NULL);
   2863          }
   2864       }
   2865 
   2866       VG_(sigfillset)(&allsigs);
   2867       while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
   2868          ;
   2869 
   2870       VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
   2871    }
   2872 
   2873    if (0) {
   2874       HChar **cpp;
   2875       VG_(printf)("exec: %s\n", path);
   2876       for (cpp = argv; cpp && *cpp; cpp++)
   2877          VG_(printf)("argv: %s\n", *cpp);
   2878       if (0)
   2879          for (cpp = envp; cpp && *cpp; cpp++)
   2880             VG_(printf)("env: %s\n", *cpp);
   2881    }
   2882 
   2883    SET_STATUS_from_SysRes(
   2884       VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
   2885    );
   2886 
   2887    /* If we got here, then the execve failed.  We've already made way
   2888       too much of a mess to continue, so we have to abort. */
   2889   hosed:
   2890    vg_assert(FAILURE);
   2891    VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %ld\n",
   2892                 ARG1, (char*)ARG1, ARG2, ARG3, ERR);
   2893    VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
   2894                             "execve() failing, so I'm dying.\n");
   2895    VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
   2896                             "or work out how to recover.\n");
   2897    VG_(exit)(101);
   2898 }
   2899 
   2900 PRE(sys_access)
   2901 {
   2902    PRINT("sys_access ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   2903    PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
   2904    PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
   2905 }
   2906 
   2907 PRE(sys_alarm)
   2908 {
   2909    PRINT("sys_alarm ( %ld )", ARG1);
   2910    PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
   2911 }
   2912 
   2913 PRE(sys_brk)
   2914 {
   2915    Addr brk_limit = VG_(brk_limit);
   2916    Addr brk_new;
   2917 
   2918    /* libc   says: int   brk(void *end_data_segment);
   2919       kernel says: void* brk(void* end_data_segment);  (more or less)
   2920 
   2921       libc returns 0 on success, and -1 (and sets errno) on failure.
   2922       Nb: if you ask to shrink the dataseg end below what it
   2923       currently is, that always succeeds, even if the dataseg end
   2924       doesn't actually change (eg. brk(0)).  Unless it seg faults.
   2925 
   2926       Kernel returns the new dataseg end.  If the brk() failed, this
   2927       will be unchanged from the old one.  That's why calling (kernel)
   2928       brk(0) gives the current dataseg end (libc brk() just returns
   2929       zero in that case).
   2930 
   2931       Both will seg fault if you shrink it back into a text segment.
   2932    */
   2933    PRINT("sys_brk ( %#lx )", ARG1);
   2934    PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
   2935 
   2936    brk_new = do_brk(ARG1);
   2937    SET_STATUS_Success( brk_new );
   2938 
   2939    if (brk_new == ARG1) {
   2940       /* brk() succeeded */
   2941       if (brk_new < brk_limit) {
   2942          /* successfully shrunk the data segment. */
   2943          VG_TRACK( die_mem_brk, (Addr)ARG1,
   2944 		   brk_limit-ARG1 );
   2945       } else
   2946       if (brk_new > brk_limit) {
   2947          /* successfully grew the data segment */
   2948          VG_TRACK( new_mem_brk, brk_limit,
   2949                    ARG1-brk_limit, tid );
   2950       }
   2951    } else {
   2952       /* brk() failed */
   2953       vg_assert(brk_limit == brk_new);
   2954    }
   2955 }
   2956 
   2957 PRE(sys_chdir)
   2958 {
   2959    FUSE_COMPATIBLE_MAY_BLOCK();
   2960    PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   2961    PRE_REG_READ1(long, "chdir", const char *, path);
   2962    PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
   2963 }
   2964 
   2965 PRE(sys_chmod)
   2966 {
   2967    FUSE_COMPATIBLE_MAY_BLOCK();
   2968    PRINT("sys_chmod ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   2969    PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
   2970    PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
   2971 }
   2972 
   2973 PRE(sys_chown)
   2974 {
   2975    FUSE_COMPATIBLE_MAY_BLOCK();
   2976    PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   2977    PRE_REG_READ3(long, "chown",
   2978                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
   2979    PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
   2980 }
   2981 
   2982 PRE(sys_lchown)
   2983 {
   2984    FUSE_COMPATIBLE_MAY_BLOCK();
   2985    PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   2986    PRE_REG_READ3(long, "lchown",
   2987                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
   2988    PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
   2989 }
   2990 
   2991 PRE(sys_close)
   2992 {
   2993    FUSE_COMPATIBLE_MAY_BLOCK();
   2994    PRINT("sys_close ( %ld )", ARG1);
   2995    PRE_REG_READ1(long, "close", unsigned int, fd);
   2996 
   2997    /* Detect and negate attempts by the client to close Valgrind's log fd */
   2998    if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
   2999         /* If doing -d style logging (which is to fd=2), don't
   3000            allow that to be closed either. */
   3001         || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
   3002       SET_STATUS_Failure( VKI_EBADF );
   3003 }
   3004 
   3005 POST(sys_close)
   3006 {
   3007    if (VG_(clo_track_fds)) record_fd_close(ARG1);
   3008 }
   3009 
   3010 PRE(sys_dup)
   3011 {
   3012    PRINT("sys_dup ( %ld )", ARG1);
   3013    PRE_REG_READ1(long, "dup", unsigned int, oldfd);
   3014 }
   3015 
   3016 POST(sys_dup)
   3017 {
   3018    vg_assert(SUCCESS);
   3019    if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
   3020       VG_(close)(RES);
   3021       SET_STATUS_Failure( VKI_EMFILE );
   3022    } else {
   3023       if (VG_(clo_track_fds))
   3024          ML_(record_fd_open_named)(tid, RES);
   3025    }
   3026 }
   3027 
   3028 PRE(sys_dup2)
   3029 {
   3030    PRINT("sys_dup2 ( %ld, %ld )", ARG1,ARG2);
   3031    PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
   3032    if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
   3033       SET_STATUS_Failure( VKI_EBADF );
   3034 }
   3035 
   3036 POST(sys_dup2)
   3037 {
   3038    vg_assert(SUCCESS);
   3039    if (VG_(clo_track_fds))
   3040       ML_(record_fd_open_named)(tid, RES);
   3041 }
   3042 
   3043 PRE(sys_fchdir)
   3044 {
   3045    FUSE_COMPATIBLE_MAY_BLOCK();
   3046    PRINT("sys_fchdir ( %ld )", ARG1);
   3047    PRE_REG_READ1(long, "fchdir", unsigned int, fd);
   3048 }
   3049 
   3050 PRE(sys_fchown)
   3051 {
   3052    FUSE_COMPATIBLE_MAY_BLOCK();
   3053    PRINT("sys_fchown ( %ld, %ld, %ld )", ARG1,ARG2,ARG3);
   3054    PRE_REG_READ3(long, "fchown",
   3055                  unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
   3056 }
   3057 
   3058 PRE(sys_fchmod)
   3059 {
   3060    FUSE_COMPATIBLE_MAY_BLOCK();
   3061    PRINT("sys_fchmod ( %ld, %ld )", ARG1,ARG2);
   3062    PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
   3063 }
   3064 
   3065 PRE(sys_newfstat)
   3066 {
   3067    FUSE_COMPATIBLE_MAY_BLOCK();
   3068    PRINT("sys_newfstat ( %ld, %#lx )", ARG1,ARG2);
   3069    PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
   3070    PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
   3071 }
   3072 
   3073 POST(sys_newfstat)
   3074 {
   3075    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   3076 }
   3077 
   3078 static vki_sigset_t fork_saved_mask;
   3079 
   3080 // In Linux, the sys_fork() function varies across architectures, but we
   3081 // ignore the various args it gets, and so it looks arch-neutral.  Hmm.
   3082 PRE(sys_fork)
   3083 {
   3084    Bool is_child;
   3085    Int child_pid;
   3086    vki_sigset_t mask;
   3087 
   3088    PRINT("sys_fork ( )");
   3089    PRE_REG_READ0(long, "fork");
   3090 
   3091    /* Block all signals during fork, so that we can fix things up in
   3092       the child without being interrupted. */
   3093    VG_(sigfillset)(&mask);
   3094    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
   3095 
   3096    SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
   3097 
   3098    if (!SUCCESS) return;
   3099 
   3100 #if defined(VGO_linux)
   3101    // RES is 0 for child, non-0 (the child's PID) for parent.
   3102    is_child = ( RES == 0 ? True : False );
   3103    child_pid = ( is_child ? -1 : RES );
   3104 #elif defined(VGO_darwin)
   3105    // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
   3106    is_child = RESHI;
   3107    child_pid = RES;
   3108 #else
   3109 #  error Unknown OS
   3110 #endif
   3111 
   3112    VG_(do_atfork_pre)(tid);
   3113 
   3114    if (is_child) {
   3115       VG_(do_atfork_child)(tid);
   3116 
   3117       /* restore signal mask */
   3118       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   3119 
   3120       /* If --child-silent-after-fork=yes was specified, set the
   3121          output file descriptors to 'impossible' values.  This is
   3122          noticed by send_bytes_to_logging_sink in m_libcprint.c, which
   3123          duly stops writing any further output. */
   3124       if (VG_(clo_child_silent_after_fork)) {
   3125          if (!VG_(log_output_sink).is_socket)
   3126             VG_(log_output_sink).fd = -1;
   3127          if (!VG_(xml_output_sink).is_socket)
   3128             VG_(xml_output_sink).fd = -1;
   3129       }
   3130 
   3131    } else {
   3132       VG_(do_atfork_parent)(tid);
   3133 
   3134       PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);
   3135 
   3136       /* restore signal mask */
   3137       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   3138    }
   3139 }
   3140 
   3141 PRE(sys_ftruncate)
   3142 {
   3143    *flags |= SfMayBlock;
   3144    PRINT("sys_ftruncate ( %ld, %ld )", ARG1,ARG2);
   3145    PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
   3146 }
   3147 
   3148 PRE(sys_truncate)
   3149 {
   3150    *flags |= SfMayBlock;
   3151    PRINT("sys_truncate ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   3152    PRE_REG_READ2(long, "truncate",
   3153                  const char *, path, unsigned long, length);
   3154    PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
   3155 }
   3156 
   3157 PRE(sys_ftruncate64)
   3158 {
   3159    *flags |= SfMayBlock;
   3160 #if VG_WORDSIZE == 4
   3161    PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, MERGE64(ARG2,ARG3));
   3162    PRE_REG_READ3(long, "ftruncate64",
   3163                  unsigned int, fd,
   3164                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
   3165 #else
   3166    PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, (Long)ARG2);
   3167    PRE_REG_READ2(long, "ftruncate64",
   3168                  unsigned int,fd, UWord,length);
   3169 #endif
   3170 }
   3171 
   3172 PRE(sys_truncate64)
   3173 {
   3174    *flags |= SfMayBlock;
   3175 #if VG_WORDSIZE == 4
   3176    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
   3177    PRE_REG_READ3(long, "truncate64",
   3178                  const char *, path,
   3179                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
   3180 #else
   3181    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
   3182    PRE_REG_READ2(long, "truncate64",
   3183                  const char *,path, UWord,length);
   3184 #endif
   3185    PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
   3186 }
   3187 
   3188 PRE(sys_getdents)
   3189 {
   3190    *flags |= SfMayBlock;
   3191    PRINT("sys_getdents ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   3192    PRE_REG_READ3(long, "getdents",
   3193                  unsigned int, fd, struct linux_dirent *, dirp,
   3194                  unsigned int, count);
   3195    PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
   3196 }
   3197 
   3198 POST(sys_getdents)
   3199 {
   3200    vg_assert(SUCCESS);
   3201    if (RES > 0)
   3202       POST_MEM_WRITE( ARG2, RES );
   3203 }
   3204 
   3205 PRE(sys_getdents64)
   3206 {
   3207    *flags |= SfMayBlock;
   3208    PRINT("sys_getdents64 ( %ld, %#lx, %ld )",ARG1,ARG2,ARG3);
   3209    PRE_REG_READ3(long, "getdents64",
   3210                  unsigned int, fd, struct linux_dirent64 *, dirp,
   3211                  unsigned int, count);
   3212    PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
   3213 }
   3214 
   3215 POST(sys_getdents64)
   3216 {
   3217    vg_assert(SUCCESS);
   3218    if (RES > 0)
   3219       POST_MEM_WRITE( ARG2, RES );
   3220 }
   3221 
   3222 PRE(sys_getgroups)
   3223 {
   3224    PRINT("sys_getgroups ( %ld, %#lx )", ARG1, ARG2);
   3225    PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
   3226    if (ARG1 > 0)
   3227       PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
   3228 }
   3229 
   3230 POST(sys_getgroups)
   3231 {
   3232    vg_assert(SUCCESS);
   3233    if (ARG1 > 0 && RES > 0)
   3234       POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
   3235 }
   3236 
   3237 PRE(sys_getcwd)
   3238 {
   3239    // Comment from linux/fs/dcache.c:
   3240    //   NOTE! The user-level library version returns a character pointer.
   3241    //   The kernel system call just returns the length of the buffer filled
   3242    //   (which includes the ending '\0' character), or a negative error
   3243    //   value.
   3244    // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
   3245    PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
   3246    PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
   3247    PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
   3248 }
   3249 
   3250 POST(sys_getcwd)
   3251 {
   3252    vg_assert(SUCCESS);
   3253    if (RES != (Addr)NULL)
   3254       POST_MEM_WRITE( ARG1, RES );
   3255 }
   3256 
   3257 PRE(sys_geteuid)
   3258 {
   3259    PRINT("sys_geteuid ( )");
   3260    PRE_REG_READ0(long, "geteuid");
   3261 }
   3262 
   3263 PRE(sys_getegid)
   3264 {
   3265    PRINT("sys_getegid ( )");
   3266    PRE_REG_READ0(long, "getegid");
   3267 }
   3268 
   3269 PRE(sys_getgid)
   3270 {
   3271    PRINT("sys_getgid ( )");
   3272    PRE_REG_READ0(long, "getgid");
   3273 }
   3274 
   3275 PRE(sys_getpid)
   3276 {
   3277    PRINT("sys_getpid ()");
   3278    PRE_REG_READ0(long, "getpid");
   3279 }
   3280 
   3281 PRE(sys_getpgid)
   3282 {
   3283    PRINT("sys_getpgid ( %ld )", ARG1);
   3284    PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
   3285 }
   3286 
   3287 PRE(sys_getpgrp)
   3288 {
   3289    PRINT("sys_getpgrp ()");
   3290    PRE_REG_READ0(long, "getpgrp");
   3291 }
   3292 
   3293 PRE(sys_getppid)
   3294 {
   3295    PRINT("sys_getppid ()");
   3296    PRE_REG_READ0(long, "getppid");
   3297 }
   3298 
   3299 static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
   3300 {
   3301    POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
   3302 
   3303 #ifdef _RLIMIT_POSIX_FLAG
   3304    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
   3305    // Unset it here to make the switch case below work correctly.
   3306    a1 &= ~_RLIMIT_POSIX_FLAG;
   3307 #endif
   3308 
   3309    switch (a1) {
   3310    case VKI_RLIMIT_NOFILE:
   3311       ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
   3312       ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
   3313       break;
   3314 
   3315    case VKI_RLIMIT_DATA:
   3316       *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
   3317       break;
   3318 
   3319    case VKI_RLIMIT_STACK:
   3320       *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);
   3321       break;
   3322    }
   3323 }
   3324 
   3325 PRE(sys_old_getrlimit)
   3326 {
   3327    PRINT("sys_old_getrlimit ( %ld, %#lx )", ARG1,ARG2);
   3328    PRE_REG_READ2(long, "old_getrlimit",
   3329                  unsigned int, resource, struct rlimit *, rlim);
   3330    PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   3331 }
   3332 
   3333 POST(sys_old_getrlimit)
   3334 {
   3335    common_post_getrlimit(tid, ARG1, ARG2);
   3336 }
   3337 
   3338 PRE(sys_getrlimit)
   3339 {
   3340    PRINT("sys_getrlimit ( %ld, %#lx )", ARG1,ARG2);
   3341    PRE_REG_READ2(long, "getrlimit",
   3342                  unsigned int, resource, struct rlimit *, rlim);
   3343    PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   3344 }
   3345 
   3346 POST(sys_getrlimit)
   3347 {
   3348    common_post_getrlimit(tid, ARG1, ARG2);
   3349 }
   3350 
   3351 PRE(sys_getrusage)
   3352 {
   3353    PRINT("sys_getrusage ( %ld, %#lx )", ARG1,ARG2);
   3354    PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
   3355    PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
   3356 }
   3357 
   3358 POST(sys_getrusage)
   3359 {
   3360    vg_assert(SUCCESS);
   3361    if (RES == 0)
   3362       POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
   3363 }
   3364 
   3365 PRE(sys_gettimeofday)
   3366 {
   3367    PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
   3368    PRE_REG_READ2(long, "gettimeofday",
   3369                  struct timeval *, tv, struct timezone *, tz);
   3370    // GrP fixme does darwin write to *tz anymore?
   3371    if (ARG1 != 0)
   3372       PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
   3373    if (ARG2 != 0)
   3374       PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
   3375 }
   3376 
   3377 POST(sys_gettimeofday)
   3378 {
   3379    vg_assert(SUCCESS);
   3380    if (RES == 0) {
   3381       if (ARG1 != 0)
   3382          POST_timeval_WRITE( ARG1 );
   3383       if (ARG2 != 0)
   3384 	 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
   3385    }
   3386 }
   3387 
   3388 PRE(sys_settimeofday)
   3389 {
   3390    PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
   3391    PRE_REG_READ2(long, "settimeofday",
   3392                  struct timeval *, tv, struct timezone *, tz);
   3393    if (ARG1 != 0)
   3394       PRE_timeval_READ( "settimeofday(tv)", ARG1 );
   3395    if (ARG2 != 0) {
   3396       PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
   3397       /* maybe should warn if tz->tz_dsttime is non-zero? */
   3398    }
   3399 }
   3400 
   3401 PRE(sys_getuid)
   3402 {
   3403    PRINT("sys_getuid ( )");
   3404    PRE_REG_READ0(long, "getuid");
   3405 }
   3406 
   3407 void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
   3408 {
   3409    /* We don't have any specific information on it, so
   3410       try to do something reasonable based on direction and
   3411       size bits.  The encoding scheme is described in
   3412       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
   3413 
   3414       According to Simon Hausmann, _IOC_READ means the kernel
   3415       writes a value to the ioctl value passed from the user
   3416       space and the other way around with _IOC_WRITE. */
   3417 
   3418    UInt dir  = _VKI_IOC_DIR(request);
   3419    UInt size = _VKI_IOC_SIZE(request);
   3420    if (VG_(strstr)(VG_(clo_sim_hints), "lax-ioctls") != NULL) {
   3421       /*
   3422        * Be very lax about ioctl handling; the only
   3423        * assumption is that the size is correct. Doesn't
   3424        * require the full buffer to be initialized when
   3425        * writing.  Without this, using some device
   3426        * drivers with a large number of strange ioctl
   3427        * commands becomes very tiresome.
   3428        */
   3429    } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
   3430       //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
   3431       //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   3432       static Int moans = 3;
   3433       if (moans > 0 && !VG_(clo_xml)) {
   3434          moans--;
   3435          VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
   3436                    " with no size/direction hints\n", request);
   3437          VG_(umsg)("   This could cause spurious value errors to appear.\n");
   3438          VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
   3439                    "guidance on writing a proper wrapper.\n" );
   3440       }
   3441    } else {
   3442       //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
   3443       //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   3444       if ((dir & _VKI_IOC_WRITE) && size > 0)
   3445          PRE_MEM_READ( "ioctl(generic)", arg, size);
   3446       if ((dir & _VKI_IOC_READ) && size > 0)
   3447          PRE_MEM_WRITE( "ioctl(generic)", arg, size);
   3448    }
   3449 }
   3450 
   3451 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
   3452 {
   3453    /* We don't have any specific information on it, so
   3454       try to do something reasonable based on direction and
   3455       size bits.  The encoding scheme is described in
   3456       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
   3457 
   3458       According to Simon Hausmann, _IOC_READ means the kernel
   3459       writes a value to the ioctl value passed from the user
   3460       space and the other way around with _IOC_WRITE. */
   3461 
   3462    UInt dir  = _VKI_IOC_DIR(request);
   3463    UInt size = _VKI_IOC_SIZE(request);
   3464    if (size > 0 && (dir & _VKI_IOC_READ)
   3465        && res == 0
   3466        && arg != (Addr)NULL)
   3467    {
   3468       POST_MEM_WRITE(arg, size);
   3469    }
   3470 }
   3471 
   3472 /*
   3473    If we're sending a SIGKILL to one of our own threads, then simulate
   3474    it rather than really sending the signal, so that the target thread
   3475    gets a chance to clean up.  Returns True if we did the killing (or
   3476    no killing is necessary), and False if the caller should use the
   3477    normal kill syscall.
   3478 
   3479    "pid" is any pid argument which can be passed to kill; group kills
   3480    (< -1, 0), and owner kills (-1) are ignored, on the grounds that
   3481    they'll most likely hit all the threads and we won't need to worry
   3482    about cleanup.  In truth, we can't fully emulate these multicast
   3483    kills.
   3484 
   3485    "tgid" is a thread group id.  If it is not -1, then the target
   3486    thread must be in that thread group.
   3487  */
   3488 Bool ML_(do_sigkill)(Int pid, Int tgid)
   3489 {
   3490    ThreadState *tst;
   3491    ThreadId tid;
   3492 
   3493    if (pid <= 0)
   3494       return False;
   3495 
   3496    tid = VG_(lwpid_to_vgtid)(pid);
   3497    if (tid == VG_INVALID_THREADID)
   3498       return False;		/* none of our threads */
   3499 
   3500    tst = VG_(get_ThreadState)(tid);
   3501    if (tst == NULL || tst->status == VgTs_Empty)
   3502       return False;		/* hm, shouldn't happen */
   3503 
   3504    if (tgid != -1 && tst->os_state.threadgroup != tgid)
   3505       return False;		/* not the right thread group */
   3506 
   3507    /* Check to see that the target isn't already exiting. */
   3508    if (!VG_(is_exiting)(tid)) {
   3509       if (VG_(clo_trace_signals))
   3510 	 VG_(message)(Vg_DebugMsg,
   3511                       "Thread %d being killed with SIGKILL\n",
   3512                       tst->tid);
   3513 
   3514       tst->exitreason = VgSrc_FatalSig;
   3515       tst->os_state.fatalsig = VKI_SIGKILL;
   3516 
   3517       if (!VG_(is_running_thread)(tid))
   3518 	 VG_(get_thread_out_of_syscall)(tid);
   3519    }
   3520 
   3521    return True;
   3522 }
   3523 
   3524 PRE(sys_kill)
   3525 {
   3526    PRINT("sys_kill ( %ld, %ld )", ARG1,ARG2);
   3527    PRE_REG_READ2(long, "kill", int, pid, int, sig);
   3528    if (!ML_(client_signal_OK)(ARG2)) {
   3529       SET_STATUS_Failure( VKI_EINVAL );
   3530       return;
   3531    }
   3532 
   3533    /* If we're sending SIGKILL, check to see if the target is one of
   3534       our threads and handle it specially. */
   3535    if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
   3536       SET_STATUS_Success(0);
   3537    else
   3538       /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
   3539          affecting how posix-compliant the call is.  I guess it is
   3540          harmless to pass the 3rd arg on other platforms; hence pass
   3541          it on all. */
   3542       SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
   3543 
   3544    if (VG_(clo_trace_signals))
   3545       VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
   3546 		   ARG2, ARG1);
   3547 
   3548    /* This kill might have given us a pending signal.  Ask for a check once
   3549       the syscall is done. */
   3550    *flags |= SfPollAfter;
   3551 }
   3552 
   3553 PRE(sys_link)
   3554 {
   3555    *flags |= SfMayBlock;
   3556    PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   3557    PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
   3558    PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
   3559    PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
   3560 }
   3561 
   3562 PRE(sys_newlstat)
   3563 {
   3564    PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   3565    PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
   3566    PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
   3567    PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
   3568 }
   3569 
   3570 POST(sys_newlstat)
   3571 {
   3572    vg_assert(SUCCESS);
   3573    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   3574 }
   3575 
   3576 PRE(sys_mkdir)
   3577 {
   3578    *flags |= SfMayBlock;
   3579    PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   3580    PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
   3581    PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
   3582 }
   3583 
   3584 PRE(sys_mprotect)
   3585 {
   3586    PRINT("sys_mprotect ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   3587    PRE_REG_READ3(long, "mprotect",
   3588                  unsigned long, addr, vki_size_t, len, unsigned long, prot);
   3589 
   3590    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
   3591       SET_STATUS_Failure( VKI_ENOMEM );
   3592    }
   3593 #if defined(VKI_PROT_GROWSDOWN)
   3594    else
   3595    if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
   3596       /* Deal with mprotects on growable stack areas.
   3597 
   3598          The critical files to understand all this are mm/mprotect.c
   3599          in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
   3600          glibc.
   3601 
   3602          The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
   3603          round the start/end address of mprotect to the start/end of
   3604          the underlying vma and glibc uses that as an easy way to
   3605          change the protection of the stack by calling mprotect on the
   3606          last page of the stack with PROT_GROWSDOWN set.
   3607 
   3608          The sanity check provided by the kernel is that the vma must
   3609          have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
   3610       UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
   3611       NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
   3612       NSegment const *rseg;
   3613 
   3614       vg_assert(aseg);
   3615 
   3616       if (grows == VKI_PROT_GROWSDOWN) {
   3617          rseg = VG_(am_next_nsegment)( aseg, False/*backwards*/ );
   3618          if (rseg &&
   3619              rseg->kind == SkResvn &&
   3620              rseg->smode == SmUpper &&
   3621              rseg->end+1 == aseg->start) {
   3622             Addr end = ARG1 + ARG2;
   3623             ARG1 = aseg->start;
   3624             ARG2 = end - aseg->start;
   3625             ARG3 &= ~VKI_PROT_GROWSDOWN;
   3626          } else {
   3627             SET_STATUS_Failure( VKI_EINVAL );
   3628          }
   3629       } else if (grows == VKI_PROT_GROWSUP) {
   3630          rseg = VG_(am_next_nsegment)( aseg, True/*forwards*/ );
   3631          if (rseg &&
   3632              rseg->kind == SkResvn &&
   3633              rseg->smode == SmLower &&
   3634              aseg->end+1 == rseg->start) {
   3635             ARG2 = aseg->end - ARG1 + 1;
   3636             ARG3 &= ~VKI_PROT_GROWSUP;
   3637          } else {
   3638             SET_STATUS_Failure( VKI_EINVAL );
   3639          }
   3640       } else {
   3641          /* both GROWSUP and GROWSDOWN */
   3642          SET_STATUS_Failure( VKI_EINVAL );
   3643       }
   3644    }
   3645 #endif   // defined(VKI_PROT_GROWSDOWN)
   3646 }
   3647 
   3648 POST(sys_mprotect)
   3649 {
   3650    Addr a    = ARG1;
   3651    SizeT len = ARG2;
   3652    Int  prot = ARG3;
   3653 
   3654    ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
   3655 }
   3656 
   3657 PRE(sys_munmap)
   3658 {
   3659    if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);
   3660    PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
   3661    PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
   3662 
   3663    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
   3664       SET_STATUS_Failure( VKI_EINVAL );
   3665 }
   3666 
   3667 POST(sys_munmap)
   3668 {
   3669    Addr  a   = ARG1;
   3670    SizeT len = ARG2;
   3671 
   3672    ML_(notify_core_and_tool_of_munmap)( (Addr64)a, (ULong)len );
   3673 }
   3674 
   3675 PRE(sys_mincore)
   3676 {
   3677    PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
   3678    PRE_REG_READ3(long, "mincore",
   3679                  unsigned long, start, vki_size_t, length,
   3680                  unsigned char *, vec);
   3681    PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
   3682 }
   3683 POST(sys_mincore)
   3684 {
   3685    POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
   3686 }
   3687 
   3688 PRE(sys_nanosleep)
   3689 {
   3690    *flags |= SfMayBlock|SfPostOnFail;
   3691    PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
   3692    PRE_REG_READ2(long, "nanosleep",
   3693                  struct timespec *, req, struct timespec *, rem);
   3694    PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
   3695    if (ARG2 != 0)
   3696       PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
   3697 }
   3698 
   3699 POST(sys_nanosleep)
   3700 {
   3701    vg_assert(SUCCESS || FAILURE);
   3702    if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
   3703       POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
   3704 }
   3705 
   3706 PRE(sys_open)
   3707 {
   3708    if (ARG2 & VKI_O_CREAT) {
   3709       // 3-arg version
   3710       PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1,(char*)ARG1,ARG2,ARG3);
   3711       PRE_REG_READ3(long, "open",
   3712                     const char *, filename, int, flags, int, mode);
   3713    } else {
   3714       // 2-arg version
   3715       PRINT("sys_open ( %#lx(%s), %ld )",ARG1,(char*)ARG1,ARG2);
   3716       PRE_REG_READ2(long, "open",
   3717                     const char *, filename, int, flags);
   3718    }
   3719    PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
   3720 
   3721 #if defined(VGO_linux)
   3722    /* Handle the case where the open is of /proc/self/cmdline or
   3723       /proc/<pid>/cmdline, and just give it a copy of the fd for the
   3724       fake file we cooked up at startup (in m_main).  Also, seek the
   3725       cloned fd back to the start. */
   3726    {
   3727       HChar  name[30];
   3728       HChar* arg1s = (HChar*) ARG1;
   3729       SysRes sres;
   3730 
   3731       VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
   3732       if (ML_(safe_to_deref)( arg1s, 1 ) &&
   3733           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
   3734          )
   3735       {
   3736          sres = VG_(dup)( VG_(cl_cmdline_fd) );
   3737          SET_STATUS_from_SysRes( sres );
   3738          if (!sr_isError(sres)) {
   3739             OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
   3740             if (off < 0)
   3741                SET_STATUS_Failure( VKI_EMFILE );
   3742          }
   3743          return;
   3744       }
   3745    }
   3746 
   3747    /* Handle the case where the open is of /proc/self/auxv or
   3748       /proc/<pid>/auxv, and just give it a copy of the fd for the
   3749       fake file we cooked up at startup (in m_main).  Also, seek the
   3750       cloned fd back to the start. */
   3751    {
   3752       HChar  name[30];
   3753       HChar* arg1s = (HChar*) ARG1;
   3754       SysRes sres;
   3755 
   3756       VG_(sprintf)(name, "/proc/%d/auxv", VG_(getpid)());
   3757       if (ML_(safe_to_deref)( arg1s, 1 ) &&
   3758           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/auxv"))
   3759          )
   3760       {
   3761          sres = VG_(dup)( VG_(cl_auxv_fd) );
   3762          SET_STATUS_from_SysRes( sres );
   3763          if (!sr_isError(sres)) {
   3764             OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
   3765             if (off < 0)
   3766                SET_STATUS_Failure( VKI_EMFILE );
   3767          }
   3768          return;
   3769       }
   3770    }
   3771 #endif // defined(VGO_linux)
   3772 
   3773    /* Otherwise handle normally */
   3774    *flags |= SfMayBlock;
   3775 }
   3776 
   3777 POST(sys_open)
   3778 {
   3779    vg_assert(SUCCESS);
   3780    if (!ML_(fd_allowed)(RES, "open", tid, True)) {
   3781       VG_(close)(RES);
   3782       SET_STATUS_Failure( VKI_EMFILE );
   3783    } else {
   3784       if (VG_(clo_track_fds))
   3785          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
   3786    }
   3787 }
   3788 
   3789 PRE(sys_read)
   3790 {
   3791    *flags |= SfMayBlock;
   3792    PRINT("sys_read ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
   3793    PRE_REG_READ3(ssize_t, "read",
   3794                  unsigned int, fd, char *, buf, vki_size_t, count);
   3795 
   3796    if (!ML_(fd_allowed)(ARG1, "read", tid, False))
   3797       SET_STATUS_Failure( VKI_EBADF );
   3798    else
   3799       PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
   3800 }
   3801 
   3802 POST(sys_read)
   3803 {
   3804    vg_assert(SUCCESS);
   3805    POST_MEM_WRITE( ARG2, RES );
   3806 }
   3807 
   3808 PRE(sys_write)
   3809 {
   3810    Bool ok;
   3811    *flags |= SfMayBlock;
   3812    PRINT("sys_write ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
   3813    PRE_REG_READ3(ssize_t, "write",
   3814                  unsigned int, fd, const char *, buf, vki_size_t, count);
   3815    /* check to see if it is allowed.  If not, try for an exemption from
   3816       --sim-hints=enable-outer (used for self hosting). */
   3817    ok = ML_(fd_allowed)(ARG1, "write", tid, False);
   3818    if (!ok && ARG1 == 2/*stderr*/
   3819            && VG_(strstr)(VG_(clo_sim_hints),"enable-outer"))
   3820       ok = True;
   3821    if (!ok)
   3822       SET_STATUS_Failure( VKI_EBADF );
   3823    else
   3824       PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
   3825 }
   3826 
   3827 PRE(sys_creat)
   3828 {
   3829    *flags |= SfMayBlock;
   3830    PRINT("sys_creat ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   3831    PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
   3832    PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
   3833 }
   3834 
   3835 POST(sys_creat)
   3836 {
   3837    vg_assert(SUCCESS);
   3838    if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
   3839       VG_(close)(RES);
   3840       SET_STATUS_Failure( VKI_EMFILE );
   3841    } else {
   3842       if (VG_(clo_track_fds))
   3843          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
   3844    }
   3845 }
   3846 
   3847 PRE(sys_poll)
   3848 {
   3849    /* struct pollfd {
   3850         int fd;           -- file descriptor
   3851         short events;     -- requested events
   3852         short revents;    -- returned events
   3853       };
   3854       int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
   3855    */
   3856    UInt i;
   3857    struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
   3858    *flags |= SfMayBlock;
   3859    PRINT("sys_poll ( %#lx, %ld, %ld )\n", ARG1,ARG2,ARG3);
   3860    PRE_REG_READ3(long, "poll",
   3861                  struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
   3862 
   3863    for (i = 0; i < ARG2; i++) {
   3864       PRE_MEM_READ( "poll(ufds.fd)",
   3865                     (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
   3866       PRE_MEM_READ( "poll(ufds.events)",
   3867                     (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
   3868       PRE_MEM_WRITE( "poll(ufds.revents)",
   3869                      (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
   3870    }
   3871 }
   3872 
   3873 POST(sys_poll)
   3874 {
   3875    if (RES >= 0) {
   3876       UInt i;
   3877       struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
   3878       for (i = 0; i < ARG2; i++)
   3879 	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
   3880    }
   3881 }
   3882 
   3883 PRE(sys_readlink)
   3884 {
   3885    FUSE_COMPATIBLE_MAY_BLOCK();
   3886    Word saved = SYSNO;
   3887 
   3888    PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
   3889    PRE_REG_READ3(long, "readlink",
   3890                  const char *, path, char *, buf, int, bufsiz);
   3891    PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
   3892    PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
   3893 
   3894    {
   3895 #if defined(VGO_linux)
   3896       /*
   3897        * Handle the case where readlink is looking at /proc/self/exe or
   3898        * /proc/<pid>/exe.
   3899        */
   3900       HChar name[25];
   3901       HChar* arg1s = (HChar*) ARG1;
   3902       VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
   3903       if (ML_(safe_to_deref)(arg1s, 1) &&
   3904           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
   3905          )
   3906       {
   3907          VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
   3908          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
   3909                                                          ARG2, ARG3));
   3910       } else
   3911 #endif // defined(VGO_linux)
   3912       {
   3913          /* Normal case */
   3914          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
   3915       }
   3916    }
   3917 
   3918    if (SUCCESS && RES > 0)
   3919       POST_MEM_WRITE( ARG2, RES );
   3920 }
   3921 
   3922 PRE(sys_readv)
   3923 {
   3924    Int i;
   3925    struct vki_iovec * vec;
   3926    *flags |= SfMayBlock;
   3927    PRINT("sys_readv ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
   3928    PRE_REG_READ3(ssize_t, "readv",
   3929                  unsigned long, fd, const struct iovec *, vector,
   3930                  unsigned long, count);
   3931    if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
   3932       SET_STATUS_Failure( VKI_EBADF );
   3933    } else {
   3934       PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
   3935 
   3936       if (ARG2 != 0) {
   3937          /* ToDo: don't do any of the following if the vector is invalid */
   3938          vec = (struct vki_iovec *)ARG2;
   3939          for (i = 0; i < (Int)ARG3; i++)
   3940             PRE_MEM_WRITE( "readv(vector[...])",
   3941                            (Addr)vec[i].iov_base, vec[i].iov_len );
   3942       }
   3943    }
   3944 }
   3945 
   3946 POST(sys_readv)
   3947 {
   3948    vg_assert(SUCCESS);
   3949    if (RES > 0) {
   3950       Int i;
   3951       struct vki_iovec * vec = (struct vki_iovec *)ARG2;
   3952       Int remains = RES;
   3953 
   3954       /* RES holds the number of bytes read. */
   3955       for (i = 0; i < (Int)ARG3; i++) {
   3956 	 Int nReadThisBuf = vec[i].iov_len;
   3957 	 if (nReadThisBuf > remains) nReadThisBuf = remains;
   3958 	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
   3959 	 remains -= nReadThisBuf;
   3960 	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
   3961       }
   3962    }
   3963 }
   3964 
   3965 PRE(sys_rename)
   3966 {
   3967    FUSE_COMPATIBLE_MAY_BLOCK();
   3968    PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   3969    PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
   3970    PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
   3971    PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
   3972 }
   3973 
   3974 PRE(sys_rmdir)
   3975 {
   3976    *flags |= SfMayBlock;
   3977    PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   3978    PRE_REG_READ1(long, "rmdir", const char *, pathname);
   3979    PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
   3980 }
   3981 
   3982 PRE(sys_select)
   3983 {
   3984    *flags |= SfMayBlock;
   3985    PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   3986    PRE_REG_READ5(long, "select",
   3987                  int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
   3988                  vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
   3989    // XXX: this possibly understates how much memory is read.
   3990    if (ARG2 != 0)
   3991       PRE_MEM_READ( "select(readfds)",
   3992 		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
   3993    if (ARG3 != 0)
   3994       PRE_MEM_READ( "select(writefds)",
   3995 		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
   3996    if (ARG4 != 0)
   3997       PRE_MEM_READ( "select(exceptfds)",
   3998 		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
   3999    if (ARG5 != 0)
   4000       PRE_timeval_READ( "select(timeout)", ARG5 );
   4001 }
   4002 
   4003 PRE(sys_setgid)
   4004 {
   4005    PRINT("sys_setgid ( %ld )", ARG1);
   4006    PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
   4007 }
   4008 
   4009 PRE(sys_setsid)
   4010 {
   4011    PRINT("sys_setsid ( )");
   4012    PRE_REG_READ0(long, "setsid");
   4013 }
   4014 
   4015 PRE(sys_setgroups)
   4016 {
   4017    PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
   4018    PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
   4019    if (ARG1 > 0)
   4020       PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
   4021 }
   4022 
   4023 PRE(sys_setpgid)
   4024 {
   4025    PRINT("setpgid ( %ld, %ld )", ARG1, ARG2);
   4026    PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
   4027 }
   4028 
   4029 PRE(sys_setregid)
   4030 {
   4031    PRINT("sys_setregid ( %ld, %ld )", ARG1, ARG2);
   4032    PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
   4033 }
   4034 
   4035 PRE(sys_setreuid)
   4036 {
   4037    PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
   4038    PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
   4039 }
   4040 
   4041 PRE(sys_setrlimit)
   4042 {
   4043    UWord arg1 = ARG1;
   4044    PRINT("sys_setrlimit ( %ld, %#lx )", ARG1,ARG2);
   4045    PRE_REG_READ2(long, "setrlimit",
   4046                  unsigned int, resource, struct rlimit *, rlim);
   4047    PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   4048 
   4049 #ifdef _RLIMIT_POSIX_FLAG
   4050    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
   4051    // Unset it here to make the if statements below work correctly.
   4052    arg1 &= ~_RLIMIT_POSIX_FLAG;
   4053 #endif
   4054 
   4055    if (ARG2 &&
   4056        ((struct vki_rlimit *)ARG2)->rlim_cur > ((struct vki_rlimit *)ARG2)->rlim_max) {
   4057       SET_STATUS_Failure( VKI_EINVAL );
   4058    }
   4059    else if (arg1 == VKI_RLIMIT_NOFILE) {
   4060       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
   4061           ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
   4062          SET_STATUS_Failure( VKI_EPERM );
   4063       }
   4064       else {
   4065          VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
   4066          SET_STATUS_Success( 0 );
   4067       }
   4068    }
   4069    else if (arg1 == VKI_RLIMIT_DATA) {
   4070       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
   4071           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
   4072          SET_STATUS_Failure( VKI_EPERM );
   4073       }
   4074       else {
   4075          VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
   4076          SET_STATUS_Success( 0 );
   4077       }
   4078    }
   4079    else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
   4080       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
   4081           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
   4082          SET_STATUS_Failure( VKI_EPERM );
   4083       }
   4084       else {
   4085          VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
   4086          VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
   4087          SET_STATUS_Success( 0 );
   4088       }
   4089    }
   4090 }
   4091 
   4092 PRE(sys_setuid)
   4093 {
   4094    PRINT("sys_setuid ( %ld )", ARG1);
   4095    PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
   4096 }
   4097 
   4098 PRE(sys_newstat)
   4099 {
           /* stat(file_name, buf): ARG1 is checked as a NUL-terminated string
              to be read, and ARG2 as a buffer of sizeof(struct vki_stat)
              bytes to be written.
              NOTE(review): PRINT formats ARG1 with %s before
              PRE_MEM_RASCIIZ has looked at it -- presumably harmless
              because PRINT is only active when tracing; confirm. */
   4100    PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   4101    PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
   4102    PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
   4103    PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
   4104 }
   4105 
   4106 POST(sys_newstat)
   4107 {
           /* Post-syscall counterpart of PRE(sys_newstat): records the
              sizeof(struct vki_stat) bytes at ARG2 (buf) as written. */
   4108    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   4109 }
   4110 
   4111 PRE(sys_statfs)
   4112 {
           /* statfs(path, buf): ARG1 is checked as a NUL-terminated string to
              be read, and ARG2 as a buffer of sizeof(struct vki_statfs)
              bytes to be written. */
   4113    PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   4114    PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
   4115    PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
   4116    PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
   4117 }
   4118 POST(sys_statfs)
   4119 {
           /* Post-syscall counterpart of PRE(sys_statfs): records the
              sizeof(struct vki_statfs) bytes at ARG2 (buf) as written. */
   4120    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
   4121 }
   4122 
   4123 PRE(sys_statfs64)
   4124 {
           /* statfs64(path, size, buf): unlike sys_statfs, the length of the
              output buffer is not a fixed struct size here -- it is the
              caller-supplied ARG2 (size), and the buffer itself is ARG3. */
   4125    PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
   4126    PRE_REG_READ3(long, "statfs64",
   4127                  const char *, path, vki_size_t, size, struct statfs64 *, buf);
   4128    PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
   4129    PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
   4130 }
   4131 POST(sys_statfs64)
   4132 {
           /* Post-syscall counterpart of PRE(sys_statfs64): records ARG2
              (size) bytes at ARG3 (buf) as written, matching the range
              checked in the PRE handler. */
   4133    POST_MEM_WRITE( ARG3, ARG2 );
   4134 }
   4135 
   4136 PRE(sys_symlink)
   4137 {
           /* symlink(oldpath, newpath): flagged SfMayBlock; both arguments
              are checked as NUL-terminated strings to be read.  Nothing is
              written back to client memory, so there is no POST handler in
              this part of the file. */
   4138    *flags |= SfMayBlock;
   4139    PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   4140    PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
   4141    PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
   4142    PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
   4143 }
   4144 
   4145 PRE(sys_time)
   4146 {
   4147    /* time_t time(time_t *t); */
           /* t is optional: the write check on sizeof(vki_time_t) bytes is
              only made when ARG1 is non-zero.
              NOTE(review): the register argument is declared as "int *"
              while the prototype above and the memory check use a time_t /
              vki_time_t -- confirm whether the declared type matters. */
   4148    PRINT("sys_time ( %#lx )",ARG1);
   4149    PRE_REG_READ1(long, "time", int *, t);
   4150    if (ARG1 != 0) {
   4151       PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
   4152    }
   4153 }
   4154 
   4155 POST(sys_time)
   4156 {
           /* Mirrors PRE(sys_time): only when a non-zero t (ARG1) was passed
              is the vki_time_t at that address recorded as written. */
   4157    if (ARG1 != 0) {
   4158       POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
   4159    }
   4160 }
   4161 
   4162 PRE(sys_times)
   4163 {
           /* times(buf): buf is treated as optional -- the write check on
              sizeof(struct vki_tms) bytes is only made when ARG1 is
              non-zero. */
   4164    PRINT("sys_times ( %#lx )", ARG1);
   4165    PRE_REG_READ1(long, "times", struct tms *, buf);
   4166    if (ARG1 != 0) {
   4167       PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
   4168    }
   4169 }
   4170 
   4171 POST(sys_times)
   4172 {
           /* Mirrors PRE(sys_times): only when a non-zero buf (ARG1) was
              passed is the struct vki_tms at that address recorded as
              written. */
   4173    if (ARG1 != 0) {
   4174       POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
   4175    }
   4176 }
   4177 
   4178 PRE(sys_umask)
   4179 {
           /* umask(mask): a single scalar register argument and no pointers,
              so only the register read is declared. */
   4180    PRINT("sys_umask ( %ld )", ARG1);
   4181    PRE_REG_READ1(long, "umask", int, mask);
   4182 }
   4183 
   4184 PRE(sys_unlink)
   4185 {
           /* unlink(pathname): flagged SfMayBlock; ARG1 is checked as a
              NUL-terminated string to be read. */
   4186    *flags |= SfMayBlock;
   4187    PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
   4188    PRE_REG_READ1(long, "unlink", const char *, pathname);
   4189    PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
   4190 }
   4191 
   4192 PRE(sys_newuname)
   4193 {
           /* uname(buf): ARG1 is checked as a buffer of
              sizeof(struct vki_new_utsname) bytes to be written.
              NOTE(review): there is no ARG1 != 0 guard here although
              POST(sys_newuname) has one -- confirm whether the asymmetry
              is intentional. */
   4194    PRINT("sys_newuname ( %#lx )", ARG1);
   4195    PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
   4196    PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
   4197 }
   4198 
   4199 POST(sys_newuname)
   4200 {
           /* Records the struct vki_new_utsname at ARG1 (buf) as written,
              but only when ARG1 is non-zero. */
   4201    if (ARG1 != 0) {
   4202       POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
   4203    }
   4204 }
   4205 
   4206 PRE(sys_waitpid)
   4207 {
           /* waitpid(pid, status, options): flagged SfMayBlock.  status
              (ARG2) is optional; when it is non-NULL, sizeof(int) bytes
              there are checked as writable. */
   4208    *flags |= SfMayBlock;
   4209    PRINT("sys_waitpid ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   4210    PRE_REG_READ3(long, "waitpid",
   4211                  vki_pid_t, pid, unsigned int *, status, int, options);
   4212 
   4213    if (ARG2 != (Addr)NULL)
   4214       PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
   4215 }
   4216 
   4217 POST(sys_waitpid)
   4218 {
           /* Mirrors PRE(sys_waitpid): a non-NULL status (ARG2) is recorded
              as having had sizeof(int) bytes written. */
   4219    if (ARG2 != (Addr)NULL)
   4220       POST_MEM_WRITE( ARG2, sizeof(int) );
   4221 }
   4222 
   4223 PRE(sys_wait4)
   4224 {
           /* wait4(pid, status, options, rusage): flagged SfMayBlock.  The
              two output pointers are optional and handled independently:
              status (ARG2) is checked for sizeof(int) bytes and rusage
              (ARG4) for sizeof(struct vki_rusage) bytes, each only when
              non-NULL. */
   4225    *flags |= SfMayBlock;
   4226    PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", ARG1,ARG2,ARG3,ARG4);
   4227 
   4228    PRE_REG_READ4(long, "wait4",
   4229                  vki_pid_t, pid, unsigned int *, status, int, options,
   4230                  struct rusage *, rusage);
   4231    if (ARG2 != (Addr)NULL)
   4232       PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
   4233    if (ARG4 != (Addr)NULL)
   4234       PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
   4235 }
   4236 
   4237 POST(sys_wait4)
   4238 {
           /* Mirrors PRE(sys_wait4): each non-NULL output pointer is
              recorded as written, using the same sizes as the PRE
              handler. */
   4239    if (ARG2 != (Addr)NULL)
   4240       POST_MEM_WRITE( ARG2, sizeof(int) );
   4241    if (ARG4 != (Addr)NULL)
   4242       POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
   4243 }
   4244 
   4245 PRE(sys_writev)
   4246 {
           /* writev(fd, vector, count): flagged SfMayBlock.
              - fd (ARG1) is vetted with ML_(fd_allowed); if it is refused
                the status is set to failure with EBADF and no memory
                checks are made.
              - vector (ARG2) is checked as an array of count (ARG3)
                struct vki_iovec entries to be read.
              - each entry's iov_base/iov_len range is then checked as
                memory to be read.
              count is interpreted as an Int for the per-entry loop; the
              array check uses the same interpretation so that a negative
              count does not turn into an enormous unsigned byte length. */
   4247    Int i;
   4248    struct vki_iovec * vec;
   4249    *flags |= SfMayBlock;
   4250    PRINT("sys_writev ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
   4251    PRE_REG_READ3(ssize_t, "writev",
   4252                  unsigned long, fd, const struct iovec *, vector,
   4253                  unsigned long, count);
   4254    if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
   4255       SET_STATUS_Failure( VKI_EBADF );
   4256    } else {
              /* Skip the array check for a negative count: multiplying it
                 (as an unsigned word) by the entry size would wrap and
                 describe a meaningless range. */
              if ((Int)ARG3 >= 0) {
   4257          PRE_MEM_READ( "writev(vector)",
   4258                        ARG2, ARG3 * sizeof(struct vki_iovec) );
              }
   4259       if (ARG2 != 0) {
   4260          /* ToDo: don't do any of the following if the vector is invalid */
   4261          vec = (struct vki_iovec *)ARG2;
   4262          for (i = 0; i < (Int)ARG3; i++)
   4263             PRE_MEM_READ( "writev(vector[...])",
   4264                            (Addr)vec[i].iov_base, vec[i].iov_len );
   4265       }
   4266    }
   4267 }
   4268 
   4269 PRE(sys_utimes)
   4270 {
           /* utimes(filename, tvp): ARG1 is checked as a NUL-terminated
              string to be read.  tvp (ARG2) is optional; when non-zero it
              is treated as two consecutive struct vki_timeval objects, the
              second located sizeof(struct vki_timeval) bytes after the
              first, each checked with PRE_timeval_READ.
              NOTE(review): FUSE_COMPATIBLE_MAY_BLOCK() is defined outside
              this view -- presumably it sets SfMayBlock conditionally;
              confirm. */
   4271    FUSE_COMPATIBLE_MAY_BLOCK();
   4272    PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   4273    PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
   4274    PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
   4275    if (ARG2 != 0) {
   4276       PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
   4277       PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
   4278    }
   4279 }
   4280 
   4281 PRE(sys_acct)
   4282 {
           /* acct(filename): ARG1 is checked as a NUL-terminated string to be
              read.
              NOTE(review): acct(2) is documented as accepting a NULL
              filename to switch accounting off, and there is no
              ARG1 != 0 guard here -- confirm how PRINT's %s and
              PRE_MEM_RASCIIZ behave for address 0. */
   4283    PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
   4284    PRE_REG_READ1(long, "acct", const char *, filename);
   4285    PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
   4286 }
   4287 
   4288 PRE(sys_pause)
   4289 {
           /* pause(): takes no arguments (PRE_REG_READ0) and is flagged
              SfMayBlock. */
   4290    *flags |= SfMayBlock;
   4291    PRINT("sys_pause ( )");
   4292    PRE_REG_READ0(long, "pause");
   4293 }
   4294 
   4295 PRE(sys_sigaltstack)
   4296 {
           /* sigaltstack(ss, oss): both pointers are optional.
              The status for this syscall is set here, in the PRE handler,
              from the result of VG_(do_sys_sigaltstack). */
   4297    PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
   4298    PRE_REG_READ2(int, "sigaltstack",
   4299                  const vki_stack_t *, ss, vki_stack_t *, oss);
   4300    if (ARG1 != 0) {
              /* The input struct is checked one field at a time (ss_sp,
                 ss_flags, ss_size) rather than as a single
                 sizeof(vki_stack_t) range -- presumably so that any
                 padding between fields is not reported; confirm. */
   4301       const vki_stack_t *ss = (vki_stack_t *)ARG1;
   4302       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
   4303       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
   4304       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
   4305    }
   4306    if (ARG2 != 0) {
   4307       PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
   4308    }
   4309 
   4310    SET_STATUS_from_SysRes(
   4311       VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
   4312                               (vki_stack_t*)ARG2)
   4313    );
   4314 }
   4315 POST(sys_sigaltstack)
   4316 {
           /* Asserts that the call succeeded, then records the vki_stack_t at
              oss (ARG2) as written when the result is 0 and oss is
              non-zero. */
   4317    vg_assert(SUCCESS);
   4318    if (RES == 0 && ARG2 != 0)
   4319       POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
   4320 }
   4321 
   4322 PRE(sys_sethostname)
   4323 {
           /* sethostname(name, len): name (ARG1) is checked as a plain buffer
              of len (ARG2) bytes to be read -- a length-delimited range,
              not a NUL-terminated string. */
   4324    PRINT("sys_sethostname ( %#lx, %ld )", ARG1,ARG2);
   4325    PRE_REG_READ2(long, "sethostname", char *, name, int, len);
   4326    PRE_MEM_READ( "sethostname(name)", ARG1, ARG2 );
   4327 }
   4328 
   4329 #undef PRE
   4330 #undef POST
   4331 
   4332 #endif // defined(VGO_linux) || defined(VGO_darwin)
   4333 
   4334 /*--------------------------------------------------------------------*/
   4335 /*--- end                                                          ---*/
   4336 /*--------------------------------------------------------------------*/
   4337