      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Wrappers for generic Unix system calls                       ---*/
      4 /*---                                            syswrap-generic.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2000-2011 Julian Seward
     12       jseward (at) acm.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #if defined(VGO_linux) || defined(VGO_darwin)
     33 
     34 #include "pub_core_basics.h"
     35 #include "pub_core_vki.h"
     36 #include "pub_core_vkiscnums.h"
     37 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
     38 #include "pub_core_threadstate.h"
     39 #include "pub_core_debuginfo.h"     // VG_(di_notify_*)
     40 #include "pub_core_aspacemgr.h"
     41 #include "pub_core_transtab.h"      // VG_(discard_translations)
     42 #include "pub_core_xarray.h"
     43 #include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
     44 #include "pub_core_debuglog.h"
     45 #include "pub_core_errormgr.h"
     46 #include "pub_tool_gdbserver.h"     // VG_(gdbserver)
     47 #include "pub_core_libcbase.h"
     48 #include "pub_core_libcassert.h"
     49 #include "pub_core_libcfile.h"
     50 #include "pub_core_libcprint.h"
     51 #include "pub_core_libcproc.h"
     52 #include "pub_core_libcsignal.h"
     53 #include "pub_core_machine.h"       // VG_(get_SP)
     54 #include "pub_core_mallocfree.h"
     55 #include "pub_core_options.h"
     56 #include "pub_core_scheduler.h"
     57 #include "pub_core_signals.h"
     58 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     59 #include "pub_core_syscall.h"
     60 #include "pub_core_syswrap.h"
     61 #include "pub_core_tooliface.h"
     62 #include "pub_core_ume.h"
     63 
     64 #include "priv_types_n_macros.h"
     65 #include "priv_syswrap-generic.h"
     66 
     67 #include "config.h"
     68 
     69 
/* Returns True iff the address range is something the client can
   plausibly mess with: all of it either already belongs to the
   client, or is free, or is a reservation. */
     73 
     74 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
     75                                    const Char *syscallname)
     76 {
     77    Bool ret;
     78 
     79    if (size == 0)
     80       return True;
     81 
     82    ret = VG_(am_is_valid_for_client_or_free_or_resvn)
     83             (start,size,VKI_PROT_NONE);
     84 
     85    if (0)
     86       VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
     87 		  syscallname, start, start+size-1, (Int)ret);
     88 
     89    if (!ret && syscallname != NULL) {
     90       VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
     91                                "to modify addresses %#lx-%#lx\n",
     92                                syscallname, start, start+size-1);
     93       if (VG_(clo_verbosity) > 1) {
     94          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
     95       }
     96    }
     97 
     98    return ret;
     99 }
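
/* Usage sketch (simplified, not a verbatim quote of any wrapper):
   address-taking wrappers typically reject a call up front if the
   range is not plausibly the client's, along the lines of

      if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
         SET_STATUS_Failure( VKI_EINVAL );

   where ARG1/ARG2 and SET_STATUS_Failure are the syscall-wrapper
   macros from priv_types_n_macros.h; the actual handlers live
   elsewhere in m_syswrap. */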
    100 
    101 
    102 Bool ML_(client_signal_OK)(Int sigNo)
    103 {
    104    /* signal 0 is OK for kill */
    105    Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
    106 
    107    //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
    108 
    109    return ret;
    110 }
    111 
    112 
/* Handy small function to help stop wrappers from segfaulting when
   presented with bogus client addresses.  It is not used for
   generating user-visible errors. */
    116 
    117 Bool ML_(safe_to_deref) ( void* start, SizeT size )
    118 {
    119    return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
    120 }
    121 
    122 
    123 /* ---------------------------------------------------------------------
    124    Doing mmap, mremap
    125    ------------------------------------------------------------------ */
    126 
    127 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
    128    munmap, mprotect (and mremap??) work at the page level.  So addresses
    129    and lengths must be adjusted for this. */
    130 
/* Mash around start and length so that the area exactly covers
   an integral number of pages.  If we don't do that, memcheck's
   idea of addressable memory diverges from the kernel's, which
   causes the leak detector to crash. */
    135 static
    136 void page_align_addr_and_len( Addr* a, SizeT* len)
    137 {
    138    Addr ra;
    139 
    140    ra = VG_PGROUNDDN(*a);
    141    *len = VG_PGROUNDUP(*a + *len) - ra;
    142    *a = ra;
    143 }
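
/* Illustrative example (numbers invented, assuming 4 KiB pages):

      Addr  a   = 0x4001234;      // unaligned start
      SizeT len = 0x100;          // 256 bytes
      page_align_addr_and_len( &a, &len );
      // now a == 0x4001000 and len == 0x1000, i.e. the single page
      // that wholly contains the original [0x4001234, 0x4001333]
      // range.
*/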
    144 
    145 static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
    146                                 UInt flags, Int fd, Off64T offset)
    147 {
    148    Bool d;
    149 
    150    /* 'a' is the return value from a real kernel mmap, hence: */
    151    vg_assert(VG_IS_PAGE_ALIGNED(a));
    152    /* whereas len is whatever the syscall supplied.  So: */
    153    len = VG_PGROUNDUP(len);
    154 
    155    d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
    156 
    157    if (d)
    158       VG_(discard_translations)( (Addr64)a, (ULong)len,
    159                                  "notify_core_of_mmap" );
    160 }
    161 
    162 static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
    163 {
    164    SizeT fourgig = (1ULL << 32);
    165    SizeT guardpage = 10 * fourgig;
    166    Bool rr, ww, xx;
    167 
    168    /* 'a' is the return value from a real kernel mmap, hence: */
    169    vg_assert(VG_IS_PAGE_ALIGNED(a));
    170    /* whereas len is whatever the syscall supplied.  So: */
    171    len = VG_PGROUNDUP(len);
    172 
    173    rr = toBool(prot & VKI_PROT_READ);
    174    ww = toBool(prot & VKI_PROT_WRITE);
    175    xx = toBool(prot & VKI_PROT_EXEC);
    176 
    177 #ifdef VGA_amd64
    178    if (len >= fourgig + 2 * guardpage) {
    179      VG_(printf)("Valgrind: ignoring NaCl's mmap(84G)\n");
    180      return;
    181    }
    182 #endif  // VGA_amd64
    183    VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
    184 }
    185 
    186 
    187 /* When a client mmap has been successfully done, this function must
    188    be called.  It notifies both aspacem and the tool of the new
    189    mapping.
    190 
    191    JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
    192    it is called from is POST(sys_io_setup).  In particular,
    193    ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
    194    client mmap.  But it doesn't call this function; instead it does the
    195    relevant notifications itself.  Here, we just pass di_handle=0 to
    196    notify_tool_of_mmap as we have no better information.  But really this
    197    function should be done away with; problem is I don't understand what
    198    POST(sys_io_setup) does or how it works.
    199 
    200    [However, this function is used lots for Darwin, because
    201     ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
    202  */
    203 void
    204 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
    205                                     UInt flags, Int fd, Off64T offset )
    206 {
    207    // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
    208    // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
    209    // Should it?  --njn
    210    notify_core_of_mmap(a, len, prot, flags, fd, offset);
    211    notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
    212 }
    213 
    214 void
    215 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
    216 {
    217    Bool d;
    218 
    219    page_align_addr_and_len(&a, &len);
    220    d = VG_(am_notify_munmap)(a, len);
    221    VG_TRACK( die_mem_munmap, a, len );
    222    VG_(di_notify_munmap)( a, len );
    223    if (d)
    224       VG_(discard_translations)( (Addr64)a, (ULong)len,
    225                                  "ML_(notify_core_and_tool_of_munmap)" );
    226 }
    227 
    228 void
    229 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
    230 {
    231    Bool rr = toBool(prot & VKI_PROT_READ);
    232    Bool ww = toBool(prot & VKI_PROT_WRITE);
    233    Bool xx = toBool(prot & VKI_PROT_EXEC);
    234    Bool d;
    235 
    236    page_align_addr_and_len(&a, &len);
    237    d = VG_(am_notify_mprotect)(a, len, prot);
    238    VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
    239    VG_(di_notify_mprotect)( a, len, prot );
    240    if (d)
    241       VG_(discard_translations)( (Addr64)a, (ULong)len,
    242                                  "ML_(notify_core_and_tool_of_mprotect)" );
    243 }
    244 
    245 
    246 
    247 #if HAVE_MREMAP
    248 /* Expand (or shrink) an existing mapping, potentially moving it at
    249    the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
    250 */
    251 static
    252 SysRes do_mremap( Addr old_addr, SizeT old_len,
    253                   Addr new_addr, SizeT new_len,
    254                   UWord flags, ThreadId tid )
    255 {
    256 #  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
    257 
    258    Bool      ok, d;
    259    NSegment const* old_seg;
    260    Addr      advised;
    261    Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
    262    Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
    263 
    264    if (0)
    265       VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
    266                   old_addr,old_len,new_addr,new_len,
    267                   flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
    268                   flags & VKI_MREMAP_FIXED ? "FIXED" : "");
    269    if (0)
    270       VG_(am_show_nsegments)(0, "do_remap: before");
    271 
    272    if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
    273       goto eINVAL;
    274 
    275    if (!VG_IS_PAGE_ALIGNED(old_addr))
    276       goto eINVAL;
    277 
    278    old_len = VG_PGROUNDUP(old_len);
    279    new_len = VG_PGROUNDUP(new_len);
    280 
    281    if (new_len == 0)
    282       goto eINVAL;
    283 
    284    /* kernel doesn't reject this, but we do. */
    285    if (old_len == 0)
    286       goto eINVAL;
    287 
    288    /* reject wraparounds */
    289    if (old_addr + old_len < old_addr)
    290       goto eINVAL;
    291    if (f_fixed == True && new_addr + new_len < new_len)
    292       goto eINVAL;
    293 
    294    /* kernel rejects all fixed, no-move requests (which are
    295       meaningless). */
    296    if (f_fixed == True && f_maymove == False)
    297       goto eINVAL;
    298 
    299    /* Stay away from non-client areas. */
    300    if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
    301       goto eINVAL;
    302 
    303    /* In all remaining cases, if the old range does not fall within a
    304       single segment, fail. */
    305    old_seg = VG_(am_find_nsegment)( old_addr );
    306    if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
    307       goto eINVAL;
    308    if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC)
    309       goto eINVAL;
    310 
    311    vg_assert(old_len > 0);
    312    vg_assert(new_len > 0);
    313    vg_assert(VG_IS_PAGE_ALIGNED(old_len));
    314    vg_assert(VG_IS_PAGE_ALIGNED(new_len));
    315    vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
    316 
    317    /* There are 3 remaining cases:
    318 
    319       * maymove == False
    320 
    321         new space has to be at old address, so:
    322             - shrink    -> unmap end
    323             - same size -> do nothing
    324             - grow      -> if can grow in-place, do so, else fail
    325 
    326       * maymove == True, fixed == False
    327 
    328         new space can be anywhere, so:
    329             - shrink    -> unmap end
    330             - same size -> do nothing
    331             - grow      -> if can grow in-place, do so, else
    332                            move to anywhere large enough, else fail
    333 
    334       * maymove == True, fixed == True
    335 
    336         new space must be at new address, so:
    337 
    338             - if new address is not page aligned, fail
    339             - if new address range overlaps old one, fail
    340             - if new address range cannot be allocated, fail
            - else attempt to move to the new address range with the
              new size; if that fails, fail
    343    */
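
   /* Illustrative mapping of client calls onto these cases (addresses
      and sizes invented, assuming 4 KiB pages):

         // grow a 2-page mapping to 4 pages, allowing a move:
         //   mremap(0x5000000, 2*4096, 4*4096, MREMAP_MAYMOVE)
         //   -> maymove == True, fixed == False
         //   -> grow_in_place_or_move_anywhere_or_fail below
         //
         // shrink the same mapping to 1 page, no flags:
         //   mremap(0x5000000, 2*4096, 1*4096, 0)
         //   -> maymove == False
         //   -> shrink_in_place below
   */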
    344 
    345    if (f_maymove == False) {
    346       /* new space has to be at old address */
    347       if (new_len < old_len)
    348          goto shrink_in_place;
    349       if (new_len > old_len)
    350          goto grow_in_place_or_fail;
    351       goto same_in_place;
    352    }
    353 
    354    if (f_maymove == True && f_fixed == False) {
    355       /* new space can be anywhere */
    356       if (new_len < old_len)
    357          goto shrink_in_place;
    358       if (new_len > old_len)
    359          goto grow_in_place_or_move_anywhere_or_fail;
    360       goto same_in_place;
    361    }
    362 
    363    if (f_maymove == True && f_fixed == True) {
    364       /* new space can only be at the new address */
    365       if (!VG_IS_PAGE_ALIGNED(new_addr))
    366          goto eINVAL;
    367       if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
    368          /* no overlap */
    369       } else {
    370          goto eINVAL;
    371       }
    372       if (new_addr == 0)
    373          goto eINVAL;
    374          /* VG_(am_get_advisory_client_simple) interprets zero to mean
    375             non-fixed, which is not what we want */
    376       advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
    377       if (!ok || advised != new_addr)
    378          goto eNOMEM;
    379       ok = VG_(am_relocate_nooverlap_client)
    380               ( &d, old_addr, old_len, new_addr, new_len );
    381       if (ok) {
    382          VG_TRACK( copy_mem_remap, old_addr, new_addr,
    383                                    MIN_SIZET(old_len,new_len) );
    384          if (new_len > old_len)
    385             VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
    386                       old_seg->hasR, old_seg->hasW, old_seg->hasX,
    387                       0/*di_handle*/ );
    388          VG_TRACK(die_mem_munmap, old_addr, old_len);
    389          if (d) {
    390             VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
    391             VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
    392          }
    393          return VG_(mk_SysRes_Success)( new_addr );
    394       }
    395       goto eNOMEM;
    396    }
    397 
    398    /* end of the 3 cases */
    399    /*NOTREACHED*/ vg_assert(0);
    400 
    401   grow_in_place_or_move_anywhere_or_fail:
    402    {
    403    /* try growing it in-place */
    404    Addr   needA = old_addr + old_len;
    405    SSizeT needL = new_len - old_len;
    406 
    407    vg_assert(needL > 0);
    408    if (needA == 0)
    409       goto eINVAL;
    410       /* VG_(am_get_advisory_client_simple) interprets zero to mean
    411          non-fixed, which is not what we want */
    412    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
    413    if (ok) {
    414       /* VG_(am_get_advisory_client_simple) (first arg == 0, meaning
    415          this-or-nothing) is too lenient, and may allow us to trash
    416          the next segment along.  So make very sure that the proposed
    417          new area really is free.  This is perhaps overly
    418          conservative, but it fixes #129866. */
    419       NSegment const* segLo = VG_(am_find_nsegment)( needA );
    420       NSegment const* segHi = VG_(am_find_nsegment)( needA + needL - 1 );
    421       if (segLo == NULL || segHi == NULL
    422           || segLo != segHi || segLo->kind != SkFree)
    423          ok = False;
    424    }
    425    if (ok && advised == needA) {
    426       ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
    427       if (ok) {
    428          VG_TRACK( new_mem_mmap, needA, needL,
    429                                  old_seg->hasR,
    430                                  old_seg->hasW, old_seg->hasX,
    431                                  0/*di_handle*/ );
    432          if (d)
    433             VG_(discard_translations)( needA, needL, "do_remap(3)" );
    434          return VG_(mk_SysRes_Success)( old_addr );
    435       }
    436    }
    437 
    438    /* that failed.  Look elsewhere. */
    439    advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
    440    if (ok) {
    441       Bool oldR = old_seg->hasR;
    442       Bool oldW = old_seg->hasW;
    443       Bool oldX = old_seg->hasX;
    444       /* assert new area does not overlap old */
    445       vg_assert(advised+new_len-1 < old_addr
    446                 || advised > old_addr+old_len-1);
    447       ok = VG_(am_relocate_nooverlap_client)
    448               ( &d, old_addr, old_len, advised, new_len );
    449       if (ok) {
    450          VG_TRACK( copy_mem_remap, old_addr, advised,
    451                                    MIN_SIZET(old_len,new_len) );
    452          if (new_len > old_len)
    453             VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
    454                       oldR, oldW, oldX, 0/*di_handle*/ );
    455          VG_TRACK(die_mem_munmap, old_addr, old_len);
    456          if (d) {
    457             VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
    458             VG_(discard_translations)( advised, new_len, "do_remap(5)" );
    459          }
    460          return VG_(mk_SysRes_Success)( advised );
    461       }
    462    }
    463    goto eNOMEM;
    464    }
    465    /*NOTREACHED*/ vg_assert(0);
    466 
    467   grow_in_place_or_fail:
    468    {
    469    Addr  needA = old_addr + old_len;
    470    SizeT needL = new_len - old_len;
    471    if (needA == 0)
    472       goto eINVAL;
    473       /* VG_(am_get_advisory_client_simple) interprets zero to mean
    474          non-fixed, which is not what we want */
    475    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
    476    if (ok) {
    477       /* VG_(am_get_advisory_client_simple) (first arg == 0, meaning
    478          this-or-nothing) is too lenient, and may allow us to trash
    479          the next segment along.  So make very sure that the proposed
    480          new area really is free. */
    481       NSegment const* segLo = VG_(am_find_nsegment)( needA );
    482       NSegment const* segHi = VG_(am_find_nsegment)( needA + needL - 1 );
    483       if (segLo == NULL || segHi == NULL
    484           || segLo != segHi || segLo->kind != SkFree)
    485          ok = False;
    486    }
    487    if (!ok || advised != needA)
    488       goto eNOMEM;
    489    ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
    490    if (!ok)
    491       goto eNOMEM;
    492    VG_TRACK( new_mem_mmap, needA, needL,
    493                            old_seg->hasR, old_seg->hasW, old_seg->hasX,
    494                            0/*di_handle*/ );
    495    if (d)
    496       VG_(discard_translations)( needA, needL, "do_remap(6)" );
    497    return VG_(mk_SysRes_Success)( old_addr );
    498    }
    499    /*NOTREACHED*/ vg_assert(0);
    500 
    501   shrink_in_place:
    502    {
    503    SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
    504    if (sr_isError(sres))
    505       return sres;
    506    VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
    507    if (d)
    508       VG_(discard_translations)( old_addr+new_len, old_len-new_len,
    509                                  "do_remap(7)" );
    510    return VG_(mk_SysRes_Success)( old_addr );
    511    }
    512    /*NOTREACHED*/ vg_assert(0);
    513 
    514   same_in_place:
    515    return VG_(mk_SysRes_Success)( old_addr );
    516    /*NOTREACHED*/ vg_assert(0);
    517 
    518   eINVAL:
    519    return VG_(mk_SysRes_Error)( VKI_EINVAL );
    520   eNOMEM:
    521    return VG_(mk_SysRes_Error)( VKI_ENOMEM );
    522 
    523 #  undef MIN_SIZET
    524 }
    525 #endif /* HAVE_MREMAP */
    526 
    527 
    528 /* ---------------------------------------------------------------------
    529    File-descriptor tracking
    530    ------------------------------------------------------------------ */
    531 
    532 /* One of these is allocated for each open file descriptor.  */
    533 typedef struct OpenFd
    534 {
    535    Int fd;                        /* The file descriptor */
    536    Char *pathname;                /* NULL if not a regular file or unknown */
    537    ExeContext *where;             /* NULL if inherited from parent */
    538    struct OpenFd *next, *prev;
    539 } OpenFd;
    540 
    541 /* List of allocated file descriptors. */
    542 static OpenFd *allocated_fds = NULL;
    543 
    544 /* Count of open file descriptors. */
    545 static Int fd_count = 0;
    546 
    547 
    548 /* Note the fact that a file descriptor was just closed. */
    549 static
    550 void record_fd_close(Int fd)
    551 {
    552    OpenFd *i = allocated_fds;
    553 
    554    if (fd >= VG_(fd_hard_limit))
    555       return;			/* Valgrind internal */
    556 
    557    while(i) {
    558       if(i->fd == fd) {
    559          if(i->prev)
    560             i->prev->next = i->next;
    561          else
    562             allocated_fds = i->next;
    563          if(i->next)
    564             i->next->prev = i->prev;
    565          if(i->pathname)
    566             VG_(arena_free) (VG_AR_CORE, i->pathname);
    567          VG_(arena_free) (VG_AR_CORE, i);
    568          fd_count--;
    569          break;
    570       }
    571       i = i->next;
    572    }
    573 }
    574 
    575 /* Note the fact that a file descriptor was just opened.  If the
    576    tid is -1, this indicates an inherited fd.  If the pathname is NULL,
   this either indicates a non-standard file (e.g. a pipe or socket or
    578    some such thing) or that we don't know the filename.  If the fd is
    579    already open, then we're probably doing a dup2() to an existing fd,
    580    so just overwrite the existing one. */
    581 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd, char *pathname)
    582 {
    583    OpenFd *i;
    584 
    585    if (fd >= VG_(fd_hard_limit))
    586       return;			/* Valgrind internal */
    587 
    588    /* Check to see if this fd is already open. */
    589    i = allocated_fds;
    590    while (i) {
    591       if (i->fd == fd) {
    592          if (i->pathname) VG_(arena_free)(VG_AR_CORE, i->pathname);
    593          break;
    594       }
    595       i = i->next;
    596    }
    597 
   /* Not already recorded: allocate a new OpenFd */
    599    if (i == NULL) {
    600       i = VG_(arena_malloc)(VG_AR_CORE, "syswrap.rfdowgn.1", sizeof(OpenFd));
    601 
    602       i->prev = NULL;
    603       i->next = allocated_fds;
    604       if(allocated_fds) allocated_fds->prev = i;
    605       allocated_fds = i;
    606       fd_count++;
    607    }
    608 
    609    i->fd = fd;
    610    i->pathname = VG_(arena_strdup)(VG_AR_CORE, "syswrap.rfdowgn.2", pathname);
    611    i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
    612 }
    613 
    614 // Record opening of an fd, and find its name.
    615 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
    616 {
    617    static HChar buf[VKI_PATH_MAX];
    618    Char* name;
    619    if (VG_(resolve_filename)(fd, buf, VKI_PATH_MAX))
    620       name = buf;
    621    else
    622       name = NULL;
    623 
    624    ML_(record_fd_open_with_given_name)(tid, fd, name);
    625 }
    626 
    627 // Record opening of a nameless fd.
    628 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
    629 {
    630    ML_(record_fd_open_with_given_name)(tid, fd, NULL);
    631 }
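
/* Usage sketch (simplified): an fd-creating syscall's POST handler
   records the new descriptor with whichever variant matches the name
   information available:

      // name known from the syscall's own arguments:
      //    ML_(record_fd_open_with_given_name)(tid, fd, "/etc/passwd");
      // name recoverable via VG_(resolve_filename):
      //    ML_(record_fd_open_named)(tid, fd);
      // no meaningful name (pipes, sockets, ...):
      //    ML_(record_fd_open_nameless)(tid, fd);
*/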
    632 
    633 static
    634 Char *unix2name(struct vki_sockaddr_un *sa, UInt len, Char *name)
    635 {
    636    if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
    637       VG_(sprintf)(name, "<unknown>");
    638    } else {
    639       VG_(sprintf)(name, "%s", sa->sun_path);
    640    }
    641 
    642    return name;
    643 }
    644 
    645 static
    646 Char *inet2name(struct vki_sockaddr_in *sa, UInt len, Char *name)
    647 {
    648    if (sa == NULL || len == 0) {
    649       VG_(sprintf)(name, "<unknown>");
    650    } else {
    651       UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
    652       if (addr == 0) {
    653          VG_(sprintf)(name, "<unbound>");
    654       } else {
    655          VG_(sprintf)(name, "%u.%u.%u.%u:%u",
    656                       (addr>>24) & 0xFF, (addr>>16) & 0xFF,
    657                       (addr>>8) & 0xFF, addr & 0xFF,
    658                       VG_(ntohs)(sa->sin_port));
    659       }
    660    }
    661 
    662    return name;
    663 }
    664 
    665 /*
 * Try to get some details about a socket.
    667  */
    668 static void
    669 getsockdetails(Int fd)
    670 {
    671    union u {
    672       struct vki_sockaddr a;
    673       struct vki_sockaddr_in in;
    674       struct vki_sockaddr_un un;
    675    } laddr;
    676    UInt llen;
    677 
    678    llen = sizeof(laddr);
    679    VG_(memset)(&laddr, 0, llen);
    680 
    681    if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
    682       switch(laddr.a.sa_family) {
    683       case VKI_AF_INET: {
    684          static char lname[32];
    685          static char pname[32];
    686          struct vki_sockaddr_in paddr;
    687          UInt plen = sizeof(struct vki_sockaddr_in);
    688 
    689          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
    690             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
    691                          inet2name(&(laddr.in), llen, lname),
    692                          inet2name(&paddr, plen, pname));
    693          } else {
    694             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
    695                          fd, inet2name(&(laddr.in), llen, lname));
    696          }
    697          return;
    698          }
    699       case VKI_AF_UNIX: {
    700          static char lname[256];
    701          VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
    702                       unix2name(&(laddr.un), llen, lname));
    703          return;
    704          }
    705       default:
    706          VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
    707                       laddr.a.sa_family, fd);
    708          return;
    709       }
    710    }
    711 
    712    VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
    713 }
    714 
    715 
    716 /* Dump out a summary, and a more detailed list, of open file descriptors. */
    717 void VG_(show_open_fds) (void)
    718 {
    719    OpenFd *i = allocated_fds;
    720 
    721    VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open at exit.\n", fd_count);
    722 
    723    while (i) {
    724       if (i->pathname) {
    725          VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
    726                       i->pathname);
    727       } else {
    728          Int val;
    729          UInt len = sizeof(val);
    730 
    731          if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
    732              == -1) {
    733             VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
    734          } else {
    735             getsockdetails(i->fd);
    736          }
    737       }
    738 
    739       if(i->where) {
    740          VG_(pp_ExeContext)(i->where);
    741          VG_(message)(Vg_UserMsg, "\n");
    742       } else {
    743          VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
    744          VG_(message)(Vg_UserMsg, "\n");
    745       }
    746 
    747       i = i->next;
    748    }
    749 
    750    VG_(message)(Vg_UserMsg, "\n");
    751 }
    752 
    753 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
    754    have /proc support compiled in, or a non-Linux kernel), then we need to
    755    find out what file descriptors we inherited from our parent process the
    756    hard way - by checking each fd in turn. */
    757 static
    758 void init_preopened_fds_without_proc_self_fd(void)
    759 {
    760    struct vki_rlimit lim;
    761    UInt count;
    762    Int i;
    763 
    764    if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
    765       /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
    766          an arbitrarily high number.  1024 happens to be the limit in
    767          the 2.4 Linux kernels. */
    768       count = 1024;
    769    } else {
    770       count = lim.rlim_cur;
    771    }
    772 
    773    for (i = 0; i < count; i++)
    774       if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
    775          ML_(record_fd_open_named)(-1, i);
    776 }
    777 
    778 /* Initialize the list of open file descriptors with the file descriptors
   we inherited from our parent process. */
    780 
    781 void VG_(init_preopened_fds)(void)
    782 {
    783 // DDD: should probably use HAVE_PROC here or similar, instead.
    784 #if defined(VGO_linux)
    785    Int ret;
    786    struct vki_dirent d;
    787    SysRes f;
    788 
    789    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
    790    if (sr_isError(f)) {
    791       init_preopened_fds_without_proc_self_fd();
    792       return;
    793    }
    794 
    795    while ((ret = VG_(getdents)(sr_Res(f), &d, sizeof(d))) != 0) {
    796       if (ret == -1)
    797          goto out;
    798 
    799       if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
    800          Char* s;
    801          Int fno = VG_(strtoll10)(d.d_name, &s);
    802          if (*s == '\0') {
    803             if (fno != sr_Res(f))
    804                if (VG_(clo_track_fds))
    805                   ML_(record_fd_open_named)(-1, fno);
    806          } else {
    807             VG_(message)(Vg_DebugMsg,
    808                "Warning: invalid file name in /proc/self/fd: %s\n",
    809                d.d_name);
    810          }
    811       }
    812 
    813       VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
    814    }
    815 
    816   out:
    817    VG_(close)(sr_Res(f));
    818 
    819 #elif defined(VGO_darwin)
    820    init_preopened_fds_without_proc_self_fd();
    821 
    822 #else
    823 #  error Unknown OS
    824 #endif
    825 }
    826 
    827 static
    828 Char *strdupcat ( HChar* cc, const Char *s1, const Char *s2, ArenaId aid )
    829 {
    830    UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
    831    Char *result = VG_(arena_malloc) ( aid, cc, len );
    832    VG_(strcpy) ( result, s1 );
    833    VG_(strcat) ( result, s2 );
    834    return result;
    835 }
    836 
    837 static
    838 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
    839                             Char *msg, Addr base, SizeT size )
    840 {
    841    Char *outmsg = strdupcat ( "di.syswrap.pmrs.1",
    842                               "socketcall.sendmsg", msg, VG_AR_CORE );
    843    PRE_MEM_READ( outmsg, base, size );
    844    VG_(arena_free) ( VG_AR_CORE, outmsg );
    845 }
    846 
    847 static
    848 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
    849                              Char *msg, Addr base, SizeT size )
    850 {
    851    Char *outmsg = strdupcat ( "di.syswrap.pmwr.1",
    852                               "socketcall.recvmsg", msg, VG_AR_CORE );
    853    if ( read )
    854       PRE_MEM_READ( outmsg, base, size );
    855    else
    856       PRE_MEM_WRITE( outmsg, base, size );
    857    VG_(arena_free) ( VG_AR_CORE, outmsg );
    858 }
    859 
    860 static
    861 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
    862                               Char *fieldName, Addr base, SizeT size )
    863 {
    864    if ( !read )
    865       POST_MEM_WRITE( base, size );
    866 }
    867 
    868 static
    869 void msghdr_foreachfield (
    870         ThreadId tid,
    871         struct vki_msghdr *msg,
    872         void (*foreach_func)( ThreadId, Bool, Char *, Addr, SizeT )
    873      )
    874 {
    875    if ( !msg )
    876       return;
    877 
    878    foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
    879    foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
    880    foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
    881    foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
    882    foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
    883    foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
    884    foreach_func ( tid, False, "(msg)", (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );
    885 
    886    if ( msg->msg_name )
    887       foreach_func ( tid, False,
    888                      "(msg.msg_name)",
    889                      (Addr)msg->msg_name, msg->msg_namelen );
    890 
    891    if ( msg->msg_iov ) {
    892       struct vki_iovec *iov = msg->msg_iov;
    893       UInt i;
    894 
    895       foreach_func ( tid, True,
    896                      "(msg.msg_iov)",
    897                      (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );
    898 
    899       for ( i = 0; i < msg->msg_iovlen; ++i, ++iov )
    900          foreach_func ( tid, False,
    901                         "(msg.msg_iov[i])",
    902                         (Addr)iov->iov_base, iov->iov_len );
    903    }
    904 
    905    if ( msg->msg_control )
    906       foreach_func ( tid, False,
    907                      "(msg.msg_control)",
    908                      (Addr)msg->msg_control, msg->msg_controllen );
    909 }
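
/* Usage sketch (simplified): the sendmsg/recvmsg wrappers walk the
   whole struct vki_msghdr with this iterator, passing one of the
   helpers above as the callback, along the lines of:

      // PRE(sendmsg):  every field is an input
      //    msghdr_foreachfield(tid, msg, pre_mem_read_sendmsg);
      // PRE(recvmsg):  name/iov/control buffers are outputs
      //    msghdr_foreachfield(tid, msg, pre_mem_write_recvmsg);
      // POST(recvmsg): mark what the kernel actually wrote
      //    msghdr_foreachfield(tid, msg, post_mem_write_recvmsg);
*/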
    910 
    911 static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
    912 {
    913    struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);
    914 
    915    while (cm) {
    916       if (cm->cmsg_level == VKI_SOL_SOCKET &&
    917           cm->cmsg_type == VKI_SCM_RIGHTS ) {
    918          Int *fds = (Int *) VKI_CMSG_DATA(cm);
    919          Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
    920                          / sizeof(int);
    921          Int i;
    922 
    923          for (i = 0; i < fdc; i++)
    924             if(VG_(clo_track_fds))
    925                // XXX: must we check the range on these fds with
    926                //      ML_(fd_allowed)()?
    927                ML_(record_fd_open_named)(tid, fds[i]);
    928       }
    929 
    930       cm = VKI_CMSG_NXTHDR(msg, cm);
    931    }
    932 }
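
/* Worked example (illustrative, assuming a typical 64-bit Linux layout
   where sizeof(struct vki_cmsghdr) == 16 and sizeof(int) == 4): a
   sender passing 3 fds via SCM_RIGHTS produces

      cmsg_len == VKI_CMSG_ALIGN(16) + 3*4 == 28

   so the computation above recovers

      fdc == (28 - 16) / 4 == 3

   and each of fds[0..2] gets recorded as a newly opened descriptor. */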
    933 
    934 /* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
    935 static
    936 void pre_mem_read_sockaddr ( ThreadId tid,
    937                              Char *description,
    938                              struct vki_sockaddr *sa, UInt salen )
    939 {
    940    Char *outmsg;
    941    struct vki_sockaddr_un*  sun  = (struct vki_sockaddr_un *)sa;
    942    struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
    943    struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
    944 
    945    /* NULL/zero-length sockaddrs are legal */
    946    if ( sa == NULL || salen == 0 ) return;
    947 
    948    outmsg = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.pmr_sockaddr.1",
    949                                 VG_(strlen)( description ) + 30 );
    950 
    951    VG_(sprintf) ( outmsg, description, "sa_family" );
    952    PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
    953 
    954    switch (sa->sa_family) {
    955 
    956       case VKI_AF_UNIX:
    957          VG_(sprintf) ( outmsg, description, "sun_path" );
    958          PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
    959          // GrP fixme max of sun_len-2? what about nul char?
    960          break;
    961 
    962       case VKI_AF_INET:
    963          VG_(sprintf) ( outmsg, description, "sin_port" );
    964          PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
    965          VG_(sprintf) ( outmsg, description, "sin_addr" );
    966          PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
    967          break;
    968 
    969       case VKI_AF_INET6:
    970          VG_(sprintf) ( outmsg, description, "sin6_port" );
    971          PRE_MEM_READ( outmsg,
    972             (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
    973          VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
    974          PRE_MEM_READ( outmsg,
    975             (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
    976          VG_(sprintf) ( outmsg, description, "sin6_addr" );
    977          PRE_MEM_READ( outmsg,
    978             (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
    979          VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
    980          PRE_MEM_READ( outmsg,
    981             (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
    982          break;
    983 
    984       default:
    985          VG_(sprintf) ( outmsg, description, "" );
    986          PRE_MEM_READ( outmsg, (Addr) sa, salen );
    987          break;
    988    }
    989 
    990    VG_(arena_free) ( VG_AR_CORE, outmsg );
    991 }
    992 
    993 /* Dereference a pointer to a UInt. */
    994 static UInt deref_UInt ( ThreadId tid, Addr a, Char* s )
    995 {
    996    UInt* a_p = (UInt*)a;
    997    PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
    998    if (a_p == NULL)
    999       return 0;
   1000    else
   1001       return *a_p;
   1002 }
   1003 
   1004 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
   1005                                   Char* buf_s, Char* buflen_s )
   1006 {
   1007    if (VG_(tdict).track_pre_mem_write) {
   1008       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
   1009       if (buflen_in > 0) {
   1010          VG_(tdict).track_pre_mem_write(
   1011             Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
   1012       }
   1013    }
   1014 }
   1015 
   1016 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
   1017                                    Addr buf_p, Addr buflen_p, Char* s )
   1018 {
   1019    if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
   1020       UInt buflen_out = deref_UInt( tid, buflen_p, s);
   1021       if (buflen_out > 0 && buf_p != (Addr)NULL) {
   1022          VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
   1023       }
   1024    }
   1025 }
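
/* Usage sketch: these two helpers implement the usual value-result
   "buffer plus length pointer" protocol.  For a getsockname-style call
   (see ML_(generic_PRE_sys_getsockname) further down) the pattern is:

      // PRE: *namelen_p is an input; the buffer must be writable up
      //      to that many bytes:
      //    ML_(buf_and_len_pre_check)(tid, name_p, namelen_p,
      //                               "getsockname(name)",
      //                               "getsockname(namelen_in)");
      // POST (success only): the kernel has updated *namelen_p and
      //      filled in that many bytes of the buffer:
      //    ML_(buf_and_len_post_check)(tid, res, name_p, namelen_p,
      //                                "getsockname(namelen_out)");
*/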
   1026 
   1027 /* ---------------------------------------------------------------------
   1028    Data seg end, for brk()
   1029    ------------------------------------------------------------------ */
   1030 
   1031 /*   +--------+------------+
   1032      | anon   |    resvn   |
   1033      +--------+------------+
   1034 
   1035      ^     ^  ^
   1036      |     |  boundary is page aligned
   1037      |     VG_(brk_limit) -- no alignment constraint
   1038      VG_(brk_base) -- page aligned -- does not move
   1039 
   1040      Both the anon part and the reservation part are always at least
   1041      one page.
   1042 */
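
/* Worked example (numbers invented, 4 KiB pages, anon part currently
   one page):

      VG_(brk_base)  == 0x0804c000      // fixed
      VG_(brk_limit) == 0x0804c123      // current client brk
      anon segment   == [0x0804c000, 0x0804cfff]
      reservation    == [0x0804d000, ...]

   brk(0x0804c800) stays inside the anon segment, so do_brk() below
   just moves VG_(brk_limit).  brk(0x08050000) needs more pages, so
   do_brk() asks aspacem to grow the anon segment into the reservation
   up to VG_PGROUNDUP(0x08050000); if that would shrink the reservation
   below one page, the request fails and the old limit is returned. */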
   1043 
   1044 /* Set the new data segment end to NEWBRK.  If this succeeds, return
   1045    NEWBRK, else return the current data segment end. */
   1046 
   1047 static Addr do_brk ( Addr newbrk )
   1048 {
   1049    NSegment const* aseg;
   1050    NSegment const* rseg;
   1051    Addr newbrkP;
   1052    SizeT delta;
   1053    Bool ok;
   1054    Bool debug = False;
   1055 
   1056    if (debug)
   1057       VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
   1058 		  VG_(brk_base), VG_(brk_limit), newbrk);
   1059 
   1060 #  if 0
   1061    if (0) show_segments("in_brk");
   1062 #  endif
   1063 
   1064    if (newbrk < VG_(brk_base))
   1065       /* Clearly impossible. */
   1066       goto bad;
   1067 
   1068    if (newbrk >= VG_(brk_base) && newbrk < VG_(brk_limit)) {
   1069       /* shrinking the data segment.  Be lazy and don't munmap the
   1070          excess area. */
   1071       NSegment const * seg = VG_(am_find_nsegment)(newbrk);
   1072       if (seg && seg->hasT)
   1073          VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
   1074                                     "do_brk(shrink)" );
   1075       /* Since we're being lazy and not unmapping pages, we have to
   1076          zero out the area, so that if the area later comes back into
   1077          circulation, it will be filled with zeroes, as if it really
   1078          had been unmapped and later remapped.  Be a bit paranoid and
   1079          try hard to ensure we're not going to segfault by doing the
   1080          write - check both ends of the range are in the same segment
   1081          and that segment is writable. */
   1082       if (seg) {
   1083          /* pre: newbrk < VG_(brk_limit)
   1084               => newbrk <= VG_(brk_limit)-1 */
   1085          NSegment const * seg2;
   1086          vg_assert(newbrk < VG_(brk_limit));
   1087          seg2 = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   1088          if (seg2 && seg == seg2 && seg->hasW)
   1089             VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
   1090       }
   1091 
   1092       VG_(brk_limit) = newbrk;
   1093       return newbrk;
   1094    }
   1095 
   1096    /* otherwise we're expanding the brk segment. */
   1097    if (VG_(brk_limit) > VG_(brk_base))
   1098       aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   1099    else
   1100       aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
   1101    rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
   1102 
   1103    /* These should be assured by setup_client_dataseg in m_main. */
   1104    vg_assert(aseg);
   1105    vg_assert(rseg);
   1106    vg_assert(aseg->kind == SkAnonC);
   1107    vg_assert(rseg->kind == SkResvn);
   1108    vg_assert(aseg->end+1 == rseg->start);
   1109 
   1110    vg_assert(newbrk >= VG_(brk_base));
   1111    if (newbrk <= rseg->start) {
   1112       /* still fits within the anon segment. */
   1113       VG_(brk_limit) = newbrk;
   1114       return newbrk;
   1115    }
   1116 
   1117    if (newbrk > rseg->end+1 - VKI_PAGE_SIZE) {
   1118       /* request is too large -- the resvn would fall below 1 page,
   1119          which isn't allowed. */
   1120       goto bad;
   1121    }
   1122 
   1123    newbrkP = VG_PGROUNDUP(newbrk);
   1124    vg_assert(newbrkP > rseg->start && newbrkP <= rseg->end+1 - VKI_PAGE_SIZE);
   1125    delta = newbrkP - rseg->start;
   1126    vg_assert(delta > 0);
   1127    vg_assert(VG_IS_PAGE_ALIGNED(delta));
   1128 
   1129    ok = VG_(am_extend_into_adjacent_reservation_client)( (NSegment*)aseg, delta );
   1130    if (!ok) goto bad;
   1131 
   1132    VG_(brk_limit) = newbrk;
   1133    return newbrk;
   1134 
   1135   bad:
   1136    return VG_(brk_limit);
   1137 }
   1138 
   1139 
   1140 /* ---------------------------------------------------------------------
   1141    Vet file descriptors for sanity
   1142    ------------------------------------------------------------------ */
   1143 /*
   1144 > - what does the "Bool soft" parameter mean?
   1145 
   1146 (Tom Hughes, 3 Oct 05):
   1147 
   1148 Whether or not to consider a file descriptor invalid if it is above
   1149 the current soft limit.
   1150 
   1151 Basically if we are testing whether a newly created file descriptor is
   1152 valid (in a post handler) then we set soft to true, and if we are
   1153 testing whether a file descriptor that is about to be used (in a pre
   1154 handler) is valid [viz, an already-existing fd] then we set it to false.
   1155 
   1156 The point is that if the (virtual) soft limit is lowered then any
   1157 existing descriptors can still be read/written/closed etc (so long as
   1158 they are below the valgrind reserved descriptors) but no new
   1159 descriptors can be created above the new soft limit.
   1160 
   1161 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
   1162 */
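
/* Usage sketch (simplified): PRE handlers validating an fd the client
   is about to use pass isNewFd == False (the soft limit does not apply
   to descriptors that already exist); POST handlers validating an fd
   the kernel has just created pass True, e.g.

      // PRE, before the syscall runs:
      //    if (!ML_(fd_allowed)(fd, "close", tid, False/*isNewFd*/))
      //       reject the call with EBADF;
      // POST, as in ML_(generic_POST_sys_socket) below:
      //    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True))
      //       close the new fd and return an EMFILE error;
*/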
   1163 
   1164 /* Return true if we're allowed to use or create this fd */
   1165 Bool ML_(fd_allowed)(Int fd, const Char *syscallname, ThreadId tid, Bool isNewFd)
   1166 {
   1167    Bool allowed = True;
   1168 
   1169    /* hard limits always apply */
   1170    if (fd < 0 || fd >= VG_(fd_hard_limit))
   1171       allowed = False;
   1172 
   1173    /* hijacking the output fds is never allowed */
   1174    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
   1175       allowed = False;
   1176 
   1177    /* if creating a new fd (rather than using an existing one), the
   1178       soft limit must also be observed */
   1179    if (isNewFd && fd >= VG_(fd_soft_limit))
   1180       allowed = False;
   1181 
   1182    /* this looks like it ought to be included, but causes problems: */
   1183    /*
   1184    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
   1185       allowed = False;
   1186    */
   1187    /* The difficulty is as follows: consider a program P which expects
   1188       to be able to mess with (redirect) its own stderr (fd 2).
   1189       Usually to deal with P we would issue command line flags to send
   1190       logging somewhere other than stderr, so as not to disrupt P.
   1191       The problem is that -d unilaterally hijacks stderr with no
   1192       consultation with P.  And so, if this check is enabled, P will
   1193       work OK normally but fail if -d is issued.
   1194 
   1195       Basically -d is a hack and you take your chances when using it.
   1196       It's very useful for low level debugging -- particularly at
   1197       startup -- and having its presence change the behaviour of the
   1198       client is exactly what we don't want.  */
   1199 
   1200    /* croak? */
   1201    if ((!allowed) && VG_(showing_core_errors)() ) {
   1202       VG_(message)(Vg_UserMsg,
   1203          "Warning: invalid file descriptor %d in syscall %s()\n",
   1204          fd, syscallname);
   1205       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
   1206 	 VG_(message)(Vg_UserMsg,
   1207             "   Use --log-fd=<number> to select an alternative log fd.\n");
   1208       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
   1209 	 VG_(message)(Vg_UserMsg,
   1210             "   Use --xml-fd=<number> to select an alternative XML "
   1211             "output fd.\n");
   1212       // DDD: consider always printing this stack trace, it's useful.
   1213       // Also consider also making this a proper core error, ie.
   1214       // suppressible and all that.
   1215       if (VG_(clo_verbosity) > 1) {
   1216          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   1217       }
   1218    }
   1219 
   1220    return allowed;
   1221 }
   1222 
   1223 
   1224 /* ---------------------------------------------------------------------
   1225    Deal with a bunch of socket-related syscalls
   1226    ------------------------------------------------------------------ */
   1227 
   1228 /* ------ */
   1229 
   1230 void
   1231 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
   1232                                   UWord arg0, UWord arg1,
   1233                                   UWord arg2, UWord arg3 )
   1234 {
   1235    /* int socketpair(int d, int type, int protocol, int sv[2]); */
   1236    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
   1237                   arg3, 2*sizeof(int) );
   1238 }
   1239 
   1240 SysRes
   1241 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
   1242                                    SysRes res,
   1243                                    UWord arg0, UWord arg1,
   1244                                    UWord arg2, UWord arg3 )
   1245 {
   1246    SysRes r = res;
   1247    Int fd1 = ((Int*)arg3)[0];
   1248    Int fd2 = ((Int*)arg3)[1];
   1249    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1250    POST_MEM_WRITE( arg3, 2*sizeof(int) );
   1251    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
   1252        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
   1253       VG_(close)(fd1);
   1254       VG_(close)(fd2);
   1255       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1256    } else {
   1257       POST_MEM_WRITE( arg3, 2*sizeof(int) );
   1258       if (VG_(clo_track_fds)) {
   1259          ML_(record_fd_open_nameless)(tid, fd1);
   1260          ML_(record_fd_open_nameless)(tid, fd2);
   1261       }
   1262    }
   1263    return r;
   1264 }
   1265 
   1266 /* ------ */
   1267 
   1268 SysRes
   1269 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
   1270 {
   1271    SysRes r = res;
   1272    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1273    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
   1274       VG_(close)(sr_Res(res));
   1275       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1276    } else {
   1277       if (VG_(clo_track_fds))
   1278          ML_(record_fd_open_nameless)(tid, sr_Res(res));
   1279    }
   1280    return r;
   1281 }
   1282 
   1283 /* ------ */
   1284 
   1285 void
   1286 ML_(generic_PRE_sys_bind) ( ThreadId tid,
   1287                             UWord arg0, UWord arg1, UWord arg2 )
   1288 {
   1289    /* int bind(int sockfd, struct sockaddr *my_addr,
   1290                int addrlen); */
   1291    pre_mem_read_sockaddr(
   1292       tid, "socketcall.bind(my_addr.%s)",
   1293       (struct vki_sockaddr *) arg1, arg2
   1294    );
   1295 }
   1296 
   1297 /* ------ */
   1298 
   1299 void
   1300 ML_(generic_PRE_sys_accept) ( ThreadId tid,
   1301                               UWord arg0, UWord arg1, UWord arg2 )
   1302 {
   1303    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
   1304    Addr addr_p     = arg1;
   1305    Addr addrlen_p  = arg2;
   1306    if (addr_p != (Addr)NULL)
   1307       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
   1308                                    "socketcall.accept(addr)",
   1309                                    "socketcall.accept(addrlen_in)" );
   1310 }
   1311 
   1312 SysRes
   1313 ML_(generic_POST_sys_accept) ( ThreadId tid,
   1314                                SysRes res,
   1315                                UWord arg0, UWord arg1, UWord arg2 )
   1316 {
   1317    SysRes r = res;
   1318    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1319    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
   1320       VG_(close)(sr_Res(res));
   1321       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1322    } else {
   1323       Addr addr_p     = arg1;
   1324       Addr addrlen_p  = arg2;
   1325       if (addr_p != (Addr)NULL)
   1326          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
   1327                                        "socketcall.accept(addrlen_out)" );
   1328       if (VG_(clo_track_fds))
   1329           ML_(record_fd_open_nameless)(tid, sr_Res(res));
   1330    }
   1331    return r;
   1332 }
   1333 
   1334 /* ------ */
   1335 
   1336 void
   1337 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
   1338                               UWord arg0, UWord arg1, UWord arg2,
   1339                               UWord arg3, UWord arg4, UWord arg5 )
   1340 {
   1341    /* int sendto(int s, const void *msg, int len,
   1342                  unsigned int flags,
   1343                  const struct sockaddr *to, int tolen); */
   1344    PRE_MEM_READ( "socketcall.sendto(msg)",
   1345                  arg1, /* msg */
   1346                  arg2  /* len */ );
   1347    pre_mem_read_sockaddr(
   1348       tid, "socketcall.sendto(to.%s)",
   1349       (struct vki_sockaddr *) arg4, arg5
   1350    );
   1351 }
   1352 
   1353 /* ------ */
   1354 
   1355 void
   1356 ML_(generic_PRE_sys_send) ( ThreadId tid,
   1357                             UWord arg0, UWord arg1, UWord arg2 )
   1358 {
   1359    /* int send(int s, const void *msg, size_t len, int flags); */
   1360    PRE_MEM_READ( "socketcall.send(msg)",
   1361                   arg1, /* msg */
   1362                   arg2  /* len */ );
   1363 
   1364 }
   1365 
   1366 /* ------ */
   1367 
   1368 void
   1369 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
   1370                                 UWord arg0, UWord arg1, UWord arg2,
   1371                                 UWord arg3, UWord arg4, UWord arg5 )
   1372 {
   1373    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
   1374                    struct sockaddr *from, int *fromlen); */
   1375    Addr buf_p      = arg1;
   1376    Int  len        = arg2;
   1377    Addr from_p     = arg4;
   1378    Addr fromlen_p  = arg5;
   1379    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
   1380    if (from_p != (Addr)NULL)
   1381       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
   1382                                    "socketcall.recvfrom(from)",
   1383                                    "socketcall.recvfrom(fromlen_in)" );
   1384 }
   1385 
   1386 void
   1387 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
   1388                                  SysRes res,
   1389                                  UWord arg0, UWord arg1, UWord arg2,
   1390                                  UWord arg3, UWord arg4, UWord arg5 )
   1391 {
   1392    Addr buf_p      = arg1;
   1393    Int  len        = arg2;
   1394    Addr from_p     = arg4;
   1395    Addr fromlen_p  = arg5;
   1396 
   1397    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1398    if (from_p != (Addr)NULL)
   1399       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
   1400                                     "socketcall.recvfrom(fromlen_out)" );
   1401    POST_MEM_WRITE( buf_p, len );
   1402 }
   1403 
   1404 /* ------ */
   1405 
   1406 void
   1407 ML_(generic_PRE_sys_recv) ( ThreadId tid,
   1408                             UWord arg0, UWord arg1, UWord arg2 )
   1409 {
   1410    /* int recv(int s, void *buf, int len, unsigned int flags); */
   1411    /* man 2 recv says:
   1412       The  recv call is normally used only on a connected socket
   1413       (see connect(2)) and is identical to recvfrom with a  NULL
   1414       from parameter.
   1415    */
   1416    PRE_MEM_WRITE( "socketcall.recv(buf)",
   1417                   arg1, /* buf */
   1418                   arg2  /* len */ );
   1419 }
   1420 
   1421 void
   1422 ML_(generic_POST_sys_recv) ( ThreadId tid,
   1423                              UWord res,
   1424                              UWord arg0, UWord arg1, UWord arg2 )
   1425 {
   1426    if (res >= 0 && arg1 != 0) {
   1427       POST_MEM_WRITE( arg1, /* buf */
   1428                       arg2  /* len */ );
   1429    }
   1430 }
   1431 
   1432 /* ------ */
   1433 
   1434 void
   1435 ML_(generic_PRE_sys_connect) ( ThreadId tid,
   1436                                UWord arg0, UWord arg1, UWord arg2 )
   1437 {
   1438    /* int connect(int sockfd,
   1439                   struct sockaddr *serv_addr, int addrlen ); */
   1440    pre_mem_read_sockaddr( tid,
   1441                           "socketcall.connect(serv_addr.%s)",
   1442                           (struct vki_sockaddr *) arg1, arg2);
   1443 }
   1444 
   1445 /* ------ */
   1446 
   1447 void
   1448 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
   1449                                   UWord arg0, UWord arg1, UWord arg2,
   1450                                   UWord arg3, UWord arg4 )
   1451 {
   1452    /* int setsockopt(int s, int level, int optname,
   1453                      const void *optval, int optlen); */
   1454    PRE_MEM_READ( "socketcall.setsockopt(optval)",
   1455                  arg3, /* optval */
   1456                  arg4  /* optlen */ );
   1457 }
   1458 
   1459 /* ------ */
   1460 
   1461 void
   1462 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
   1463                                    UWord arg0, UWord arg1, UWord arg2 )
   1464 {
   1465    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
   1466    Addr name_p     = arg1;
   1467    Addr namelen_p  = arg2;
   1468    /* Nb: name_p cannot be NULL */
   1469    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
   1470                                 "socketcall.getsockname(name)",
   1471                                 "socketcall.getsockname(namelen_in)" );
   1472 }
   1473 
   1474 void
   1475 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
   1476                                     SysRes res,
   1477                                     UWord arg0, UWord arg1, UWord arg2 )
   1478 {
   1479    Addr name_p     = arg1;
   1480    Addr namelen_p  = arg2;
   1481    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1482    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
   1483                                  "socketcall.getsockname(namelen_out)" );
   1484 }
   1485 
   1486 /* ------ */
   1487 
   1488 void
   1489 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
   1490                                    UWord arg0, UWord arg1, UWord arg2 )
   1491 {
   1492    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
   1493    Addr name_p     = arg1;
   1494    Addr namelen_p  = arg2;
   1495    /* Nb: name_p cannot be NULL */
   1496    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
   1497                                 "socketcall.getpeername(name)",
   1498                                 "socketcall.getpeername(namelen_in)" );
   1499 }
   1500 
   1501 void
   1502 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
   1503                                     SysRes res,
   1504                                     UWord arg0, UWord arg1, UWord arg2 )
   1505 {
   1506    Addr name_p     = arg1;
   1507    Addr namelen_p  = arg2;
   1508    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1509    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
   1510                                  "socketcall.getpeername(namelen_out)" );
   1511 }
   1512 
   1513 /* ------ */
   1514 
   1515 void
   1516 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid,
   1517                                UWord arg0, UWord arg1 )
   1518 {
   1519    /* int sendmsg(int s, const struct msghdr *msg, int flags); */
   1520    struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
   1521    msghdr_foreachfield ( tid, msg, pre_mem_read_sendmsg );
   1522 }
   1523 
   1524 /* ------ */
   1525 
   1526 void
   1527 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid,
   1528                                UWord arg0, UWord arg1 )
   1529 {
   1530    /* int recvmsg(int s, struct msghdr *msg, int flags); */
   1531    struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
   1532    msghdr_foreachfield ( tid, msg, pre_mem_write_recvmsg );
   1533 }
   1534 
   1535 void
   1536 ML_(generic_POST_sys_recvmsg) ( ThreadId tid,
   1537                                 UWord arg0, UWord arg1 )
   1538 {
   1539    struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
   1540    msghdr_foreachfield( tid, msg, post_mem_write_recvmsg );
   1541    check_cmsg_for_fds( tid, msg );
   1542 }
   1543 
   1544 
   1545 /* ---------------------------------------------------------------------
   1546    Deal with a bunch of IPC related syscalls
   1547    ------------------------------------------------------------------ */
   1548 
   1549 /* ------ */
   1550 
   1551 void
   1552 ML_(generic_PRE_sys_semop) ( ThreadId tid,
   1553                              UWord arg0, UWord arg1, UWord arg2 )
   1554 {
   1555    /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
   1556    PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
   1557 }
   1558 
   1559 /* ------ */
   1560 
   1561 void
   1562 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
   1563                                   UWord arg0, UWord arg1,
   1564                                   UWord arg2, UWord arg3 )
   1565 {
   1566    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
   1567                      struct timespec *timeout); */
   1568    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
   1569    if (arg3 != 0)
   1570       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
   1571 }
   1572 
   1573 /* ------ */
   1574 
   1575 static
   1576 UInt get_sem_count( Int semid )
   1577 {
   1578    struct vki_semid_ds buf;
   1579    union vki_semun arg;
   1580    SysRes res;
   1581 
   1582    /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
   1583       (experimental) otherwise complains that the use in the return
   1584       statement below is uninitialised. */
   1585    buf.sem_nsems = 0;
   1586 
   1587    arg.buf = &buf;
   1588 
   1589 #  ifdef __NR_semctl
   1590    res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
   1591 #  else
   1592    res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
   1593                           VKI_IPC_STAT, (UWord)&arg);
   1594 #  endif
   1595    if (sr_isError(res))
   1596       return 0;
   1597 
   1598    return buf.sem_nsems;
   1599 }
   1600 
   1601 void
   1602 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
   1603                               UWord arg0, UWord arg1,
   1604                               UWord arg2, UWord arg3 )
   1605 {
   1606    /* int semctl(int semid, int semnum, int cmd, ...); */
   1607    union vki_semun arg = *(union vki_semun *)&arg3;
   1608    UInt nsems;
   1609    switch (arg2 /* cmd */) {
   1610 #if defined(VKI_IPC_INFO)
   1611    case VKI_IPC_INFO:
   1612    case VKI_SEM_INFO:
   1613    case VKI_IPC_INFO|VKI_IPC_64:
   1614    case VKI_SEM_INFO|VKI_IPC_64:
   1615       PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
   1616                      (Addr)arg.buf, sizeof(struct vki_seminfo) );
   1617       break;
   1618 #endif
   1619 
   1620    case VKI_IPC_STAT:
   1621 #if defined(VKI_SEM_STAT)
   1622    case VKI_SEM_STAT:
   1623 #endif
   1624       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
   1625                      (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1626       break;
   1627 
   1628 #if defined(VKI_IPC_64)
   1629    case VKI_IPC_STAT|VKI_IPC_64:
   1630 #if defined(VKI_SEM_STAT)
   1631    case VKI_SEM_STAT|VKI_IPC_64:
   1632 #endif
   1633       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
   1634                      (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1635       break;
   1636 #endif
   1637 
   1638    case VKI_IPC_SET:
   1639       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
   1640                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1641       break;
   1642 
   1643 #if defined(VKI_IPC_64)
   1644    case VKI_IPC_SET|VKI_IPC_64:
   1645       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
   1646                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1647       break;
   1648 #endif
   1649 
   1650    case VKI_GETALL:
   1651 #if defined(VKI_IPC_64)
   1652    case VKI_GETALL|VKI_IPC_64:
   1653 #endif
   1654       nsems = get_sem_count( arg0 );
   1655       PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
   1656                      (Addr)arg.array, sizeof(unsigned short) * nsems );
   1657       break;
   1658 
   1659    case VKI_SETALL:
   1660 #if defined(VKI_IPC_64)
   1661    case VKI_SETALL|VKI_IPC_64:
   1662 #endif
   1663       nsems = get_sem_count( arg0 );
   1664       PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
   1665                     (Addr)arg.array, sizeof(unsigned short) * nsems );
   1666       break;
   1667    }
   1668 }
   1669 
   1670 void
   1671 ML_(generic_POST_sys_semctl) ( ThreadId tid,
   1672                                UWord res,
   1673                                UWord arg0, UWord arg1,
   1674                                UWord arg2, UWord arg3 )
   1675 {
   1676    union vki_semun arg = *(union vki_semun *)&arg3;
   1677    UInt nsems;
   1678    switch (arg2 /* cmd */) {
   1679 #if defined(VKI_IPC_INFO)
   1680    case VKI_IPC_INFO:
   1681    case VKI_SEM_INFO:
   1682    case VKI_IPC_INFO|VKI_IPC_64:
   1683    case VKI_SEM_INFO|VKI_IPC_64:
   1684       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
   1685       break;
   1686 #endif
   1687 
   1688    case VKI_IPC_STAT:
   1689 #if defined(VKI_SEM_STAT)
   1690    case VKI_SEM_STAT:
   1691 #endif
   1692       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1693       break;
   1694 
   1695 #if defined(VKI_IPC_64)
   1696    case VKI_IPC_STAT|VKI_IPC_64:
   1697    case VKI_SEM_STAT|VKI_IPC_64:
   1698       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1699       break;
   1700 #endif
   1701 
   1702    case VKI_GETALL:
   1703 #if defined(VKI_IPC_64)
   1704    case VKI_GETALL|VKI_IPC_64:
   1705 #endif
   1706       nsems = get_sem_count( arg0 );
   1707       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
   1708       break;
   1709    }
   1710 }
   1711 
    1712 /* ------ */
   1715 
   1716 static
   1717 UInt get_shm_size ( Int shmid )
   1718 {
   1719 #ifdef __NR_shmctl
   1720 #  ifdef VKI_IPC_64
   1721    struct vki_shmid64_ds buf;
   1722 #    ifdef VGP_amd64_linux
   1723      /* See bug 222545 comment 7 */
   1724      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
   1725                                      VKI_IPC_STAT, (UWord)&buf);
   1726 #    else
   1727      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
   1728                                      VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
   1729 #    endif
   1730 #  else /* !def VKI_IPC_64 */
   1731    struct vki_shmid_ds buf;
   1732    SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
   1733 #  endif /* def VKI_IPC_64 */
   1734 #else
   1735    struct vki_shmid_ds buf;
   1736    SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
   1737                                  VKI_IPC_STAT, 0, (UWord)&buf);
   1738 #endif
   1739    if (sr_isError(__res))
   1740       return 0;
   1741 
   1742    return buf.shm_segsz;
   1743 }
   1744 
   1745 UWord
   1746 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
   1747                              UWord arg0, UWord arg1, UWord arg2 )
   1748 {
   1749    /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
   1750    UInt  segmentSize = get_shm_size ( arg0 );
   1751    UWord tmp;
   1752    Bool  ok;
   1753    if (arg1 == 0) {
   1754       /* arm-linux only: work around the fact that
   1755          VG_(am_get_advisory_client_simple) produces something that is
   1756          VKI_PAGE_SIZE aligned, whereas what we want is something
   1757          VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
   1758          increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
   1759          then round the result up to the next VKI_SHMLBA boundary.
   1760          See bug 222545 comment 15.  So far, arm-linux is the only
   1761          platform where this is known to be necessary. */
   1762       vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
   1763       if (VKI_SHMLBA > VKI_PAGE_SIZE) {
   1764          segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
   1765       }
   1766       tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
   1767       if (ok) {
   1768          if (VKI_SHMLBA > VKI_PAGE_SIZE) {
   1769             arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
   1770          } else {
   1771             arg1 = tmp;
   1772          }
   1773       }
   1774    }
   1775    else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
   1776       arg1 = 0;
   1777    return arg1;
   1778 }
   1779 
   1780 void
   1781 ML_(generic_POST_sys_shmat) ( ThreadId tid,
   1782                               UWord res,
   1783                               UWord arg0, UWord arg1, UWord arg2 )
   1784 {
   1785    UInt segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
   1786    if ( segmentSize > 0 ) {
   1787       UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
   1788       Bool d;
   1789 
   1790       if (arg2 & VKI_SHM_RDONLY)
   1791          prot &= ~VKI_PROT_WRITE;
   1792       /* It isn't exactly correct to pass 0 for the fd and offset
   1793          here.  The kernel seems to think the corresponding section
   1794          does have dev/ino numbers:
   1795 
   1796          04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
   1797 
   1798          However there is no obvious way to find them.  In order to
   1799          cope with the discrepancy, aspacem's sync checker omits the
   1800          dev/ino correspondence check in cases where V does not know
   1801          the dev/ino. */
   1802       d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
   1803 
   1804       /* we don't distinguish whether it's read-only or
   1805        * read-write -- it doesn't matter really. */
   1806       VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
   1807                               0/*di_handle*/ );
   1808       if (d)
   1809          VG_(discard_translations)( (Addr64)res,
   1810                                     (ULong)VG_PGROUNDUP(segmentSize),
   1811                                     "ML_(generic_POST_sys_shmat)" );
   1812    }
   1813 }
   1814 
   1815 /* ------ */
   1816 
   1817 Bool
   1818 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
   1819 {
   1820    /* int shmdt(const void *shmaddr); */
   1821    return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
   1822 }
   1823 
   1824 void
   1825 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
   1826 {
   1827    NSegment const* s = VG_(am_find_nsegment)(arg0);
   1828 
   1829    if (s != NULL) {
   1830       Addr  s_start = s->start;
   1831       SizeT s_len   = s->end+1 - s->start;
   1832       Bool  d;
   1833 
   1834       vg_assert(s->kind == SkShmC);
   1835       vg_assert(s->start == arg0);
   1836 
   1837       d = VG_(am_notify_munmap)(s_start, s_len);
   1838       s = NULL; /* s is now invalid */
   1839       VG_TRACK( die_mem_munmap, s_start, s_len );
   1840       if (d)
   1841          VG_(discard_translations)( (Addr64)s_start,
   1842                                     (ULong)s_len,
   1843                                     "ML_(generic_POST_sys_shmdt)" );
   1844    }
   1845 }
   1846 /* ------ */
   1847 
   1848 void
   1849 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
   1850                               UWord arg0, UWord arg1, UWord arg2 )
   1851 {
   1852    /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
   1853    switch (arg1 /* cmd */) {
   1854 #if defined(VKI_IPC_INFO)
   1855    case VKI_IPC_INFO:
   1856       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
   1857                      arg2, sizeof(struct vki_shminfo) );
   1858       break;
   1859 #if defined(VKI_IPC_64)
   1860    case VKI_IPC_INFO|VKI_IPC_64:
   1861       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
   1862                      arg2, sizeof(struct vki_shminfo64) );
   1863       break;
   1864 #endif
   1865 #endif
   1866 
   1867 #if defined(VKI_SHM_INFO)
   1868    case VKI_SHM_INFO:
   1869 #if defined(VKI_IPC_64)
   1870    case VKI_SHM_INFO|VKI_IPC_64:
   1871 #endif
   1872       PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
   1873                      arg2, sizeof(struct vki_shm_info) );
   1874       break;
   1875 #endif
   1876 
   1877    case VKI_IPC_STAT:
   1878 #if defined(VKI_SHM_STAT)
   1879    case VKI_SHM_STAT:
   1880 #endif
   1881       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
   1882                      arg2, sizeof(struct vki_shmid_ds) );
   1883       break;
   1884 
   1885 #if defined(VKI_IPC_64)
   1886    case VKI_IPC_STAT|VKI_IPC_64:
   1887    case VKI_SHM_STAT|VKI_IPC_64:
   1888       PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
   1889                      arg2, sizeof(struct vki_shmid64_ds) );
   1890       break;
   1891 #endif
   1892 
   1893    case VKI_IPC_SET:
   1894       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
   1895                     arg2, sizeof(struct vki_shmid_ds) );
   1896       break;
   1897 
   1898 #if defined(VKI_IPC_64)
   1899    case VKI_IPC_SET|VKI_IPC_64:
   1900       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
   1901                     arg2, sizeof(struct vki_shmid64_ds) );
   1902       break;
   1903 #endif
   1904    }
   1905 }
   1906 
   1907 void
   1908 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
   1909                                UWord res,
   1910                                UWord arg0, UWord arg1, UWord arg2 )
   1911 {
   1912    switch (arg1 /* cmd */) {
   1913 #if defined(VKI_IPC_INFO)
   1914    case VKI_IPC_INFO:
   1915       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
   1916       break;
   1917    case VKI_IPC_INFO|VKI_IPC_64:
   1918       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
   1919       break;
   1920 #endif
   1921 
   1922 #if defined(VKI_SHM_INFO)
   1923    case VKI_SHM_INFO:
   1924    case VKI_SHM_INFO|VKI_IPC_64:
   1925       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
   1926       break;
   1927 #endif
   1928 
   1929    case VKI_IPC_STAT:
   1930 #if defined(VKI_SHM_STAT)
   1931    case VKI_SHM_STAT:
   1932 #endif
   1933       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
   1934       break;
   1935 
   1936 #if defined(VKI_IPC_64)
   1937    case VKI_IPC_STAT|VKI_IPC_64:
   1938    case VKI_SHM_STAT|VKI_IPC_64:
   1939       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
   1940       break;
   1941 #endif
   1942 
   1943 
   1944    }
   1945 }
   1946 
   1947 
   1948 /* ---------------------------------------------------------------------
   1949    Generic handler for mmap
   1950    ------------------------------------------------------------------ */
   1951 
   1952 /*
    1953  * Although mmap is specified by POSIX and the arguments are generally
    1954  * consistent across platforms, the precise details of the low level
    1955  * argument passing conventions differ. For example:
   1956  *
   1957  * - On x86-linux there is mmap (aka old_mmap) which takes the
   1958  *   arguments in a memory block and the offset in bytes; and
   1959  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
   1960  *   way and the offset in pages.
   1961  *
   1962  * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
   1963  *   arguments in the normal way and the offset in bytes; and
   1964  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
   1965  *   way and the offset in pages.
   1966  *
   1967  * - On amd64-linux everything is simple and there is just the one
   1968  *   call, mmap (aka sys_mmap)  which takes the arguments in the
   1969  *   normal way and the offset in bytes.
   1970  *
   1971  * - On s390x-linux there is mmap (aka old_mmap) which takes the
   1972  *   arguments in a memory block and the offset in bytes. mmap2
   1973  *   is also available (but not exported via unistd.h) with
   1974  *   arguments in a memory block and the offset in pages.
   1975  *
   1976  * To cope with all this we provide a generic handler function here
   1977  * and then each platform implements one or more system call handlers
   1978  * which call this generic routine after extracting and normalising
   1979  * the arguments.
   1980  */
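
/* A minimal sketch, not part of the build, of how a platform-side
   wrapper might normalise its arguments before delegating to the
   generic handler below.  The wrapper name sys_mmap2 and the use of
   VKI_PAGE_SIZE as mmap2's offset unit are illustrative assumptions;
   the real per-platform handlers live in the syswrap-<platform> files
   and differ in detail.

      PRE(sys_mmap2)
      {
         // mmap2 passes the file offset in page-sized units, whereas
         // the generic handler expects a byte offset (an Off64T).
         SET_STATUS_from_SysRes(
            ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
                                       (Off64T)ARG6 * (Off64T)VKI_PAGE_SIZE )
         );
      }
*/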
   1981 
   1982 SysRes
   1983 ML_(generic_PRE_sys_mmap) ( ThreadId tid,
   1984                             UWord arg1, UWord arg2, UWord arg3,
   1985                             UWord arg4, UWord arg5, Off64T arg6 )
   1986 {
   1987    Addr       advised;
   1988    SysRes     sres;
   1989    MapRequest mreq;
   1990    Bool       mreq_ok;
   1991 
   1992 #if defined(VGO_darwin)
    1993    // Nb: we can't use this on Darwin; it has races:
   1994    // * needs to RETRY if advisory succeeds but map fails
   1995    //   (could have been some other thread in a nonblocking call)
   1996    // * needs to not use fixed-position mmap() on Darwin
   1997    //   (mmap will cheerfully smash whatever's already there, which might
   1998    //   be a new mapping from some other thread in a nonblocking call)
   1999    VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
   2000 #endif
   2001 
   2002    if (arg2 == 0) {
   2003       /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
   2004          shall be established. */
   2005       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2006    }
   2007 
   2008    if (!VG_IS_PAGE_ALIGNED(arg1)) {
   2009       /* zap any misaligned addresses. */
   2010       /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
   2011          to fail.   Here, we catch them all. */
   2012       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2013    }
   2014 
   2015    if (!VG_IS_PAGE_ALIGNED(arg6)) {
   2016       /* zap any misaligned offsets. */
   2017       /* SuSV3 says: The off argument is constrained to be aligned and
   2018          sized according to the value returned by sysconf() when
   2019          passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
   2020       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2021    }
   2022 
   2023    /* Figure out what kind of allocation constraints there are
   2024       (fixed/hint/any), and ask aspacem what we should do. */
   2025    mreq.start = arg1;
   2026    mreq.len   = arg2;
   2027    if (arg4 & VKI_MAP_FIXED) {
   2028       mreq.rkind = MFixed;
   2029    } else
   2030    if (arg1 != 0) {
   2031       mreq.rkind = MHint;
   2032    } else {
   2033       mreq.rkind = MAny;
   2034    }
   2035 
   2036    /* Enquire ... */
   2037    advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   2038    if (!mreq_ok) {
   2039       /* Our request was bounced, so we'd better fail. */
   2040       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2041    }
   2042 
   2043    /* Otherwise we're OK (so far).  Install aspacem's choice of
   2044       address, and let the mmap go through.  */
   2045    sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
   2046                                     arg4 | VKI_MAP_FIXED,
   2047                                     arg5, arg6);
   2048 
   2049    /* A refinement: it may be that the kernel refused aspacem's choice
   2050       of address.  If we were originally asked for a hinted mapping,
   2051       there is still a last chance: try again at any address.
   2052       Hence: */
   2053    if (mreq.rkind == MHint && sr_isError(sres)) {
   2054       mreq.start = 0;
   2055       mreq.len   = arg2;
   2056       mreq.rkind = MAny;
   2057       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   2058       if (!mreq_ok) {
   2059          /* Our request was bounced, so we'd better fail. */
   2060          return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2061       }
   2062       /* and try again with the kernel */
   2063       sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
   2064                                        arg4 | VKI_MAP_FIXED,
   2065                                        arg5, arg6);
   2066    }
   2067 
   2068    if (!sr_isError(sres)) {
   2069       ULong di_handle;
   2070       /* Notify aspacem. */
   2071       notify_core_of_mmap(
   2072          (Addr)sr_Res(sres), /* addr kernel actually assigned */
   2073          arg2, /* length */
   2074          arg3, /* prot */
   2075          arg4, /* the original flags value */
   2076          arg5, /* fd */
   2077          arg6  /* offset */
   2078       );
   2079       /* Load symbols? */
   2080       di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
   2081                                        False/*allow_SkFileV*/, (Int)arg5 );
   2082       /* Notify the tool. */
   2083       notify_tool_of_mmap(
   2084          (Addr)sr_Res(sres), /* addr kernel actually assigned */
   2085          arg2, /* length */
   2086          arg3, /* prot */
   2087          di_handle /* so the tool can refer to the read debuginfo later,
   2088                       if it wants. */
   2089       );
   2090    }
   2091 
   2092    /* Stay sane */
   2093    if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
   2094       vg_assert(sr_Res(sres) == arg1);
   2095 
   2096    return sres;
   2097 }
   2098 
   2099 
   2100 /* ---------------------------------------------------------------------
   2101    The Main Entertainment ... syscall wrappers
   2102    ------------------------------------------------------------------ */
   2103 
   2104 /* Note: the PRE() and POST() wrappers are for the actual functions
   2105    implementing the system calls in the OS kernel.  These mostly have
   2106    names like sys_write();  a few have names like old_mmap().  See the
   2107    comment for ML_(syscall_table)[] for important info about the __NR_foo
   2108    constants and their relationship to the sys_foo() functions.
   2109 
   2110    Some notes about names used for syscalls and args:
   2111    - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
   2112      ambiguity.
   2113 
   2114    - For error messages, we generally use a somewhat generic name
   2115      for the syscall (eg. "write" rather than "sys_write").  This should be
   2116      good enough for the average user to understand what is happening,
   2117      without confusing them with names like "sys_write".
   2118 
   2119    - Also, for error messages the arg names are mostly taken from the man
   2120      pages (even though many of those man pages are really for glibc
   2121      functions of the same name), rather than from the OS kernel source,
   2122      for the same reason -- a user presented with a "bogus foo(bar)" arg
   2123      will most likely look at the "foo" man page to see which is the "bar"
   2124      arg.
   2125 
   2126    Note that we use our own vki_* types.  The one exception is in
   2127    PRE_REG_READn calls, where pointer types haven't been changed, because
   2128    they don't need to be -- eg. for "foo*" to be used, the type foo need not
   2129    be visible.
   2130 
   2131    XXX: some of these are arch-specific, and should be factored out.
   2132 */
   2133 
   2134 #define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
   2135 #define POST(name)     DEFN_POST_TEMPLATE(generic, name)
   2136 
   2137 // Macros to support 64-bit syscall args split into two 32 bit values
   2138 #if defined(VG_LITTLEENDIAN)
   2139 #define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
   2140 #define MERGE64_FIRST(name) name##_low
   2141 #define MERGE64_SECOND(name) name##_high
   2142 #elif defined(VG_BIGENDIAN)
   2143 #define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
   2144 #define MERGE64_FIRST(name) name##_high
   2145 #define MERGE64_SECOND(name) name##_low
   2146 #else
   2147 #error Unknown endianness
   2148 #endif
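
/* Worked example, explanatory only: on a 32-bit little-endian target a
   64-bit value such as pwrite64's file offset arrives split across two
   syscall args.  MERGE64(ARG4,ARG5) reassembles it as
   ((ULong)ARG4) | (((ULong)ARG5) << 32), and MERGE64_FIRST(offset) /
   MERGE64_SECOND(offset) expand to offset_low / offset_high for the
   PRE_REG_READ5 argument names, as in sys_pwrite64 below.  On
   big-endian targets the two halves swap roles, which is what the
   alternative definitions above express. */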
   2149 
   2150 PRE(sys_exit)
   2151 {
   2152    ThreadState* tst;
   2153    /* simple; just make this thread exit */
   2154    PRINT("exit( %ld )", ARG1);
   2155    PRE_REG_READ1(void, "exit", int, status);
   2156    tst = VG_(get_ThreadState)(tid);
   2157    /* Set the thread's status to be exiting, then claim that the
   2158       syscall succeeded. */
   2159    tst->exitreason = VgSrc_ExitThread;
   2160    tst->os_state.exitcode = ARG1;
   2161    SET_STATUS_Success(0);
   2162 }
   2163 
   2164 PRE(sys_ni_syscall)
   2165 {
   2166    PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
   2167       VG_SYSNUM_STRING(SYSNO));
   2168    PRE_REG_READ0(long, "ni_syscall");
   2169    SET_STATUS_Failure( VKI_ENOSYS );
   2170 }
   2171 
   2172 PRE(sys_iopl)
   2173 {
   2174    PRINT("sys_iopl ( %ld )", ARG1);
   2175    PRE_REG_READ1(long, "iopl", unsigned long, level);
   2176 }
   2177 
   2178 PRE(sys_fsync)
   2179 {
   2180    *flags |= SfMayBlock;
   2181    PRINT("sys_fsync ( %ld )", ARG1);
   2182    PRE_REG_READ1(long, "fsync", unsigned int, fd);
   2183 }
   2184 
   2185 PRE(sys_fdatasync)
   2186 {
   2187    *flags |= SfMayBlock;
   2188    PRINT("sys_fdatasync ( %ld )", ARG1);
   2189    PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
   2190 }
   2191 
   2192 PRE(sys_msync)
   2193 {
   2194    *flags |= SfMayBlock;
   2195    PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   2196    PRE_REG_READ3(long, "msync",
   2197                  unsigned long, start, vki_size_t, length, int, flags);
   2198    PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
   2199 }
   2200 
   2201 // Nb: getpmsg() and putpmsg() are special additional syscalls used in early
   2202 // versions of LiS (Linux Streams).  They are not part of the kernel.
    2203 // Therefore, we have to provide this type ourselves, rather than getting it
   2204 // from the kernel sources.
   2205 struct vki_pmsg_strbuf {
   2206    int     maxlen;         /* no. of bytes in buffer */
   2207    int     len;            /* no. of bytes returned */
   2208    vki_caddr_t buf;        /* pointer to data */
   2209 };
   2210 PRE(sys_getpmsg)
   2211 {
   2212    /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
   2213    struct vki_pmsg_strbuf *ctrl;
   2214    struct vki_pmsg_strbuf *data;
   2215    *flags |= SfMayBlock;
   2216    PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   2217    PRE_REG_READ5(int, "getpmsg",
   2218                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
   2219                  int *, bandp, int *, flagsp);
   2220    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2221    data = (struct vki_pmsg_strbuf *)ARG3;
   2222    if (ctrl && ctrl->maxlen > 0)
   2223       PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
   2224    if (data && data->maxlen > 0)
   2225       PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
   2226    if (ARG4)
   2227       PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
   2228    if (ARG5)
   2229       PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
   2230 }
   2231 POST(sys_getpmsg)
   2232 {
   2233    struct vki_pmsg_strbuf *ctrl;
   2234    struct vki_pmsg_strbuf *data;
   2235    vg_assert(SUCCESS);
   2236    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2237    data = (struct vki_pmsg_strbuf *)ARG3;
   2238    if (RES == 0 && ctrl && ctrl->len > 0) {
   2239       POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
   2240    }
   2241    if (RES == 0 && data && data->len > 0) {
   2242       POST_MEM_WRITE( (Addr)data->buf, data->len);
   2243    }
   2244 }
   2245 
   2246 PRE(sys_putpmsg)
   2247 {
   2248    /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
   2249    struct vki_pmsg_strbuf *ctrl;
   2250    struct vki_pmsg_strbuf *data;
   2251    *flags |= SfMayBlock;
   2252    PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
   2253    PRE_REG_READ5(int, "putpmsg",
   2254                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
   2255                  int, band, int, flags);
   2256    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2257    data = (struct vki_pmsg_strbuf *)ARG3;
   2258    if (ctrl && ctrl->len > 0)
   2259       PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
   2260    if (data && data->len > 0)
   2261       PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
   2262 }
   2263 
   2264 PRE(sys_getitimer)
   2265 {
   2266    struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2267    PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
   2268    PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
   2269 
   2270    PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
   2271    PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
   2272 }
   2273 
   2274 POST(sys_getitimer)
   2275 {
   2276    if (ARG2 != (Addr)NULL) {
   2277       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2278       POST_timeval_WRITE( &(value->it_interval) );
   2279       POST_timeval_WRITE( &(value->it_value) );
   2280    }
   2281 }
   2282 
   2283 PRE(sys_setitimer)
   2284 {
   2285    PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
   2286    PRE_REG_READ3(long, "setitimer",
   2287                  int, which,
   2288                  struct itimerval *, value, struct itimerval *, ovalue);
   2289    if (ARG2 != (Addr)NULL) {
   2290       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2291       PRE_timeval_READ( "setitimer(&value->it_interval)",
   2292                          &(value->it_interval));
   2293       PRE_timeval_READ( "setitimer(&value->it_value)",
   2294                          &(value->it_value));
   2295    }
   2296    if (ARG3 != (Addr)NULL) {
   2297       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
   2298       PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
   2299                          &(ovalue->it_interval));
   2300       PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
   2301                          &(ovalue->it_value));
   2302    }
   2303 }
   2304 
   2305 POST(sys_setitimer)
   2306 {
   2307    if (ARG3 != (Addr)NULL) {
   2308       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
   2309       POST_timeval_WRITE( &(ovalue->it_interval) );
   2310       POST_timeval_WRITE( &(ovalue->it_value) );
   2311    }
   2312 }
   2313 
   2314 PRE(sys_chroot)
   2315 {
   2316    PRINT("sys_chroot ( %#lx )", ARG1);
   2317    PRE_REG_READ1(long, "chroot", const char *, path);
   2318    PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
   2319 }
   2320 
   2321 PRE(sys_madvise)
   2322 {
   2323    *flags |= SfMayBlock;
   2324    PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   2325    PRE_REG_READ3(long, "madvise",
   2326                  unsigned long, start, vki_size_t, length, int, advice);
   2327 }
   2328 
   2329 #if HAVE_MREMAP
   2330 PRE(sys_mremap)
   2331 {
   2332    // Nb: this is different to the glibc version described in the man pages,
   2333    // which lacks the fifth 'new_address' argument.
   2334    if (ARG4 & VKI_MREMAP_FIXED) {
   2335       PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
   2336             ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
   2337       PRE_REG_READ5(unsigned long, "mremap",
   2338                     unsigned long, old_addr, unsigned long, old_size,
   2339                     unsigned long, new_size, unsigned long, flags,
   2340                     unsigned long, new_addr);
   2341    } else {
   2342       PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
   2343             ARG1, (ULong)ARG2, ARG3, ARG4);
   2344       PRE_REG_READ4(unsigned long, "mremap",
   2345                     unsigned long, old_addr, unsigned long, old_size,
   2346                     unsigned long, new_size, unsigned long, flags);
   2347    }
   2348    SET_STATUS_from_SysRes(
   2349       do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
   2350    );
   2351 }
   2352 #endif /* HAVE_MREMAP */
   2353 
   2354 PRE(sys_nice)
   2355 {
   2356    PRINT("sys_nice ( %ld )", ARG1);
   2357    PRE_REG_READ1(long, "nice", int, inc);
   2358 }
   2359 
   2360 PRE(sys_mlock)
   2361 {
   2362    *flags |= SfMayBlock;
   2363    PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   2364    PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
   2365 }
   2366 
   2367 PRE(sys_munlock)
   2368 {
   2369    *flags |= SfMayBlock;
   2370    PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   2371    PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
   2372 }
   2373 
   2374 PRE(sys_mlockall)
   2375 {
   2376    *flags |= SfMayBlock;
   2377    PRINT("sys_mlockall ( %lx )", ARG1);
   2378    PRE_REG_READ1(long, "mlockall", int, flags);
   2379 }
   2380 
   2381 PRE(sys_setpriority)
   2382 {
   2383    PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
   2384    PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
   2385 }
   2386 
   2387 PRE(sys_getpriority)
   2388 {
   2389    PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
   2390    PRE_REG_READ2(long, "getpriority", int, which, int, who);
   2391 }
   2392 
   2393 PRE(sys_pwrite64)
   2394 {
   2395    *flags |= SfMayBlock;
   2396 #if VG_WORDSIZE == 4
   2397    PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
   2398          ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   2399    PRE_REG_READ5(ssize_t, "pwrite64",
   2400                  unsigned int, fd, const char *, buf, vki_size_t, count,
   2401                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
   2402 #elif VG_WORDSIZE == 8
   2403    PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
   2404          ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   2405    PRE_REG_READ4(ssize_t, "pwrite64",
   2406                  unsigned int, fd, const char *, buf, vki_size_t, count,
   2407                  Word, offset);
   2408 #else
   2409 #  error Unexpected word size
   2410 #endif
   2411    PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
   2412 }
   2413 
   2414 PRE(sys_sync)
   2415 {
   2416    *flags |= SfMayBlock;
   2417    PRINT("sys_sync ( )");
   2418    PRE_REG_READ0(long, "sync");
   2419 }
   2420 
   2421 PRE(sys_fstatfs)
   2422 {
   2423    FUSE_COMPATIBLE_MAY_BLOCK();
   2424    PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
   2425    PRE_REG_READ2(long, "fstatfs",
   2426                  unsigned int, fd, struct statfs *, buf);
   2427    PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
   2428 }
   2429 
   2430 POST(sys_fstatfs)
   2431 {
   2432    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
   2433 }
   2434 
   2435 PRE(sys_fstatfs64)
   2436 {
   2437    FUSE_COMPATIBLE_MAY_BLOCK();
   2438    PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
   2439    PRE_REG_READ3(long, "fstatfs64",
   2440                  unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
   2441    PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
   2442 }
   2443 POST(sys_fstatfs64)
   2444 {
   2445    POST_MEM_WRITE( ARG3, ARG2 );
   2446 }
   2447 
   2448 PRE(sys_getsid)
   2449 {
   2450    PRINT("sys_getsid ( %ld )", ARG1);
   2451    PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
   2452 }
   2453 
   2454 PRE(sys_pread64)
   2455 {
   2456    *flags |= SfMayBlock;
   2457 #if VG_WORDSIZE == 4
   2458    PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
   2459          ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   2460    PRE_REG_READ5(ssize_t, "pread64",
   2461                  unsigned int, fd, char *, buf, vki_size_t, count,
   2462                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
   2463 #elif VG_WORDSIZE == 8
   2464    PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
   2465          ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   2466    PRE_REG_READ4(ssize_t, "pread64",
   2467                  unsigned int, fd, char *, buf, vki_size_t, count,
   2468                  Word, offset);
   2469 #else
   2470 #  error Unexpected word size
   2471 #endif
   2472    PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
   2473 }
   2474 POST(sys_pread64)
   2475 {
   2476    vg_assert(SUCCESS);
   2477    if (RES > 0) {
   2478       POST_MEM_WRITE( ARG2, RES );
   2479    }
   2480 }
   2481 
   2482 PRE(sys_mknod)
   2483 {
   2484    FUSE_COMPATIBLE_MAY_BLOCK();
   2485    PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
   2486    PRE_REG_READ3(long, "mknod",
   2487                  const char *, pathname, int, mode, unsigned, dev);
   2488    PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
   2489 }
   2490 
   2491 PRE(sys_flock)
   2492 {
   2493    *flags |= SfMayBlock;
   2494    PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
   2495    PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
   2496 }
   2497 
   2498 // Pre_read a char** argument.
   2499 static void pre_argv_envp(Addr a, ThreadId tid, Char* s1, Char* s2)
   2500 {
   2501    while (True) {
   2502       Addr a_deref;
   2503       Addr* a_p = (Addr*)a;
   2504       PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
   2505       a_deref = *a_p;
   2506       if (0 == a_deref)
   2507          break;
   2508       PRE_MEM_RASCIIZ( s2, a_deref );
   2509       a += sizeof(char*);
   2510    }
   2511 }
   2512 
   2513 static Bool i_am_the_only_thread ( void )
   2514 {
   2515    Int c = VG_(count_living_threads)();
   2516    vg_assert(c >= 1); /* stay sane */
   2517    return c == 1;
   2518 }
   2519 
   2520 /* Wait until all other threads disappear. */
   2521 void VG_(reap_threads)(ThreadId self)
   2522 {
   2523    while (!i_am_the_only_thread()) {
   2524       /* Let other thread(s) run */
   2525       VG_(vg_yield)();
   2526       VG_(poll_signals)(self);
   2527    }
   2528    vg_assert(i_am_the_only_thread());
   2529 }
   2530 
   2531 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
   2532 // but it seems to work nonetheless...
   2533 PRE(sys_execve)
   2534 {
   2535    Char*        path = NULL;       /* path to executable */
   2536    Char**       envp = NULL;
   2537    Char**       argv = NULL;
   2538    Char**       arg2copy;
   2539    Char*        launcher_basename = NULL;
   2540    ThreadState* tst;
   2541    Int          i, j, tot_args;
   2542    SysRes       res;
   2543    Bool         setuid_allowed, trace_this_child;
   2544 
   2545    PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
   2546    PRE_REG_READ3(vki_off_t, "execve",
   2547                  char *, filename, char **, argv, char **, envp);
   2548    PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
   2549    if (ARG2 != 0)
   2550       pre_argv_envp( ARG2, tid, "execve(argv)", "execve(argv[i])" );
   2551    if (ARG3 != 0)
   2552       pre_argv_envp( ARG3, tid, "execve(envp)", "execve(envp[i])" );
   2553 
   2554    vg_assert(VG_(is_valid_tid)(tid));
   2555    tst = VG_(get_ThreadState)(tid);
   2556 
   2557    /* Erk.  If the exec fails, then the following will have made a
   2558       mess of things which makes it hard for us to continue.  The
   2559       right thing to do is piece everything together again in
   2560       POST(execve), but that's close to impossible.  Instead, we make
   2561       an effort to check that the execve will work before actually
   2562       doing it. */
   2563 
   2564    /* Check that the name at least begins in client-accessible storage. */
   2565    if (ARG1 == 0 /* obviously bogus */
   2566        || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
   2567       SET_STATUS_Failure( VKI_EFAULT );
   2568       return;
   2569    }
   2570    // debug-only printing
   2571    if (0) {
   2572       VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
   2573       if (ARG2) {
   2574          VG_(printf)("ARG2 = ");
   2575          Int q;
   2576          HChar** vec = (HChar**)ARG2;
   2577          for (q = 0; vec[q]; q++)
   2578             VG_(printf)("%p(%s) ", vec[q], vec[q]);
   2579          VG_(printf)("\n");
   2580       } else {
   2581          VG_(printf)("ARG2 = null\n");
   2582       }
   2583    }
   2584 
   2585    // Decide whether or not we want to follow along
   2586    { // Make 'child_argv' be a pointer to the child's arg vector
   2587      // (skipping the exe name)
   2588      HChar** child_argv = (HChar**)ARG2;
   2589      if (child_argv && child_argv[0] == NULL)
   2590         child_argv = NULL;
   2591      trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
   2592    }
   2593 
   2594    // Do the important checks:  it is a file, is executable, permissions are
    2595    // ok, etc.  We allow setuid executables to run only when we are not
    2596    // simulating them, that is, when they are to be run natively.
   2597    setuid_allowed = trace_this_child  ? False  : True;
   2598    res = VG_(pre_exec_check)((const Char*)ARG1, NULL, setuid_allowed);
   2599    if (sr_isError(res)) {
   2600       SET_STATUS_Failure( sr_Err(res) );
   2601       return;
   2602    }
   2603 
   2604    /* If we're tracing the child, and the launcher name looks bogus
   2605       (possibly because launcher.c couldn't figure it out, see
   2606       comments therein) then we have no option but to fail. */
   2607    if (trace_this_child
   2608        && (VG_(name_of_launcher) == NULL
   2609            || VG_(name_of_launcher)[0] != '/')) {
   2610       SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
   2611       return;
   2612    }
   2613 
   2614    /* After this point, we can't recover if the execve fails. */
   2615    VG_(debugLog)(1, "syswrap", "Exec of %s\n", (Char*)ARG1);
   2616 
   2617 
   2618    // Terminate gdbserver if it is active.
   2619    if (VG_(clo_vgdb)  != Vg_VgdbNo) {
   2620       // If the child will not be traced, we need to terminate gdbserver
    2621       // to clean up the gdbserver resources (e.g. the FIFO files).
    2622       // If the child will be traced, we also terminate gdbserver: the new
   2623       // Valgrind will start a fresh gdbserver after exec.
   2624       VG_(gdbserver) (0);
   2625    }
   2626 
   2627    /* Resistance is futile.  Nuke all other threads.  POSIX mandates
   2628       this. (Really, nuke them all, since the new process will make
   2629       its own new thread.) */
   2630    VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
   2631    VG_(reap_threads)(tid);
   2632 
   2633    // Set up the child's exe path.
   2634    //
   2635    if (trace_this_child) {
   2636 
   2637       // We want to exec the launcher.  Get its pre-remembered path.
   2638       path = VG_(name_of_launcher);
   2639       // VG_(name_of_launcher) should have been acquired by m_main at
   2640       // startup.
   2641       vg_assert(path);
   2642 
   2643       launcher_basename = VG_(strrchr)(path, '/');
   2644       if (launcher_basename == NULL || launcher_basename[1] == 0) {
    2645          launcher_basename = path;  // hmm, very dubious
   2646       } else {
   2647          launcher_basename++;
   2648       }
   2649 
   2650    } else {
   2651       path = (Char*)ARG1;
   2652       if (VG_(clo_xml)) {
   2653         VG_(printf_xml)("\n<execv/>\n\n</valgrindoutput>\n\n");
   2654       } else {
   2655         VG_(umsg)("execv called - the tool will now quit\n");
   2656       }
   2657    }
   2658 
   2659    // Set up the child's environment.
   2660    //
   2661    // Remove the valgrind-specific stuff from the environment so the
   2662    // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
   2663    // This is done unconditionally, since if we are tracing the child,
   2664    // the child valgrind will set up the appropriate client environment.
   2665    // Nb: we make a copy of the environment before trying to mangle it
   2666    // as it might be in read-only memory (this was bug #101881).
   2667    //
   2668    // Then, if tracing the child, set VALGRIND_LIB for it.
   2669    //
   2670    if (ARG3 == 0) {
   2671       envp = NULL;
   2672    } else {
   2673       envp = VG_(env_clone)( (Char**)ARG3 );
   2674       if (envp == NULL) goto hosed;
   2675       VG_(env_remove_valgrind_env_stuff)( envp );
   2676    }
   2677 
   2678    if (trace_this_child) {
   2679       // Set VALGRIND_LIB in ARG3 (the environment)
   2680       VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
   2681    }
   2682 
   2683    // Set up the child's args.  If not tracing it, they are
   2684    // simply ARG2.  Otherwise, they are
   2685    //
   2686    // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
   2687    //
   2688    // except that the first VG_(args_for_valgrind_noexecpass) args
   2689    // are omitted.
   2690    //
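   // Illustrative example (the concrete values are hypothetical): if
   // the launcher basename is "valgrind", V's surviving args are just
   // "--leak-check=yes", and the client execs "/bin/date -u", the
   // rebuilt vector is roughly
   //    { "valgrind", "--leak-check=yes", "/bin/date", "-u", NULL }
   //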
   2691    if (!trace_this_child) {
   2692       argv = (Char**)ARG2;
   2693    } else {
   2694       vg_assert( VG_(args_for_valgrind) );
   2695       vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
   2696       vg_assert( VG_(args_for_valgrind_noexecpass)
   2697                    <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
   2698       /* how many args in total will there be? */
   2699       // launcher basename
   2700       tot_args = 1;
   2701       // V's args
   2702       tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
   2703       tot_args -= VG_(args_for_valgrind_noexecpass);
   2704       // name of client exe
   2705       tot_args++;
   2706       // args for client exe, skipping [0]
   2707       arg2copy = (Char**)ARG2;
   2708       if (arg2copy && arg2copy[0]) {
   2709          for (i = 1; arg2copy[i]; i++)
   2710             tot_args++;
   2711       }
   2712       // allocate
   2713       argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
   2714                           (tot_args+1) * sizeof(HChar*) );
   2715       if (argv == 0) goto hosed;
   2716       // copy
   2717       j = 0;
   2718       argv[j++] = launcher_basename;
   2719       for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
   2720          if (i < VG_(args_for_valgrind_noexecpass))
   2721             continue;
   2722          argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
   2723       }
   2724       argv[j++] = (Char*)ARG1;
   2725       if (arg2copy && arg2copy[0])
   2726          for (i = 1; arg2copy[i]; i++)
   2727             argv[j++] = arg2copy[i];
   2728       argv[j++] = NULL;
   2729       // check
   2730       vg_assert(j == tot_args+1);
   2731    }
   2732 
   2733    /* restore the DATA rlimit for the child */
   2734    VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
   2735 
   2736    /*
   2737       Set the signal state up for exec.
   2738 
   2739       We need to set the real signal state to make sure the exec'd
   2740       process gets SIG_IGN properly.
   2741 
   2742       Also set our real sigmask to match the client's sigmask so that
   2743       the exec'd child will get the right mask.  First we need to
    2744       clear out any pending signals so they don't get delivered,
   2745       which would confuse things.
   2746 
   2747       XXX This is a bug - the signals should remain pending, and be
   2748       delivered to the new process after exec.  There's also a
   2749       race-condition, since if someone delivers us a signal between
   2750       the sigprocmask and the execve, we'll still get the signal. Oh
   2751       well.
   2752    */
   2753    {
   2754       vki_sigset_t allsigs;
   2755       vki_siginfo_t info;
   2756 
   2757       /* What this loop does: it queries SCSS (the signal state that
   2758          the client _thinks_ the kernel is in) by calling
   2759          VG_(do_sys_sigaction), and modifies the real kernel signal
   2760          state accordingly. */
   2761       for (i = 1; i < VG_(max_signal); i++) {
   2762          vki_sigaction_fromK_t sa_f;
   2763          vki_sigaction_toK_t   sa_t;
   2764          VG_(do_sys_sigaction)(i, NULL, &sa_f);
   2765          VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
   2766          if (sa_t.ksa_handler == VKI_SIG_IGN)
   2767             VG_(sigaction)(i, &sa_t, NULL);
   2768          else {
   2769             sa_t.ksa_handler = VKI_SIG_DFL;
   2770             VG_(sigaction)(i, &sa_t, NULL);
   2771          }
   2772       }
   2773 
   2774       VG_(sigfillset)(&allsigs);
   2775       while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
   2776          ;
   2777 
   2778       VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
   2779    }
   2780 
   2781    if (0) {
   2782       Char **cpp;
   2783       VG_(printf)("exec: %s\n", path);
   2784       for (cpp = argv; cpp && *cpp; cpp++)
   2785          VG_(printf)("argv: %s\n", *cpp);
   2786       if (0)
   2787          for (cpp = envp; cpp && *cpp; cpp++)
   2788             VG_(printf)("env: %s\n", *cpp);
   2789    }
   2790 
   2791    SET_STATUS_from_SysRes(
   2792       VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
   2793    );
   2794 
   2795    /* If we got here, then the execve failed.  We've already made way
   2796       too much of a mess to continue, so we have to abort. */
   2797   hosed:
   2798    vg_assert(FAILURE);
   2799    VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %ld\n",
   2800                 ARG1, (char*)ARG1, ARG2, ARG3, ERR);
   2801    VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
   2802                             "execve() failing, so I'm dying.\n");
   2803    VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
   2804                             "or work out how to recover.\n");
   2805    VG_(exit)(101);
   2806 }
   2807 
   2808 PRE(sys_access)
   2809 {
   2810    PRINT("sys_access ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   2811    PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
   2812    PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
   2813 }
   2814 
   2815 PRE(sys_alarm)
   2816 {
   2817    PRINT("sys_alarm ( %ld )", ARG1);
   2818    PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
   2819 }
   2820 
   2821 PRE(sys_brk)
   2822 {
   2823    Addr brk_limit = VG_(brk_limit);
   2824    Addr brk_new;
   2825 
   2826    /* libc   says: int   brk(void *end_data_segment);
   2827       kernel says: void* brk(void* end_data_segment);  (more or less)
   2828 
   2829       libc returns 0 on success, and -1 (and sets errno) on failure.
   2830       Nb: if you ask to shrink the dataseg end below what it
   2831       currently is, that always succeeds, even if the dataseg end
   2832       doesn't actually change (eg. brk(0)).  Unless it seg faults.
   2833 
   2834       Kernel returns the new dataseg end.  If the brk() failed, this
   2835       will be unchanged from the old one.  That's why calling (kernel)
   2836       brk(0) gives the current dataseg end (libc brk() just returns
   2837       zero in that case).
   2838 
   2839       Both will seg fault if you shrink it back into a text segment.
   2840    */
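
   /* Concrete illustration, with made-up addresses: under the kernel
      semantics above, if the current break is 0x0804c000 then brk(0)
      returns 0x0804c000 unchanged, while a successful brk(0x08050000)
      returns 0x08050000.  do_brk() below mirrors this, so brk_new ==
      ARG1 signals success and anything else means the break did not
      move. */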
   2841    PRINT("sys_brk ( %#lx )", ARG1);
   2842    PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
   2843 
   2844    brk_new = do_brk(ARG1);
   2845    SET_STATUS_Success( brk_new );
   2846 
   2847    if (brk_new == ARG1) {
   2848       /* brk() succeeded */
   2849       if (brk_new < brk_limit) {
   2850          /* successfully shrunk the data segment. */
   2851          VG_TRACK( die_mem_brk, (Addr)ARG1,
   2852 		   brk_limit-ARG1 );
   2853       } else
   2854       if (brk_new > brk_limit) {
   2855          /* successfully grew the data segment */
   2856          VG_TRACK( new_mem_brk, brk_limit,
   2857                    ARG1-brk_limit, tid );
   2858       }
   2859    } else {
   2860       /* brk() failed */
   2861       vg_assert(brk_limit == brk_new);
   2862    }
   2863 }
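         /* Illustrative sketch (not Valgrind code): the kernel-level brk
            contract described above, as a client would observe it.  The
            syscall() wrapper and the one-page grow are hypothetical.

               void* cur = (void*)syscall(__NR_brk, 0);    -- query: current dataseg end
               void* req = (char*)cur + 4096;
               void* got = (void*)syscall(__NR_brk, req);  -- try to grow by one page
               if (got == req)
                  -- grew: the wrapper above reports [cur, req) via new_mem_brk
               else
                  -- failed: got == cur, the dataseg end is unchanged
         */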
   2864 
   2865 PRE(sys_chdir)
   2866 {
   2867    FUSE_COMPATIBLE_MAY_BLOCK();
   2868    PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   2869    PRE_REG_READ1(long, "chdir", const char *, path);
   2870    PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
   2871 }
   2872 
   2873 PRE(sys_chmod)
   2874 {
   2875    FUSE_COMPATIBLE_MAY_BLOCK();
   2876    PRINT("sys_chmod ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   2877    PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
   2878    PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
   2879 }
   2880 
   2881 PRE(sys_chown)
   2882 {
   2883    FUSE_COMPATIBLE_MAY_BLOCK();
   2884    PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   2885    PRE_REG_READ3(long, "chown",
   2886                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
   2887    PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
   2888 }
   2889 
   2890 PRE(sys_lchown)
   2891 {
   2892    FUSE_COMPATIBLE_MAY_BLOCK();
   2893    PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   2894    PRE_REG_READ3(long, "lchown",
   2895                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
   2896    PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
   2897 }
   2898 
   2899 PRE(sys_close)
   2900 {
   2901    FUSE_COMPATIBLE_MAY_BLOCK();
   2902    PRINT("sys_close ( %ld )", ARG1);
   2903    PRE_REG_READ1(long, "close", unsigned int, fd);
   2904 
   2905    /* Detect and negate attempts by the client to close Valgrind's log fd */
   2906    if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
   2907         /* If doing -d style logging (which is to fd=2), don't
   2908            allow that to be closed either. */
   2909         || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
   2910       SET_STATUS_Failure( VKI_EBADF );
   2911 }
   2912 
   2913 POST(sys_close)
   2914 {
   2915    if (VG_(clo_track_fds)) record_fd_close(ARG1);
   2916 }
   2917 
   2918 PRE(sys_dup)
   2919 {
   2920    PRINT("sys_dup ( %ld )", ARG1);
   2921    PRE_REG_READ1(long, "dup", unsigned int, oldfd);
   2922 }
   2923 
   2924 POST(sys_dup)
   2925 {
   2926    vg_assert(SUCCESS);
   2927    if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
   2928       VG_(close)(RES);
   2929       SET_STATUS_Failure( VKI_EMFILE );
   2930    } else {
   2931       if (VG_(clo_track_fds))
   2932          ML_(record_fd_open_named)(tid, RES);
   2933    }
   2934 }
   2935 
   2936 PRE(sys_dup2)
   2937 {
   2938    PRINT("sys_dup2 ( %ld, %ld )", ARG1,ARG2);
   2939    PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
   2940    if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
   2941       SET_STATUS_Failure( VKI_EBADF );
   2942 }
   2943 
   2944 POST(sys_dup2)
   2945 {
   2946    vg_assert(SUCCESS);
   2947    if (VG_(clo_track_fds))
   2948       ML_(record_fd_open_named)(tid, RES);
   2949 }
   2950 
   2951 PRE(sys_fchdir)
   2952 {
   2953    FUSE_COMPATIBLE_MAY_BLOCK();
   2954    PRINT("sys_fchdir ( %ld )", ARG1);
   2955    PRE_REG_READ1(long, "fchdir", unsigned int, fd);
   2956 }
   2957 
   2958 PRE(sys_fchown)
   2959 {
   2960    FUSE_COMPATIBLE_MAY_BLOCK();
   2961    PRINT("sys_fchown ( %ld, %ld, %ld )", ARG1,ARG2,ARG3);
   2962    PRE_REG_READ3(long, "fchown",
   2963                  unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
   2964 }
   2965 
   2966 PRE(sys_fchmod)
   2967 {
   2968    FUSE_COMPATIBLE_MAY_BLOCK();
   2969    PRINT("sys_fchmod ( %ld, %ld )", ARG1,ARG2);
   2970    PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
   2971 }
   2972 
   2973 PRE(sys_newfstat)
   2974 {
   2975    FUSE_COMPATIBLE_MAY_BLOCK();
   2976    PRINT("sys_newfstat ( %ld, %#lx )", ARG1,ARG2);
   2977    PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
   2978    PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
   2979 }
   2980 
   2981 POST(sys_newfstat)
   2982 {
   2983    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   2984 }
   2985 
   2986 static vki_sigset_t fork_saved_mask;
   2987 
   2988 // In Linux, the sys_fork() function varies across architectures, but we
   2989 // ignore the various args it gets, and so it looks arch-neutral.  Hmm.
   2990 PRE(sys_fork)
   2991 {
   2992    Bool is_child;
   2993    Int child_pid;
   2994    vki_sigset_t mask;
   2995 
   2996    PRINT("sys_fork ( )");
   2997    PRE_REG_READ0(long, "fork");
   2998 
   2999    /* Block all signals during fork, so that we can fix things up in
   3000       the child without being interrupted. */
   3001    VG_(sigfillset)(&mask);
   3002    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
   3003 
   3004    SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
   3005 
   3006    if (!SUCCESS) return;
   3007 
   3008 #if defined(VGO_linux)
   3009    // RES is 0 for child, non-0 (the child's PID) for parent.
   3010    is_child = ( RES == 0 ? True : False );
   3011    child_pid = ( is_child ? -1 : RES );
   3012 #elif defined(VGO_darwin)
   3013    // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
   3014    is_child = RESHI;
   3015    child_pid = RES;
   3016 #else
   3017 #  error Unknown OS
   3018 #endif
   3019 
   3020    VG_(do_atfork_pre)(tid);
   3021 
   3022    if (is_child) {
   3023       VG_(do_atfork_child)(tid);
   3024 
   3025       /* restore signal mask */
   3026       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   3027 
   3028       /* If --child-silent-after-fork=yes was specified, set the
   3029          output file descriptors to 'impossible' values.  This is
   3030          noticed by send_bytes_to_logging_sink in m_libcprint.c, which
   3031          duly stops writing any further output. */
   3032       if (VG_(clo_child_silent_after_fork)) {
   3033          if (!VG_(log_output_sink).is_socket)
   3034             VG_(log_output_sink).fd = -1;
   3035          if (!VG_(xml_output_sink).is_socket)
   3036             VG_(xml_output_sink).fd = -1;
   3037       }
   3038 
   3039    } else {
   3040       VG_(do_atfork_parent)(tid);
   3041 
   3042       PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);
   3043 
   3044       /* restore signal mask */
   3045       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   3046    }
   3047 }
   3048 
   3049 PRE(sys_ftruncate)
   3050 {
   3051    *flags |= SfMayBlock;
   3052    PRINT("sys_ftruncate ( %ld, %ld )", ARG1,ARG2);
   3053    PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
   3054 }
   3055 
   3056 PRE(sys_truncate)
   3057 {
   3058    *flags |= SfMayBlock;
   3059    PRINT("sys_truncate ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   3060    PRE_REG_READ2(long, "truncate",
   3061                  const char *, path, unsigned long, length);
   3062    PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
   3063 }
   3064 
   3065 PRE(sys_ftruncate64)
   3066 {
   3067    *flags |= SfMayBlock;
   3068 #if VG_WORDSIZE == 4
   3069    PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, MERGE64(ARG2,ARG3));
   3070    PRE_REG_READ3(long, "ftruncate64",
   3071                  unsigned int, fd,
   3072                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
   3073 #else
   3074    PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, (Long)ARG2);
   3075    PRE_REG_READ2(long, "ftruncate64",
   3076                  unsigned int,fd, UWord,length);
   3077 #endif
   3078 }
   3079 
   3080 PRE(sys_truncate64)
   3081 {
   3082    *flags |= SfMayBlock;
   3083 #if VG_WORDSIZE == 4
   3084    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
   3085    PRE_REG_READ3(long, "truncate64",
   3086                  const char *, path,
   3087                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
   3088 #else
   3089    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
   3090    PRE_REG_READ2(long, "truncate64",
   3091                  const char *,path, UWord,length);
   3092 #endif
   3093    PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
   3094 }
   3095 
   3096 PRE(sys_getdents)
   3097 {
   3098    *flags |= SfMayBlock;
   3099    PRINT("sys_getdents ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   3100    PRE_REG_READ3(long, "getdents",
   3101                  unsigned int, fd, struct linux_dirent *, dirp,
   3102                  unsigned int, count);
   3103    PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
   3104 }
   3105 
   3106 POST(sys_getdents)
   3107 {
   3108    vg_assert(SUCCESS);
   3109    if (RES > 0)
   3110       POST_MEM_WRITE( ARG2, RES );
   3111 }
   3112 
   3113 PRE(sys_getdents64)
   3114 {
   3115    *flags |= SfMayBlock;
   3116    PRINT("sys_getdents64 ( %ld, %#lx, %ld )",ARG1,ARG2,ARG3);
   3117    PRE_REG_READ3(long, "getdents64",
   3118                  unsigned int, fd, struct linux_dirent64 *, dirp,
   3119                  unsigned int, count);
   3120    PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
   3121 }
   3122 
   3123 POST(sys_getdents64)
   3124 {
   3125    vg_assert(SUCCESS);
   3126    if (RES > 0)
   3127       POST_MEM_WRITE( ARG2, RES );
   3128 }
   3129 
   3130 PRE(sys_getgroups)
   3131 {
   3132    PRINT("sys_getgroups ( %ld, %#lx )", ARG1, ARG2);
   3133    PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
   3134    if (ARG1 > 0)
   3135       PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
   3136 }
   3137 
   3138 POST(sys_getgroups)
   3139 {
   3140    vg_assert(SUCCESS);
   3141    if (ARG1 > 0 && RES > 0)
   3142       POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
   3143 }
   3144 
   3145 PRE(sys_getcwd)
   3146 {
   3147    // Comment from linux/fs/dcache.c:
   3148    //   NOTE! The user-level library version returns a character pointer.
   3149    //   The kernel system call just returns the length of the buffer filled
   3150    //   (which includes the ending '\0' character), or a negative error
   3151    //   value.
   3152    // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
   3153    PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
   3154    PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
   3155    PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
   3156 }
   3157 
   3158 POST(sys_getcwd)
   3159 {
   3160    vg_assert(SUCCESS);
   3161    if (RES != (Addr)NULL)
   3162       POST_MEM_WRITE( ARG1, RES );
   3163 }
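         /* Worked example of the kernel contract quoted above: for a current
            directory of "/home/me" (an illustrative path) the kernel fills buf
            with those 8 characters plus the trailing '\0' and returns 9, so
            POST_MEM_WRITE(ARG1, RES) covers exactly the bytes written,
            terminator included. */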
   3164 
   3165 PRE(sys_geteuid)
   3166 {
   3167    PRINT("sys_geteuid ( )");
   3168    PRE_REG_READ0(long, "geteuid");
   3169 }
   3170 
   3171 PRE(sys_getegid)
   3172 {
   3173    PRINT("sys_getegid ( )");
   3174    PRE_REG_READ0(long, "getegid");
   3175 }
   3176 
   3177 PRE(sys_getgid)
   3178 {
   3179    PRINT("sys_getgid ( )");
   3180    PRE_REG_READ0(long, "getgid");
   3181 }
   3182 
   3183 PRE(sys_getpid)
   3184 {
   3185    PRINT("sys_getpid ()");
   3186    PRE_REG_READ0(long, "getpid");
   3187 }
   3188 
   3189 PRE(sys_getpgid)
   3190 {
   3191    PRINT("sys_getpgid ( %ld )", ARG1);
   3192    PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
   3193 }
   3194 
   3195 PRE(sys_getpgrp)
   3196 {
   3197    PRINT("sys_getpgrp ()");
   3198    PRE_REG_READ0(long, "getpgrp");
   3199 }
   3200 
   3201 PRE(sys_getppid)
   3202 {
   3203    PRINT("sys_getppid ()");
   3204    PRE_REG_READ0(long, "getppid");
   3205 }
   3206 
   3207 static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
   3208 {
   3209    POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
   3210 
   3211 #ifdef _RLIMIT_POSIX_FLAG
   3212    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
   3213    // Unset it here to make the switch case below work correctly.
   3214    a1 &= ~_RLIMIT_POSIX_FLAG;
   3215 #endif
   3216 
   3217    switch (a1) {
   3218    case VKI_RLIMIT_NOFILE:
   3219       ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
   3220       ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
   3221       break;
   3222 
   3223    case VKI_RLIMIT_DATA:
   3224       *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
   3225       break;
   3226 
   3227    case VKI_RLIMIT_STACK:
   3228       *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);
   3229       break;
   3230    }
   3231 }
   3232 
   3233 PRE(sys_old_getrlimit)
   3234 {
   3235    PRINT("sys_old_getrlimit ( %ld, %#lx )", ARG1,ARG2);
   3236    PRE_REG_READ2(long, "old_getrlimit",
   3237                  unsigned int, resource, struct rlimit *, rlim);
   3238    PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   3239 }
   3240 
   3241 POST(sys_old_getrlimit)
   3242 {
   3243    common_post_getrlimit(tid, ARG1, ARG2);
   3244 }
   3245 
   3246 PRE(sys_getrlimit)
   3247 {
   3248    PRINT("sys_getrlimit ( %ld, %#lx )", ARG1,ARG2);
   3249    PRE_REG_READ2(long, "getrlimit",
   3250                  unsigned int, resource, struct rlimit *, rlim);
   3251    PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   3252 }
   3253 
   3254 POST(sys_getrlimit)
   3255 {
   3256    common_post_getrlimit(tid, ARG1, ARG2);
   3257 }
   3258 
   3259 PRE(sys_getrusage)
   3260 {
   3261    PRINT("sys_getrusage ( %ld, %#lx )", ARG1,ARG2);
   3262    PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
   3263    PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
   3264 }
   3265 
   3266 POST(sys_getrusage)
   3267 {
   3268    vg_assert(SUCCESS);
   3269    if (RES == 0)
   3270       POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
   3271 }
   3272 
   3273 PRE(sys_gettimeofday)
   3274 {
   3275    PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
   3276    PRE_REG_READ2(long, "gettimeofday",
   3277                  struct timeval *, tv, struct timezone *, tz);
   3278    // GrP fixme does darwin write to *tz anymore?
   3279    if (ARG1 != 0)
   3280       PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
   3281    if (ARG2 != 0)
   3282       PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
   3283 }
   3284 
   3285 POST(sys_gettimeofday)
   3286 {
   3287    vg_assert(SUCCESS);
   3288    if (RES == 0) {
   3289       if (ARG1 != 0)
   3290          POST_timeval_WRITE( ARG1 );
   3291       if (ARG2 != 0)
   3292 	 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
   3293    }
   3294 }
   3295 
   3296 PRE(sys_settimeofday)
   3297 {
   3298    PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
   3299    PRE_REG_READ2(long, "settimeofday",
   3300                  struct timeval *, tv, struct timezone *, tz);
   3301    if (ARG1 != 0)
   3302       PRE_timeval_READ( "settimeofday(tv)", ARG1 );
   3303    if (ARG2 != 0) {
   3304       PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
   3305       /* maybe should warn if tz->tz_dsttime is non-zero? */
   3306    }
   3307 }
   3308 
   3309 PRE(sys_getuid)
   3310 {
   3311    PRINT("sys_getuid ( )");
   3312    PRE_REG_READ0(long, "getuid");
   3313 }
   3314 
   3315 void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
   3316 {
   3317    /* We don't have any specific information on it, so
   3318       try to do something reasonable based on direction and
   3319       size bits.  The encoding scheme is described in
   3320       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
   3321 
   3322       According to Simon Hausmann, _IOC_READ means the kernel
   3323       writes a value to the ioctl value passed from the user
    3324       space and the other way around with _IOC_WRITE.  (A worked example follows this function.) */
   3325 
   3326    UInt dir  = _VKI_IOC_DIR(request);
   3327    UInt size = _VKI_IOC_SIZE(request);
   3328    if (VG_(strstr)(VG_(clo_sim_hints), "lax-ioctls") != NULL) {
   3329       /*
   3330        * Be very lax about ioctl handling; the only
   3331        * assumption is that the size is correct. Doesn't
   3332        * require the full buffer to be initialized when
   3333        * writing.  Without this, using some device
   3334        * drivers with a large number of strange ioctl
   3335        * commands becomes very tiresome.
   3336        */
   3337    } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
   3338       //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
   3339       //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   3340       static Int moans = 3;
   3341       if (moans > 0 && !VG_(clo_xml)) {
   3342          moans--;
   3343          VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
   3344                    " with no size/direction hints\n", request);
   3345          VG_(umsg)("   This could cause spurious value errors to appear.\n");
   3346          VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
   3347                    "guidance on writing a proper wrapper.\n" );
   3348       }
   3349    } else {
   3350       //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
   3351       //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   3352       if ((dir & _VKI_IOC_WRITE) && size > 0)
   3353          PRE_MEM_READ( "ioctl(generic)", arg, size);
   3354       if ((dir & _VKI_IOC_READ) && size > 0)
   3355          PRE_MEM_WRITE( "ioctl(generic)", arg, size);
   3356    }
   3357 }
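         /* Worked example (hypothetical ioctls; the names are illustrative
            only, not from any real driver):

               #define FOO_GET  _IOR('x', 42, struct foo)   -- dir = _IOC_READ,  size = sizeof(struct foo)
               #define FOO_SET  _IOW('x', 43, struct foo)   -- dir = _IOC_WRITE, size = sizeof(struct foo)

            For FOO_GET the handler above does PRE_MEM_WRITE(arg, size) since
            the kernel will fill *arg, and ML_(POST_unknown_ioctl) below marks
            the same range as written once the call succeeds.  For FOO_SET it
            does PRE_MEM_READ(arg, size).  Requests built with _IO() carry no
            size/direction bits at all, which is what triggers the warning. */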
   3358 
   3359 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
   3360 {
   3361    /* We don't have any specific information on it, so
   3362       try to do something reasonable based on direction and
   3363       size bits.  The encoding scheme is described in
   3364       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
   3365 
   3366       According to Simon Hausmann, _IOC_READ means the kernel
   3367       writes a value to the ioctl value passed from the user
   3368       space and the other way around with _IOC_WRITE. */
   3369 
   3370    UInt dir  = _VKI_IOC_DIR(request);
   3371    UInt size = _VKI_IOC_SIZE(request);
   3372    if (size > 0 && (dir & _VKI_IOC_READ)
   3373        && res == 0
   3374        && arg != (Addr)NULL)
   3375    {
   3376       POST_MEM_WRITE(arg, size);
   3377    }
   3378 }
   3379 
   3380 /*
   3381    If we're sending a SIGKILL to one of our own threads, then simulate
   3382    it rather than really sending the signal, so that the target thread
   3383    gets a chance to clean up.  Returns True if we did the killing (or
   3384    no killing is necessary), and False if the caller should use the
   3385    normal kill syscall.
   3386 
   3387    "pid" is any pid argument which can be passed to kill; group kills
   3388    (< -1, 0), and owner kills (-1) are ignored, on the grounds that
   3389    they'll most likely hit all the threads and we won't need to worry
   3390    about cleanup.  In truth, we can't fully emulate these multicast
   3391    kills.
   3392 
   3393    "tgid" is a thread group id.  If it is not -1, then the target
   3394    thread must be in that thread group.
   3395  */
    3396    (A sketch of a typical caller follows the function.)  */
   3397 {
   3398    ThreadState *tst;
   3399    ThreadId tid;
   3400 
   3401    if (pid <= 0)
   3402       return False;
   3403 
   3404    tid = VG_(lwpid_to_vgtid)(pid);
   3405    if (tid == VG_INVALID_THREADID)
   3406       return False;		/* none of our threads */
   3407 
   3408    tst = VG_(get_ThreadState)(tid);
   3409    if (tst == NULL || tst->status == VgTs_Empty)
   3410       return False;		/* hm, shouldn't happen */
   3411 
   3412    if (tgid != -1 && tst->os_state.threadgroup != tgid)
   3413       return False;		/* not the right thread group */
   3414 
   3415    /* Check to see that the target isn't already exiting. */
   3416    if (!VG_(is_exiting)(tid)) {
   3417       if (VG_(clo_trace_signals))
   3418 	 VG_(message)(Vg_DebugMsg,
   3419                       "Thread %d being killed with SIGKILL\n",
   3420                       tst->tid);
   3421 
   3422       tst->exitreason = VgSrc_FatalSig;
   3423       tst->os_state.fatalsig = VKI_SIGKILL;
   3424 
   3425       if (!VG_(is_running_thread)(tid))
   3426 	 VG_(get_thread_out_of_syscall)(tid);
   3427    }
   3428 
   3429    return True;
   3430 }
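         /* Sketch of a typical tgid-aware caller (a minimal sketch only; the
            real tgkill wrapper lives in syswrap-linux.c and may differ in
            detail; 'tgid_arg', 'tid_arg' and 'sig' are hypothetical names):

               -- tgkill(tgid, tid, sig): only simulate the kill if the target
               -- lwp really belongs to thread group 'tgid_arg'.
               if (sig == VKI_SIGKILL && ML_(do_sigkill)(tid_arg, tgid_arg))
                  SET_STATUS_Success(0);
               else
                  SET_STATUS_from_SysRes(
                     VG_(do_syscall3)(SYSNO, tgid_arg, tid_arg, sig) );

            PRE(sys_kill) below is the pid-only caller: it passes tgid == -1,
            so any of our threads with a matching lwpid qualifies. */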
   3431 
   3432 PRE(sys_kill)
   3433 {
   3434    PRINT("sys_kill ( %ld, %ld )", ARG1,ARG2);
   3435    PRE_REG_READ2(long, "kill", int, pid, int, sig);
   3436    if (!ML_(client_signal_OK)(ARG2)) {
   3437       SET_STATUS_Failure( VKI_EINVAL );
   3438       return;
   3439    }
   3440 
   3441    /* If we're sending SIGKILL, check to see if the target is one of
   3442       our threads and handle it specially. */
   3443    if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
   3444       SET_STATUS_Success(0);
   3445    else
   3446       /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
   3447          affecting how posix-compliant the call is.  I guess it is
   3448          harmless to pass the 3rd arg on other platforms; hence pass
   3449          it on all. */
   3450       SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
   3451 
   3452    if (VG_(clo_trace_signals))
   3453       VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
   3454 		   ARG2, ARG1);
   3455 
   3456    /* This kill might have given us a pending signal.  Ask for a check once
   3457       the syscall is done. */
   3458    *flags |= SfPollAfter;
   3459 }
   3460 
   3461 PRE(sys_link)
   3462 {
   3463    *flags |= SfMayBlock;
   3464    PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   3465    PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
   3466    PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
   3467    PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
   3468 }
   3469 
   3470 PRE(sys_newlstat)
   3471 {
   3472    PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   3473    PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
   3474    PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
   3475    PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
   3476 }
   3477 
   3478 POST(sys_newlstat)
   3479 {
   3480    vg_assert(SUCCESS);
   3481    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   3482 }
   3483 
   3484 PRE(sys_mkdir)
   3485 {
   3486    *flags |= SfMayBlock;
   3487    PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   3488    PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
   3489    PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
   3490 }
   3491 
   3492 PRE(sys_mprotect)
   3493 {
   3494    PRINT("sys_mprotect ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   3495    PRE_REG_READ3(long, "mprotect",
   3496                  unsigned long, addr, vki_size_t, len, unsigned long, prot);
   3497 
   3498    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
   3499       SET_STATUS_Failure( VKI_ENOMEM );
   3500    }
   3501 #if defined(VKI_PROT_GROWSDOWN)
   3502    else
   3503    if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
   3504       /* Deal with mprotects on growable stack areas.
   3505 
   3506          The critical files to understand all this are mm/mprotect.c
   3507          in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
   3508          glibc.
   3509 
   3510          The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
   3511          round the start/end address of mprotect to the start/end of
   3512          the underlying vma and glibc uses that as an easy way to
   3513          change the protection of the stack by calling mprotect on the
   3514          last page of the stack with PROT_GROWSDOWN set.
   3515 
   3516          The sanity check provided by the kernel is that the vma must
    3517          have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  (A sketch of the glibc idiom follows this wrapper.)  */
   3518       UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
   3519       NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
   3520       NSegment const *rseg;
   3521 
   3522       vg_assert(aseg);
   3523 
   3524       if (grows == VKI_PROT_GROWSDOWN) {
   3525          rseg = VG_(am_next_nsegment)( (NSegment*)aseg, False/*backwards*/ );
   3526          if (rseg &&
   3527              rseg->kind == SkResvn &&
   3528              rseg->smode == SmUpper &&
   3529              rseg->end+1 == aseg->start) {
   3530             Addr end = ARG1 + ARG2;
   3531             ARG1 = aseg->start;
   3532             ARG2 = end - aseg->start;
   3533             ARG3 &= ~VKI_PROT_GROWSDOWN;
   3534          } else {
   3535             SET_STATUS_Failure( VKI_EINVAL );
   3536          }
   3537       } else if (grows == VKI_PROT_GROWSUP) {
   3538          rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
   3539          if (rseg &&
   3540              rseg->kind == SkResvn &&
   3541              rseg->smode == SmLower &&
   3542              aseg->end+1 == rseg->start) {
   3543             ARG2 = aseg->end - ARG1 + 1;
   3544             ARG3 &= ~VKI_PROT_GROWSUP;
   3545          } else {
   3546             SET_STATUS_Failure( VKI_EINVAL );
   3547          }
   3548       } else {
   3549          /* both GROWSUP and GROWSDOWN */
   3550          SET_STATUS_Failure( VKI_EINVAL );
   3551       }
   3552    }
   3553 #endif   // defined(VKI_PROT_GROWSDOWN)
   3554 }
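         /* Illustrative sketch (not Valgrind code) of the glibc idiom the
            wrapper above copes with: to change the protection of the whole
            stack, dl-execstack.c calls mprotect on just the last page of the
            stack and lets PROT_GROWSDOWN extend the change to the rest of the
            vma.  'stack_last_page' and 'pagesize' are hypothetical names.

               mprotect(stack_last_page, pagesize,
                        PROT_READ | PROT_WRITE | PROT_EXEC | PROT_GROWSDOWN);

            The wrapper widens ARG1/ARG2 so the range reported to the tool
            covers the whole stack segment, and strips the GROWSDOWN/GROWSUP
            bit before the call reaches the kernel. */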
   3555 
   3556 POST(sys_mprotect)
   3557 {
   3558    Addr a    = ARG1;
   3559    SizeT len = ARG2;
   3560    Int  prot = ARG3;
   3561 
   3562    ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
   3563 }
   3564 
   3565 PRE(sys_munmap)
   3566 {
   3567    if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);
   3568    PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
   3569    PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
   3570 
   3571    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
   3572       SET_STATUS_Failure( VKI_EINVAL );
   3573 }
   3574 
   3575 POST(sys_munmap)
   3576 {
   3577    Addr  a   = ARG1;
   3578    SizeT len = ARG2;
   3579 
   3580    ML_(notify_core_and_tool_of_munmap)( (Addr64)a, (ULong)len );
   3581 }
   3582 
   3583 PRE(sys_mincore)
   3584 {
   3585    PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
   3586    PRE_REG_READ3(long, "mincore",
   3587                  unsigned long, start, vki_size_t, length,
   3588                  unsigned char *, vec);
   3589    PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
   3590 }
   3591 POST(sys_mincore)
   3592 {
   3593    POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
   3594 }
   3595 
   3596 PRE(sys_nanosleep)
   3597 {
   3598    *flags |= SfMayBlock|SfPostOnFail;
   3599    PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
   3600    PRE_REG_READ2(long, "nanosleep",
   3601                  struct timespec *, req, struct timespec *, rem);
   3602    PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
   3603    if (ARG2 != 0)
   3604       PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
   3605 }
   3606 
   3607 POST(sys_nanosleep)
   3608 {
   3609    vg_assert(SUCCESS || FAILURE);
   3610    if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
   3611       POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
   3612 }
   3613 
   3614 PRE(sys_open)
   3615 {
   3616    if (ARG2 & VKI_O_CREAT) {
   3617       // 3-arg version
   3618       PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1,(char*)ARG1,ARG2,ARG3);
   3619       PRE_REG_READ3(long, "open",
   3620                     const char *, filename, int, flags, int, mode);
   3621    } else {
   3622       // 2-arg version
   3623       PRINT("sys_open ( %#lx(%s), %ld )",ARG1,(char*)ARG1,ARG2);
   3624       PRE_REG_READ2(long, "open",
   3625                     const char *, filename, int, flags);
   3626    }
   3627    PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
   3628 
   3629 #if defined(VGO_linux)
   3630    /* Handle the case where the open is of /proc/self/cmdline or
   3631       /proc/<pid>/cmdline, and just give it a copy of the fd for the
   3632       fake file we cooked up at startup (in m_main).  Also, seek the
   3633       cloned fd back to the start. */
   3634    {
   3635       HChar  name[30];
   3636       Char*  arg1s = (Char*) ARG1;
   3637       SysRes sres;
   3638 
   3639       VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
   3640       if (ML_(safe_to_deref)( arg1s, 1 ) &&
   3641           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
   3642          )
   3643       {
   3644          sres = VG_(dup)( VG_(cl_cmdline_fd) );
   3645          SET_STATUS_from_SysRes( sres );
   3646          if (!sr_isError(sres)) {
   3647             OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
   3648             if (off < 0)
   3649                SET_STATUS_Failure( VKI_EMFILE );
   3650          }
   3651          return;
   3652       }
   3653    }
   3654 #endif // defined(VGO_linux)
   3655 
   3656    /* Otherwise handle normally */
   3657    *flags |= SfMayBlock;
   3658 }
   3659 
   3660 POST(sys_open)
   3661 {
   3662    vg_assert(SUCCESS);
   3663    if (!ML_(fd_allowed)(RES, "open", tid, True)) {
   3664       VG_(close)(RES);
   3665       SET_STATUS_Failure( VKI_EMFILE );
   3666    } else {
   3667       if (VG_(clo_track_fds))
   3668          ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
   3669    }
   3670 }
   3671 
   3672 PRE(sys_read)
   3673 {
   3674    *flags |= SfMayBlock;
   3675    PRINT("sys_read ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
   3676    PRE_REG_READ3(ssize_t, "read",
   3677                  unsigned int, fd, char *, buf, vki_size_t, count);
   3678 
   3679    if (!ML_(fd_allowed)(ARG1, "read", tid, False))
   3680       SET_STATUS_Failure( VKI_EBADF );
   3681    else
   3682       PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
   3683 }
   3684 
   3685 POST(sys_read)
   3686 {
   3687    vg_assert(SUCCESS);
   3688    POST_MEM_WRITE( ARG2, RES );
   3689 }
   3690 
   3691 PRE(sys_write)
   3692 {
   3693    Bool ok;
   3694    *flags |= SfMayBlock;
   3695    PRINT("sys_write ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
   3696    PRE_REG_READ3(ssize_t, "write",
   3697                  unsigned int, fd, const char *, buf, vki_size_t, count);
   3698    /* check to see if it is allowed.  If not, try for an exemption from
   3699       --sim-hints=enable-outer (used for self hosting). */
   3700    ok = ML_(fd_allowed)(ARG1, "write", tid, False);
   3701    if (!ok && ARG1 == 2/*stderr*/
   3702            && VG_(strstr)(VG_(clo_sim_hints),"enable-outer"))
   3703       ok = True;
   3704    if (!ok)
   3705       SET_STATUS_Failure( VKI_EBADF );
   3706    else
   3707       PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
   3708 }
   3709 
   3710 PRE(sys_creat)
   3711 {
   3712    *flags |= SfMayBlock;
   3713    PRINT("sys_creat ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   3714    PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
   3715    PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
   3716 }
   3717 
   3718 POST(sys_creat)
   3719 {
   3720    vg_assert(SUCCESS);
   3721    if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
   3722       VG_(close)(RES);
   3723       SET_STATUS_Failure( VKI_EMFILE );
   3724    } else {
   3725       if (VG_(clo_track_fds))
   3726          ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
   3727    }
   3728 }
   3729 
   3730 PRE(sys_poll)
   3731 {
   3732    /* struct pollfd {
   3733         int fd;           -- file descriptor
   3734         short events;     -- requested events
   3735         short revents;    -- returned events
   3736       };
   3737       int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
   3738    */
   3739    UInt i;
   3740    struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
   3741    *flags |= SfMayBlock;
   3742    PRINT("sys_poll ( %#lx, %ld, %ld )\n", ARG1,ARG2,ARG3);
   3743    PRE_REG_READ3(long, "poll",
   3744                  struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
   3745 
   3746    for (i = 0; i < ARG2; i++) {
   3747       PRE_MEM_READ( "poll(ufds.fd)",
   3748                     (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
   3749       PRE_MEM_READ( "poll(ufds.events)",
   3750                     (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
    3751       PRE_MEM_WRITE( "poll(ufds.revents)",
   3752                      (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
   3753    }
   3754 }
   3755 
   3756 POST(sys_poll)
   3757 {
   3758    if (RES >= 0) {
   3759       UInt i;
   3760       struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
   3761       for (i = 0; i < ARG2; i++)
   3762 	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
   3763    }
   3764 }
   3765 
   3766 PRE(sys_readlink)
   3767 {
   3768    FUSE_COMPATIBLE_MAY_BLOCK();
   3769    Word saved = SYSNO;
   3770 
   3771    PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
   3772    PRE_REG_READ3(long, "readlink",
   3773                  const char *, path, char *, buf, int, bufsiz);
   3774    PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
   3775    PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
   3776 
   3777    {
   3778 #if defined(VGO_linux)
   3779       /*
   3780        * Handle the case where readlink is looking at /proc/self/exe or
   3781        * /proc/<pid>/exe.
   3782        */
   3783       HChar name[25];
   3784       Char* arg1s = (Char*) ARG1;
   3785       VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
   3786       if (ML_(safe_to_deref)(arg1s, 1) &&
   3787           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
   3788          )
   3789       {
   3790          VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
   3791          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
   3792                                                          ARG2, ARG3));
   3793       } else
   3794 #endif // defined(VGO_linux)
   3795       {
   3796          /* Normal case */
   3797          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
   3798       }
   3799    }
   3800 
   3801    if (SUCCESS && RES > 0)
   3802       POST_MEM_WRITE( ARG2, RES );
   3803 }
   3804 
   3805 PRE(sys_readv)
   3806 {
   3807    Int i;
   3808    struct vki_iovec * vec;
   3809    *flags |= SfMayBlock;
   3810    PRINT("sys_readv ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
   3811    PRE_REG_READ3(ssize_t, "readv",
   3812                  unsigned long, fd, const struct iovec *, vector,
   3813                  unsigned long, count);
   3814    if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
   3815       SET_STATUS_Failure( VKI_EBADF );
   3816    } else {
   3817       PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
   3818 
   3819       if (ARG2 != 0) {
   3820          /* ToDo: don't do any of the following if the vector is invalid */
   3821          vec = (struct vki_iovec *)ARG2;
   3822          for (i = 0; i < (Int)ARG3; i++)
   3823             PRE_MEM_WRITE( "readv(vector[...])",
   3824                            (Addr)vec[i].iov_base, vec[i].iov_len );
   3825       }
   3826    }
   3827 }
   3828 
   3829 POST(sys_readv)
   3830 {
   3831    vg_assert(SUCCESS);
   3832    if (RES > 0) {
   3833       Int i;
   3834       struct vki_iovec * vec = (struct vki_iovec *)ARG2;
   3835       Int remains = RES;
   3836 
   3837       /* RES holds the number of bytes read. */
   3838       for (i = 0; i < (Int)ARG3; i++) {
   3839 	 Int nReadThisBuf = vec[i].iov_len;
   3840 	 if (nReadThisBuf > remains) nReadThisBuf = remains;
   3841 	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
   3842 	 remains -= nReadThisBuf;
   3843 	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
   3844       }
   3845    }
   3846 }
   3847 
   3848 PRE(sys_rename)
   3849 {
   3850    FUSE_COMPATIBLE_MAY_BLOCK();
   3851    PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   3852    PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
   3853    PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
   3854    PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
   3855 }
   3856 
   3857 PRE(sys_rmdir)
   3858 {
   3859    *flags |= SfMayBlock;
   3860    PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   3861    PRE_REG_READ1(long, "rmdir", const char *, pathname);
   3862    PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
   3863 }
   3864 
   3865 PRE(sys_select)
   3866 {
   3867    *flags |= SfMayBlock;
   3868    PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   3869    PRE_REG_READ5(long, "select",
   3870                  int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
   3871                  vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
   3872    // XXX: this possibly understates how much memory is read.
    3873    // XXX: this understates how much is read: the kernel rounds each fd_set up to whole words (see the note after this wrapper).
   3874       PRE_MEM_READ( "select(readfds)",
   3875 		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
   3876    if (ARG3 != 0)
   3877       PRE_MEM_READ( "select(writefds)",
   3878 		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
   3879    if (ARG4 != 0)
   3880       PRE_MEM_READ( "select(exceptfds)",
   3881 		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
   3882    if (ARG5 != 0)
   3883       PRE_timeval_READ( "select(timeout)", ARG5 );
   3884 }
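         /* Note on the byte counts above (a sketch, assuming the usual Linux
            fd_set handling): the kernel reads ceil(n/8) bytes rounded up to
            whole words from each non-NULL set, so a closer count than ARG1/8
            would be:

               SizeT fdset_bytes = VG_ROUNDUP((ARG1 + 7) / 8, sizeof(UWord));

            VG_ROUNDUP is assumed to be the rounding helper used elsewhere in
            Valgrind; the plain expression ((ARG1+7)/8 + sizeof(UWord) - 1)
            / sizeof(UWord) * sizeof(UWord) computes the same thing. */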
   3885 
   3886 PRE(sys_setgid)
   3887 {
   3888    PRINT("sys_setgid ( %ld )", ARG1);
   3889    PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
   3890 }
   3891 
   3892 PRE(sys_setsid)
   3893 {
   3894    PRINT("sys_setsid ( )");
   3895    PRE_REG_READ0(long, "setsid");
   3896 }
   3897 
   3898 PRE(sys_setgroups)
   3899 {
   3900    PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
   3901    PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
   3902    if (ARG1 > 0)
   3903       PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
   3904 }
   3905 
   3906 PRE(sys_setpgid)
   3907 {
   3908    PRINT("setpgid ( %ld, %ld )", ARG1, ARG2);
   3909    PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
   3910 }
   3911 
   3912 PRE(sys_setregid)
   3913 {
   3914    PRINT("sys_setregid ( %ld, %ld )", ARG1, ARG2);
   3915    PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
   3916 }
   3917 
   3918 PRE(sys_setreuid)
   3919 {
   3920    PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
   3921    PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
   3922 }
   3923 
   3924 PRE(sys_setrlimit)
   3925 {
   3926    UWord arg1 = ARG1;
   3927    PRINT("sys_setrlimit ( %ld, %#lx )", ARG1,ARG2);
   3928    PRE_REG_READ2(long, "setrlimit",
   3929                  unsigned int, resource, struct rlimit *, rlim);
   3930    PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   3931 
   3932 #ifdef _RLIMIT_POSIX_FLAG
   3933    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
   3934    // Unset it here to make the if statements below work correctly.
   3935    arg1 &= ~_RLIMIT_POSIX_FLAG;
   3936 #endif
   3937 
   3938    if (arg1 == VKI_RLIMIT_NOFILE) {
   3939       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
   3940           ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
   3941          SET_STATUS_Failure( VKI_EPERM );
   3942       }
   3943       else {
   3944          VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
   3945          SET_STATUS_Success( 0 );
   3946       }
   3947    }
   3948    else if (arg1 == VKI_RLIMIT_DATA) {
   3949       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
   3950           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
   3951          SET_STATUS_Failure( VKI_EPERM );
   3952       }
   3953       else {
   3954          VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
   3955          SET_STATUS_Success( 0 );
   3956       }
   3957    }
   3958    else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
   3959       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
   3960           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
   3961          SET_STATUS_Failure( VKI_EPERM );
   3962       }
   3963       else {
   3964          VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
   3965          VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
   3966          SET_STATUS_Success( 0 );
   3967       }
   3968    }
   3969 }
   3970 
   3971 PRE(sys_setuid)
   3972 {
   3973    PRINT("sys_setuid ( %ld )", ARG1);
   3974    PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
   3975 }
   3976 
   3977 PRE(sys_newstat)
   3978 {
   3979    PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   3980    PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
   3981    PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
   3982    PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
   3983 }
   3984 
   3985 POST(sys_newstat)
   3986 {
   3987    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   3988 }
   3989 
   3990 PRE(sys_statfs)
   3991 {
   3992    PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   3993    PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
   3994    PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
   3995    PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
   3996 }
   3997 POST(sys_statfs)
   3998 {
   3999    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
   4000 }
   4001 
   4002 PRE(sys_statfs64)
   4003 {
   4004    PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
   4005    PRE_REG_READ3(long, "statfs64",
   4006                  const char *, path, vki_size_t, size, struct statfs64 *, buf);
   4007    PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
   4008    PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
   4009 }
   4010 POST(sys_statfs64)
   4011 {
   4012    POST_MEM_WRITE( ARG3, ARG2 );
   4013 }
   4014 
   4015 PRE(sys_symlink)
   4016 {
   4017    *flags |= SfMayBlock;
   4018    PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   4019    PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
   4020    PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
   4021    PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
   4022 }
   4023 
   4024 PRE(sys_time)
   4025 {
   4026    /* time_t time(time_t *t); */
   4027    PRINT("sys_time ( %#lx )",ARG1);
   4028    PRE_REG_READ1(long, "time", int *, t);
   4029    if (ARG1 != 0) {
   4030       PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
   4031    }
   4032 }
   4033 
   4034 POST(sys_time)
   4035 {
   4036    if (ARG1 != 0) {
   4037       POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
   4038    }
   4039 }
   4040 
   4041 PRE(sys_times)
   4042 {
   4043    PRINT("sys_times ( %#lx )", ARG1);
   4044    PRE_REG_READ1(long, "times", struct tms *, buf);
   4045    if (ARG1 != 0) {
   4046       PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
   4047    }
   4048 }
   4049 
   4050 POST(sys_times)
   4051 {
   4052    if (ARG1 != 0) {
   4053       POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
   4054    }
   4055 }
   4056 
   4057 PRE(sys_umask)
   4058 {
   4059    PRINT("sys_umask ( %ld )", ARG1);
   4060    PRE_REG_READ1(long, "umask", int, mask);
   4061 }
   4062 
   4063 PRE(sys_unlink)
   4064 {
   4065    *flags |= SfMayBlock;
   4066    PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
   4067    PRE_REG_READ1(long, "unlink", const char *, pathname);
   4068    PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
   4069 }
   4070 
   4071 PRE(sys_newuname)
   4072 {
   4073    PRINT("sys_newuname ( %#lx )", ARG1);
   4074    PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
   4075    PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
   4076 }
   4077 
   4078 POST(sys_newuname)
   4079 {
   4080    if (ARG1 != 0) {
   4081       POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
   4082    }
   4083 }
   4084 
   4085 PRE(sys_waitpid)
   4086 {
   4087    *flags |= SfMayBlock;
   4088    PRINT("sys_waitpid ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   4089    PRE_REG_READ3(long, "waitpid",
   4090                  vki_pid_t, pid, unsigned int *, status, int, options);
   4091 
   4092    if (ARG2 != (Addr)NULL)
   4093       PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
   4094 }
   4095 
   4096 POST(sys_waitpid)
   4097 {
   4098    if (ARG2 != (Addr)NULL)
   4099       POST_MEM_WRITE( ARG2, sizeof(int) );
   4100 }
   4101 
   4102 PRE(sys_wait4)
   4103 {
   4104    *flags |= SfMayBlock;
   4105    PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", ARG1,ARG2,ARG3,ARG4);
   4106 
   4107    PRE_REG_READ4(long, "wait4",
   4108                  vki_pid_t, pid, unsigned int *, status, int, options,
   4109                  struct rusage *, rusage);
   4110    if (ARG2 != (Addr)NULL)
   4111       PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
   4112    if (ARG4 != (Addr)NULL)
   4113       PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
   4114 }
   4115 
   4116 POST(sys_wait4)
   4117 {
   4118    if (ARG2 != (Addr)NULL)
   4119       POST_MEM_WRITE( ARG2, sizeof(int) );
   4120    if (ARG4 != (Addr)NULL)
   4121       POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
   4122 }
   4123 
   4124 PRE(sys_writev)
   4125 {
   4126    Int i;
   4127    struct vki_iovec * vec;
   4128    *flags |= SfMayBlock;
   4129    PRINT("sys_writev ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
   4130    PRE_REG_READ3(ssize_t, "writev",
   4131                  unsigned long, fd, const struct iovec *, vector,
   4132                  unsigned long, count);
   4133    if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
   4134       SET_STATUS_Failure( VKI_EBADF );
   4135    } else {
   4136       PRE_MEM_READ( "writev(vector)",
   4137 		     ARG2, ARG3 * sizeof(struct vki_iovec) );
   4138       if (ARG2 != 0) {
   4139          /* ToDo: don't do any of the following if the vector is invalid */
   4140          vec = (struct vki_iovec *)ARG2;
   4141          for (i = 0; i < (Int)ARG3; i++)
   4142             PRE_MEM_READ( "writev(vector[...])",
   4143                            (Addr)vec[i].iov_base, vec[i].iov_len );
   4144       }
   4145    }
   4146 }
   4147 
   4148 PRE(sys_utimes)
   4149 {
   4150    FUSE_COMPATIBLE_MAY_BLOCK();
   4151    PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   4152    PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
   4153    PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
   4154    if (ARG2 != 0) {
   4155       PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
   4156       PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
   4157    }
   4158 }
   4159 
   4160 PRE(sys_acct)
   4161 {
   4162    PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
   4163    PRE_REG_READ1(long, "acct", const char *, filename);
   4164    PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
   4165 }
   4166 
   4167 PRE(sys_pause)
   4168 {
   4169    *flags |= SfMayBlock;
   4170    PRINT("sys_pause ( )");
   4171    PRE_REG_READ0(long, "pause");
   4172 }
   4173 
   4174 PRE(sys_sigaltstack)
   4175 {
   4176    PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
   4177    PRE_REG_READ2(int, "sigaltstack",
   4178                  const vki_stack_t *, ss, vki_stack_t *, oss);
   4179    if (ARG1 != 0) {
   4180       const vki_stack_t *ss = (vki_stack_t *)ARG1;
   4181       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
   4182       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
   4183       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
   4184    }
   4185    if (ARG2 != 0) {
   4186       PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
   4187    }
   4188 
   4189    SET_STATUS_from_SysRes(
   4190       VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
   4191                               (vki_stack_t*)ARG2)
   4192    );
   4193 }
   4194 POST(sys_sigaltstack)
   4195 {
   4196    vg_assert(SUCCESS);
   4197    if (RES == 0 && ARG2 != 0)
   4198       POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
   4199 }
   4200 
   4201 #undef PRE
   4202 #undef POST
   4203 
   4204 #endif // defined(VGO_linux) || defined(VGO_darwin)
   4205 
   4206 /*--------------------------------------------------------------------*/
   4207 /*--- end                                                          ---*/
   4208 /*--------------------------------------------------------------------*/
   4209