      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*--------------------------------------------------------------------*/
      4 /*--- Wrappers for generic Unix system calls                       ---*/
      5 /*---                                            syswrap-generic.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of Valgrind, a dynamic binary instrumentation
     10    framework.
     11 
     12    Copyright (C) 2000-2015 Julian Seward
     13       jseward (at) acm.org
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 */
     32 
     33 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
     34 
     35 #include "pub_core_basics.h"
     36 #include "pub_core_vki.h"
     37 #include "pub_core_vkiscnums.h"
     38 #include "pub_core_threadstate.h"
     39 #include "pub_core_debuginfo.h"     // VG_(di_notify_*)
     40 #include "pub_core_aspacemgr.h"
     41 #include "pub_core_transtab.h"      // VG_(discard_translations)
     42 #include "pub_core_xarray.h"
     43 #include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
     44 #include "pub_core_debuglog.h"
     45 #include "pub_core_errormgr.h"
     46 #include "pub_core_gdbserver.h"     // VG_(gdbserver)
     47 #include "pub_core_libcbase.h"
     48 #include "pub_core_libcassert.h"
     49 #include "pub_core_libcfile.h"
     50 #include "pub_core_libcprint.h"
     51 #include "pub_core_libcproc.h"
     52 #include "pub_core_libcsignal.h"
     53 #include "pub_core_machine.h"       // VG_(get_SP)
     54 #include "pub_core_mallocfree.h"
     55 #include "pub_core_options.h"
     56 #include "pub_core_scheduler.h"
     57 #include "pub_core_signals.h"
     58 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     59 #include "pub_core_syscall.h"
     60 #include "pub_core_syswrap.h"
     61 #include "pub_core_tooliface.h"
     62 #include "pub_core_ume.h"
     63 #include "pub_core_stacks.h"
     64 
     65 #include "priv_types_n_macros.h"
     66 #include "priv_syswrap-generic.h"
     67 
     68 #include "config.h"
     69 
     70 
     71 void ML_(guess_and_register_stack) (Addr sp, ThreadState* tst)
     72 {
     73    Bool debug = False;
     74    NSegment const* seg;
     75 
     76    /* We don't really know where the client stack is, because it's
     77       allocated by the client.  The best we can do is look at the
     78       memory mappings and try to derive some useful information.  We
     79       assume that sp starts near its highest possible value, and can
     80       only go down to the start of the mmap'd segment. */
     81    seg = VG_(am_find_nsegment)(sp);
     82    if (seg &&
     83        VG_(am_is_valid_for_client)(sp, 1, VKI_PROT_READ | VKI_PROT_WRITE)) {
     84       tst->client_stack_highest_byte = (Addr)VG_PGROUNDUP(sp)-1;
     85       tst->client_stack_szB = tst->client_stack_highest_byte - seg->start + 1;
     86 
     87       VG_(register_stack)(seg->start, tst->client_stack_highest_byte);
     88 
     89       if (debug)
     90 	 VG_(printf)("tid %u: guessed client stack range [%#lx-%#lx]\n",
     91 		     tst->tid, seg->start, tst->client_stack_highest_byte);
     92    } else {
     93       VG_(message)(Vg_UserMsg,
     94                    "!? New thread %u starts with SP(%#lx) unmapped\n",
     95 		   tst->tid, sp);
     96       tst->client_stack_highest_byte = 0;
     97       tst->client_stack_szB  = 0;
     98    }
     99 }
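
        /* Worked example (illustrative, assuming a 4 KiB page size): if the
           new thread's sp is 0xbefff8a0 and the containing segment starts at
           0xbe800000, then client_stack_highest_byte becomes
           VG_PGROUNDUP(0xbefff8a0)-1 = 0xbeffffff, and client_stack_szB
           becomes 0xbeffffff - 0xbe800000 + 1 = 0x800000, i.e. an 8 MiB
           stack is registered for the thread. */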
    100 
     101 /* Returns True iff the address range is something the client can
     102    plausibly mess with: all of it either already belongs to the
     103    client or is free or a reservation. */
    104 
    105 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
    106                                    const HChar *syscallname)
    107 {
    108    Bool ret;
    109 
    110    if (size == 0)
    111       return True;
    112 
    113    ret = VG_(am_is_valid_for_client_or_free_or_resvn)
    114             (start,size,VKI_PROT_NONE);
    115 
    116    if (0)
    117       VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
    118 		  syscallname, start, start+size-1, (Int)ret);
    119 
    120    if (!ret && syscallname != NULL) {
    121       VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
    122                                "to modify addresses %#lx-%#lx\n",
    123                                syscallname, start, start+size-1);
    124       if (VG_(clo_verbosity) > 1) {
    125          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
    126       }
    127    }
    128 
    129    return ret;
    130 }
    131 
    132 
    133 Bool ML_(client_signal_OK)(Int sigNo)
    134 {
    135    /* signal 0 is OK for kill */
    136    Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
    137 
    138    //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
    139 
    140    return ret;
    141 }
    142 
    143 
    144 /* Handy small function to help stop wrappers from segfaulting when
    145    presented with bogus client addresses.  Is not used for generating
    146    user-visible errors. */
    147 
    148 Bool ML_(safe_to_deref) ( const void *start, SizeT size )
    149 {
    150    return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
    151 }
    152 
    153 
    154 /* ---------------------------------------------------------------------
    155    Doing mmap, mremap
    156    ------------------------------------------------------------------ */
    157 
    158 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
    159    munmap, mprotect (and mremap??) work at the page level.  So addresses
    160    and lengths must be adjusted for this. */
    161 
     162 /* Mash around start and length so that the area exactly covers
     163    an integral number of pages.  If we don't do that, memcheck's
     164    idea of addressable memory diverges from the kernel's, which
     165    causes the leak detector to crash. */
    166 static
    167 void page_align_addr_and_len( Addr* a, SizeT* len)
    168 {
    169    Addr ra;
    170 
    171    ra = VG_PGROUNDDN(*a);
    172    *len = VG_PGROUNDUP(*a + *len) - ra;
    173    *a = ra;
    174 }
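
        /* Worked example (illustrative, assuming a 4 KiB page size): a call
           with *a = 0x40001003 and *len = 0x10 produces
           ra = VG_PGROUNDDN(0x40001003) = 0x40001000 and
           *len = VG_PGROUNDUP(0x40001013) - 0x40001000 = 0x1000, so the
           adjusted range is the single whole page containing the original
           span. */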
    175 
    176 static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
    177                                 UInt flags, Int fd, Off64T offset)
    178 {
    179    Bool d;
    180 
    181    /* 'a' is the return value from a real kernel mmap, hence: */
    182    vg_assert(VG_IS_PAGE_ALIGNED(a));
    183    /* whereas len is whatever the syscall supplied.  So: */
    184    len = VG_PGROUNDUP(len);
    185 
    186    d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
    187 
    188    if (d)
    189       VG_(discard_translations)( a, (ULong)len,
    190                                  "notify_core_of_mmap" );
    191 }
    192 
    193 static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
    194 {
    195    Bool rr, ww, xx;
    196 
    197    /* 'a' is the return value from a real kernel mmap, hence: */
    198    vg_assert(VG_IS_PAGE_ALIGNED(a));
    199    /* whereas len is whatever the syscall supplied.  So: */
    200    len = VG_PGROUNDUP(len);
    201 
    202    rr = toBool(prot & VKI_PROT_READ);
    203    ww = toBool(prot & VKI_PROT_WRITE);
    204    xx = toBool(prot & VKI_PROT_EXEC);
    205 
    206    VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
    207 }
    208 
    209 
    210 /* When a client mmap has been successfully done, this function must
    211    be called.  It notifies both aspacem and the tool of the new
    212    mapping.
    213 
    214    JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
    215    it is called from is POST(sys_io_setup).  In particular,
    216    ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
    217    client mmap.  But it doesn't call this function; instead it does the
    218    relevant notifications itself.  Here, we just pass di_handle=0 to
    219    notify_tool_of_mmap as we have no better information.  But really this
    220    function should be done away with; problem is I don't understand what
    221    POST(sys_io_setup) does or how it works.
    222 
    223    [However, this function is used lots for Darwin, because
    224     ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
    225  */
    226 void
    227 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
    228                                     UInt flags, Int fd, Off64T offset )
    229 {
    230    // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
    231    // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
    232    // Should it?  --njn
    233    notify_core_of_mmap(a, len, prot, flags, fd, offset);
    234    notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
    235 }
    236 
    237 void
    238 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
    239 {
    240    Bool d;
    241 
    242    page_align_addr_and_len(&a, &len);
    243    d = VG_(am_notify_munmap)(a, len);
    244    VG_TRACK( die_mem_munmap, a, len );
    245    VG_(di_notify_munmap)( a, len );
    246    if (d)
    247       VG_(discard_translations)( a, (ULong)len,
    248                                  "ML_(notify_core_and_tool_of_munmap)" );
    249 }
    250 
    251 void
    252 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
    253 {
    254    Bool rr = toBool(prot & VKI_PROT_READ);
    255    Bool ww = toBool(prot & VKI_PROT_WRITE);
    256    Bool xx = toBool(prot & VKI_PROT_EXEC);
    257    Bool d;
    258 
    259    page_align_addr_and_len(&a, &len);
    260    d = VG_(am_notify_mprotect)(a, len, prot);
    261    VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
    262    VG_(di_notify_mprotect)( a, len, prot );
    263    if (d)
    264       VG_(discard_translations)( a, (ULong)len,
    265                                  "ML_(notify_core_and_tool_of_mprotect)" );
    266 }
    267 
    268 
    269 
    270 #if HAVE_MREMAP
    271 /* Expand (or shrink) an existing mapping, potentially moving it at
    272    the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
    273 */
    274 static
    275 SysRes do_mremap( Addr old_addr, SizeT old_len,
    276                   Addr new_addr, SizeT new_len,
    277                   UWord flags, ThreadId tid )
    278 {
    279 #  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
    280 
    281    Bool      ok, d;
    282    NSegment const* old_seg;
    283    Addr      advised;
    284    Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
    285    Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
    286 
    287    if (0)
    288       VG_(printf)("do_remap (old %#lx %lu) (new %#lx %lu) %s %s\n",
    289                   old_addr,old_len,new_addr,new_len,
    290                   flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
    291                   flags & VKI_MREMAP_FIXED ? "FIXED" : "");
    292    if (0)
    293       VG_(am_show_nsegments)(0, "do_remap: before");
    294 
    295    if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
    296       goto eINVAL;
    297 
    298    if (!VG_IS_PAGE_ALIGNED(old_addr))
    299       goto eINVAL;
    300 
    301    old_len = VG_PGROUNDUP(old_len);
    302    new_len = VG_PGROUNDUP(new_len);
    303 
    304    if (new_len == 0)
    305       goto eINVAL;
    306 
    307    /* kernel doesn't reject this, but we do. */
    308    if (old_len == 0)
    309       goto eINVAL;
    310 
    311    /* reject wraparounds */
    312    if (old_addr + old_len < old_addr)
    313       goto eINVAL;
    314    if (f_fixed == True && new_addr + new_len < new_len)
    315       goto eINVAL;
    316 
    317    /* kernel rejects all fixed, no-move requests (which are
    318       meaningless). */
    319    if (f_fixed == True && f_maymove == False)
    320       goto eINVAL;
    321 
    322    /* Stay away from non-client areas. */
    323    if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
    324       goto eINVAL;
    325 
    326    /* In all remaining cases, if the old range does not fall within a
    327       single segment, fail. */
    328    old_seg = VG_(am_find_nsegment)( old_addr );
    329    if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
    330       goto eINVAL;
    331    if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC &&
    332        old_seg->kind != SkShmC)
    333       goto eINVAL;
    334 
    335    vg_assert(old_len > 0);
    336    vg_assert(new_len > 0);
    337    vg_assert(VG_IS_PAGE_ALIGNED(old_len));
    338    vg_assert(VG_IS_PAGE_ALIGNED(new_len));
    339    vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
    340 
    341    /* There are 3 remaining cases:
    342 
    343       * maymove == False
    344 
    345         new space has to be at old address, so:
    346             - shrink    -> unmap end
    347             - same size -> do nothing
    348             - grow      -> if can grow in-place, do so, else fail
    349 
    350       * maymove == True, fixed == False
    351 
    352         new space can be anywhere, so:
    353             - shrink    -> unmap end
    354             - same size -> do nothing
    355             - grow      -> if can grow in-place, do so, else
    356                            move to anywhere large enough, else fail
    357 
    358       * maymove == True, fixed == True
    359 
    360         new space must be at new address, so:
    361 
    362             - if new address is not page aligned, fail
    363             - if new address range overlaps old one, fail
    364             - if new address range cannot be allocated, fail
    365             - else move to new address range with new size
    366             - else fail
    367    */
    368 
    369    if (f_maymove == False) {
    370       /* new space has to be at old address */
    371       if (new_len < old_len)
    372          goto shrink_in_place;
    373       if (new_len > old_len)
    374          goto grow_in_place_or_fail;
    375       goto same_in_place;
    376    }
    377 
    378    if (f_maymove == True && f_fixed == False) {
    379       /* new space can be anywhere */
    380       if (new_len < old_len)
    381          goto shrink_in_place;
    382       if (new_len > old_len)
    383          goto grow_in_place_or_move_anywhere_or_fail;
    384       goto same_in_place;
    385    }
    386 
    387    if (f_maymove == True && f_fixed == True) {
    388       /* new space can only be at the new address */
    389       if (!VG_IS_PAGE_ALIGNED(new_addr))
    390          goto eINVAL;
    391       if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
    392          /* no overlap */
    393       } else {
    394          goto eINVAL;
    395       }
    396       if (new_addr == 0)
    397          goto eINVAL;
    398          /* VG_(am_get_advisory_client_simple) interprets zero to mean
    399             non-fixed, which is not what we want */
    400       advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
    401       if (!ok || advised != new_addr)
    402          goto eNOMEM;
    403       ok = VG_(am_relocate_nooverlap_client)
    404               ( &d, old_addr, old_len, new_addr, new_len );
    405       if (ok) {
    406          VG_TRACK( copy_mem_remap, old_addr, new_addr,
    407                                    MIN_SIZET(old_len,new_len) );
    408          if (new_len > old_len)
    409             VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
    410                       old_seg->hasR, old_seg->hasW, old_seg->hasX,
    411                       0/*di_handle*/ );
    412          VG_TRACK(die_mem_munmap, old_addr, old_len);
    413          if (d) {
    414             VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
    415             VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
    416          }
    417          return VG_(mk_SysRes_Success)( new_addr );
    418       }
    419       goto eNOMEM;
    420    }
    421 
    422    /* end of the 3 cases */
    423    /*NOTREACHED*/ vg_assert(0);
    424 
    425   grow_in_place_or_move_anywhere_or_fail:
    426    {
    427    /* try growing it in-place */
    428    Addr   needA = old_addr + old_len;
    429    SSizeT needL = new_len - old_len;
    430 
    431    vg_assert(needL > 0);
    432    vg_assert(needA > 0);
    433 
    434    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
    435    if (ok) {
    436       /* Fixes bug #129866. */
    437       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
    438    }
    439    if (ok && advised == needA) {
    440       const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
    441       if (new_seg) {
    442          VG_TRACK( new_mem_mmap, needA, needL,
    443                                  new_seg->hasR,
    444                                  new_seg->hasW, new_seg->hasX,
    445                                  0/*di_handle*/ );
    446          return VG_(mk_SysRes_Success)( old_addr );
    447       }
    448    }
    449 
    450    /* that failed.  Look elsewhere. */
    451    advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
    452    if (ok) {
    453       Bool oldR = old_seg->hasR;
    454       Bool oldW = old_seg->hasW;
    455       Bool oldX = old_seg->hasX;
    456       /* assert new area does not overlap old */
    457       vg_assert(advised+new_len-1 < old_addr
    458                 || advised > old_addr+old_len-1);
    459       ok = VG_(am_relocate_nooverlap_client)
    460               ( &d, old_addr, old_len, advised, new_len );
    461       if (ok) {
    462          VG_TRACK( copy_mem_remap, old_addr, advised,
    463                                    MIN_SIZET(old_len,new_len) );
    464          if (new_len > old_len)
    465             VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
    466                       oldR, oldW, oldX, 0/*di_handle*/ );
    467          VG_TRACK(die_mem_munmap, old_addr, old_len);
    468          if (d) {
    469             VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
    470             VG_(discard_translations)( advised, new_len, "do_remap(5)" );
    471          }
    472          return VG_(mk_SysRes_Success)( advised );
    473       }
    474    }
    475    goto eNOMEM;
    476    }
    477    /*NOTREACHED*/ vg_assert(0);
    478 
    479   grow_in_place_or_fail:
    480    {
    481    Addr  needA = old_addr + old_len;
    482    SizeT needL = new_len - old_len;
    483 
    484    vg_assert(needA > 0);
    485 
    486    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
    487    if (ok) {
    488       /* Fixes bug #129866. */
    489       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
    490    }
    491    if (!ok || advised != needA)
    492       goto eNOMEM;
    493    const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
    494    if (!new_seg)
    495       goto eNOMEM;
    496    VG_TRACK( new_mem_mmap, needA, needL,
    497                            new_seg->hasR, new_seg->hasW, new_seg->hasX,
    498                            0/*di_handle*/ );
    499 
    500    return VG_(mk_SysRes_Success)( old_addr );
    501    }
    502    /*NOTREACHED*/ vg_assert(0);
    503 
    504   shrink_in_place:
    505    {
    506    SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
    507    if (sr_isError(sres))
    508       return sres;
    509    VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
    510    if (d)
    511       VG_(discard_translations)( old_addr+new_len, old_len-new_len,
    512                                  "do_remap(7)" );
    513    return VG_(mk_SysRes_Success)( old_addr );
    514    }
    515    /*NOTREACHED*/ vg_assert(0);
    516 
    517   same_in_place:
    518    return VG_(mk_SysRes_Success)( old_addr );
    519    /*NOTREACHED*/ vg_assert(0);
    520 
    521   eINVAL:
    522    return VG_(mk_SysRes_Error)( VKI_EINVAL );
    523   eNOMEM:
    524    return VG_(mk_SysRes_Error)( VKI_ENOMEM );
    525 
    526 #  undef MIN_SIZET
    527 }
    528 #endif /* HAVE_MREMAP */
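
        /* Illustrative sketch (not part of this file): what the three cases
           handled by do_mremap above look like from the client side.  The
           calls below are hypothetical and only show which path each flag
           combination takes; 'fixed_to' is an assumed page-aligned,
           non-overlapping target address, and the mremap(2) flags are the
           Linux ones. */
        #if 0
        #include <sys/mman.h>

        static void example_mremap_cases(void *old, size_t oldsz, size_t newsz,
                                         void *fixed_to)
        {
           /* maymove == False: must stay at 'old'; shrinks in place, or grows
              only if the following pages are free, else fails with ENOMEM. */
           void *p1 = mremap(old, oldsz, newsz, 0);

           /* maymove == True, fixed == False: grows in place if possible,
              otherwise the mapping may be relocated anywhere large enough. */
           void *p2 = mremap(old, oldsz, newsz, MREMAP_MAYMOVE);

           /* maymove == True, fixed == True: the mapping is moved to exactly
              'fixed_to', which must not overlap the old range. */
           void *p3 = mremap(old, oldsz, newsz,
                             MREMAP_MAYMOVE | MREMAP_FIXED, fixed_to);

           (void)p1; (void)p2; (void)p3;
        }
        #endif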
    529 
    530 
    531 /* ---------------------------------------------------------------------
    532    File-descriptor tracking
    533    ------------------------------------------------------------------ */
    534 
    535 /* One of these is allocated for each open file descriptor.  */
    536 typedef struct OpenFd
    537 {
    538    Int fd;                        /* The file descriptor */
    539    HChar *pathname;               /* NULL if not a regular file or unknown */
    540    ExeContext *where;             /* NULL if inherited from parent */
    541    struct OpenFd *next, *prev;
    542 } OpenFd;
    543 
    544 /* List of allocated file descriptors. */
    545 static OpenFd *allocated_fds = NULL;
    546 
    547 /* Count of open file descriptors. */
    548 static Int fd_count = 0;
    549 
    550 
    551 /* Note the fact that a file descriptor was just closed. */
    552 void ML_(record_fd_close)(Int fd)
    553 {
    554    OpenFd *i = allocated_fds;
    555 
    556    if (fd >= VG_(fd_hard_limit))
    557       return;			/* Valgrind internal */
    558 
    559    while(i) {
    560       if(i->fd == fd) {
    561          if(i->prev)
    562             i->prev->next = i->next;
    563          else
    564             allocated_fds = i->next;
    565          if(i->next)
    566             i->next->prev = i->prev;
    567          if(i->pathname)
    568             VG_(free) (i->pathname);
    569          VG_(free) (i);
    570          fd_count--;
    571          break;
    572       }
    573       i = i->next;
    574    }
    575 }
    576 
    577 /* Note the fact that a file descriptor was just opened.  If the
    578    tid is -1, this indicates an inherited fd.  If the pathname is NULL,
    579    this either indicates a non-standard file (i.e. a pipe or socket or
    580    some such thing) or that we don't know the filename.  If the fd is
    581    already open, then we're probably doing a dup2() to an existing fd,
    582    so just overwrite the existing one. */
    583 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd,
    584                                          const HChar *pathname)
    585 {
    586    OpenFd *i;
    587 
    588    if (fd >= VG_(fd_hard_limit))
    589       return;			/* Valgrind internal */
    590 
    591    /* Check to see if this fd is already open. */
    592    i = allocated_fds;
    593    while (i) {
    594       if (i->fd == fd) {
    595          if (i->pathname) VG_(free)(i->pathname);
    596          break;
    597       }
    598       i = i->next;
    599    }
    600 
    601    /* Not already one: allocate an OpenFd */
    602    if (i == NULL) {
    603       i = VG_(malloc)("syswrap.rfdowgn.1", sizeof(OpenFd));
    604 
    605       i->prev = NULL;
    606       i->next = allocated_fds;
    607       if(allocated_fds) allocated_fds->prev = i;
    608       allocated_fds = i;
    609       fd_count++;
    610    }
    611 
    612    i->fd = fd;
    613    i->pathname = VG_(strdup)("syswrap.rfdowgn.2", pathname);
    614    i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
    615 }
    616 
    617 // Record opening of an fd, and find its name.
    618 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
    619 {
    620    const HChar* buf;
    621    const HChar* name;
    622    if (VG_(resolve_filename)(fd, &buf))
    623       name = buf;
    624    else
    625       name = NULL;
    626 
    627    ML_(record_fd_open_with_given_name)(tid, fd, name);
    628 }
    629 
    630 // Record opening of a nameless fd.
    631 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
    632 {
    633    ML_(record_fd_open_with_given_name)(tid, fd, NULL);
    634 }
    635 
     636 // Return True if a given file descriptor is already recorded.
    637 Bool ML_(fd_recorded)(Int fd)
    638 {
    639    OpenFd *i = allocated_fds;
    640    while (i) {
    641       if (i->fd == fd)
    642          return True;
    643       i = i->next;
    644    }
    645    return False;
    646 }
    647 
     648 /* The returned string must not be modified or freed. */
    649 const HChar *ML_(find_fd_recorded_by_fd)(Int fd)
    650 {
    651    OpenFd *i = allocated_fds;
    652 
    653    while (i) {
    654       if (i->fd == fd)
    655          return i->pathname;
    656       i = i->next;
    657    }
    658 
    659    return NULL;
    660 }
    661 
    662 static
    663 HChar *unix_to_name(struct vki_sockaddr_un *sa, UInt len, HChar *name)
    664 {
    665    if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
    666       VG_(sprintf)(name, "<unknown>");
    667    } else {
    668       VG_(sprintf)(name, "%s", sa->sun_path);
    669    }
    670 
    671    return name;
    672 }
    673 
    674 static
    675 HChar *inet_to_name(struct vki_sockaddr_in *sa, UInt len, HChar *name)
    676 {
    677    if (sa == NULL || len == 0) {
    678       VG_(sprintf)(name, "<unknown>");
    679    } else if (sa->sin_port == 0) {
    680       VG_(sprintf)(name, "<unbound>");
    681    } else {
    682       UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
    683       VG_(sprintf)(name, "%u.%u.%u.%u:%u",
    684                    (addr>>24) & 0xFF, (addr>>16) & 0xFF,
    685                    (addr>>8) & 0xFF, addr & 0xFF,
    686                    VG_(ntohs)(sa->sin_port));
    687    }
    688 
    689    return name;
    690 }
    691 
    692 static
    693 void inet6_format(HChar *s, const UChar ip[16])
    694 {
    695    static const unsigned char V4mappedprefix[12] = {0,0,0,0,0,0,0,0,0,0,0xff,0xff};
    696 
    697    if (!VG_(memcmp)(ip, V4mappedprefix, 12)) {
    698       const struct vki_in_addr *sin_addr =
    699           (const struct vki_in_addr *)(ip + 12);
    700       UInt addr = VG_(ntohl)(sin_addr->s_addr);
    701 
    702       VG_(sprintf)(s, "::ffff:%u.%u.%u.%u",
    703                    (addr>>24) & 0xFF, (addr>>16) & 0xFF,
    704                    (addr>>8) & 0xFF, addr & 0xFF);
    705    } else {
    706       Bool compressing = False;
    707       Bool compressed = False;
    708       Int len = 0;
    709       Int i;
    710 
    711       for (i = 0; i < 16; i += 2) {
    712          UInt word = ((UInt)ip[i] << 8) | (UInt)ip[i+1];
    713          if (word == 0 && !compressed) {
    714             compressing = True;
    715          } else {
    716             if (compressing) {
    717                compressing = False;
    718                compressed = True;
    719                s[len++] = ':';
    720             }
    721             if (i > 0) {
    722                s[len++] = ':';
    723             }
    724             len += VG_(sprintf)(s + len, "%x", word);
    725          }
    726       }
    727 
    728       if (compressing) {
    729          s[len++] = ':';
    730          s[len++] = ':';
    731       }
    732 
    733       s[len++] = 0;
    734    }
    735 
    736    return;
    737 }
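
        /* Worked example (illustrative): for the 16-byte address
           20 01 0d b8 00 00 00 00 00 00 00 00 00 00 00 01 the loop above
           emits "2001:db8::1" (the run of zero words is compressed to "::"),
           while a v4-mapped address such as ::ffff:192.0.2.1 matches the
           prefix test and is printed as "::ffff:192.0.2.1". */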
    738 
    739 static
    740 HChar *inet6_to_name(struct vki_sockaddr_in6 *sa, UInt len, HChar *name)
    741 {
    742    if (sa == NULL || len == 0) {
    743       VG_(sprintf)(name, "<unknown>");
    744    } else if (sa->sin6_port == 0) {
    745       VG_(sprintf)(name, "<unbound>");
    746    } else {
    747       HChar addr[100];    // large enough
    748       inet6_format(addr, (void *)&(sa->sin6_addr));
    749       VG_(sprintf)(name, "[%s]:%u", addr, VG_(ntohs)(sa->sin6_port));
    750    }
    751 
    752    return name;
    753 }
    754 
    755 /*
     756  * Try to get some details about a socket.
    757  */
    758 static void
    759 getsockdetails(Int fd)
    760 {
    761    union u {
    762       struct vki_sockaddr a;
    763       struct vki_sockaddr_in in;
    764       struct vki_sockaddr_in6 in6;
    765       struct vki_sockaddr_un un;
    766    } laddr;
    767    Int llen;
    768 
    769    llen = sizeof(laddr);
    770    VG_(memset)(&laddr, 0, llen);
    771 
    772    if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
    773       switch(laddr.a.sa_family) {
    774       case VKI_AF_INET: {
    775          HChar lname[32];   // large enough
    776          HChar pname[32];   // large enough
    777          struct vki_sockaddr_in paddr;
    778          Int plen = sizeof(struct vki_sockaddr_in);
    779 
    780          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
    781             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
    782                          inet_to_name(&(laddr.in), llen, lname),
    783                          inet_to_name(&paddr, plen, pname));
    784          } else {
    785             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
    786                          fd, inet_to_name(&(laddr.in), llen, lname));
    787          }
    788          return;
    789          }
    790       case VKI_AF_INET6: {
    791          HChar lname[128];  // large enough
    792          HChar pname[128];  // large enough
    793          struct vki_sockaddr_in6 paddr;
    794          Int plen = sizeof(struct vki_sockaddr_in6);
    795 
    796          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
    797             VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> %s\n", fd,
    798                          inet6_to_name(&(laddr.in6), llen, lname),
    799                          inet6_to_name(&paddr, plen, pname));
    800          } else {
    801             VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> unbound\n",
    802                          fd, inet6_to_name(&(laddr.in6), llen, lname));
    803          }
    804          return;
    805          }
    806       case VKI_AF_UNIX: {
    807          static char lname[256];
    808          VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
    809                       unix_to_name(&(laddr.un), llen, lname));
    810          return;
    811          }
    812       default:
    813          VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
    814                       laddr.a.sa_family, fd);
    815          return;
    816       }
    817    }
    818 
    819    VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
    820 }
    821 
    822 
    823 /* Dump out a summary, and a more detailed list, of open file descriptors. */
    824 void VG_(show_open_fds) (const HChar* when)
    825 {
    826    OpenFd *i = allocated_fds;
    827 
    828    VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open %s.\n", fd_count, when);
    829 
    830    while (i) {
    831       if (i->pathname) {
    832          VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
    833                       i->pathname);
    834       } else {
    835          Int val;
    836          Int len = sizeof(val);
    837 
    838          if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
    839              == -1) {
    840             VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
    841          } else {
    842             getsockdetails(i->fd);
    843          }
    844       }
    845 
    846       if(i->where) {
    847          VG_(pp_ExeContext)(i->where);
    848          VG_(message)(Vg_UserMsg, "\n");
    849       } else {
    850          VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
    851          VG_(message)(Vg_UserMsg, "\n");
    852       }
    853 
    854       i = i->next;
    855    }
    856 
    857    VG_(message)(Vg_UserMsg, "\n");
    858 }
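
        /* Illustrative output sketch (hypothetical values, composed from the
           format strings above, with a caller-supplied 'when' string such as
           "at exit"): under --track-fds=yes this prints something along the
           lines of
              FILE DESCRIPTORS: 3 open at exit.
              Open file descriptor 4: /tmp/example.log
                 <backtrace of the opening call, or "<inherited from parent>">
              Open AF_INET socket 5: 127.0.0.1:8080 <-> unbound
           where nameless descriptors fall back to the socket-detail or bare
           "Open file descriptor N:" forms. */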
    859 
    860 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
    861    have /proc support compiled in, or a non-Linux kernel), then we need to
    862    find out what file descriptors we inherited from our parent process the
    863    hard way - by checking each fd in turn. */
    864 static
    865 void init_preopened_fds_without_proc_self_fd(void)
    866 {
    867    struct vki_rlimit lim;
    868    UInt count;
    869    Int i;
    870 
    871    if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
    872       /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
    873          an arbitrarily high number.  1024 happens to be the limit in
    874          the 2.4 Linux kernels. */
    875       count = 1024;
    876    } else {
    877       count = lim.rlim_cur;
    878    }
    879 
    880    for (i = 0; i < count; i++)
    881       if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
    882          ML_(record_fd_open_named)(-1, i);
    883 }
    884 
    885 /* Initialize the list of open file descriptors with the file descriptors
     886    we inherited from our parent process. */
    887 
    888 void VG_(init_preopened_fds)(void)
    889 {
    890 // DDD: should probably use HAVE_PROC here or similar, instead.
    891 #if defined(VGO_linux)
    892    Int ret;
    893    struct vki_dirent64 d;
    894    SysRes f;
    895 
    896    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
    897    if (sr_isError(f)) {
    898       init_preopened_fds_without_proc_self_fd();
    899       return;
    900    }
    901 
    902    while ((ret = VG_(getdents64)(sr_Res(f), &d, sizeof(d))) != 0) {
    903       if (ret == -1)
    904          goto out;
    905 
    906       if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
    907          HChar* s;
    908          Int fno = VG_(strtoll10)(d.d_name, &s);
    909          if (*s == '\0') {
    910             if (fno != sr_Res(f))
    911                if (VG_(clo_track_fds))
    912                   ML_(record_fd_open_named)(-1, fno);
    913          } else {
    914             VG_(message)(Vg_DebugMsg,
    915                "Warning: invalid file name in /proc/self/fd: %s\n",
    916                d.d_name);
    917          }
    918       }
    919 
    920       VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
    921    }
    922 
    923   out:
    924    VG_(close)(sr_Res(f));
    925 
    926 #elif defined(VGO_darwin)
    927    init_preopened_fds_without_proc_self_fd();
    928 
    929 #elif defined(VGO_solaris)
    930    Int ret;
    931    Char buf[VKI_MAXGETDENTS_SIZE];
    932    SysRes f;
    933 
    934    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
    935    if (sr_isError(f)) {
    936       init_preopened_fds_without_proc_self_fd();
    937       return;
    938    }
    939 
    940    while ((ret = VG_(getdents64)(sr_Res(f), (struct vki_dirent64 *) buf,
    941                                  sizeof(buf))) > 0) {
    942       Int i = 0;
    943       while (i < ret) {
     944          /* Process one entry. */
    945          struct vki_dirent64 *d = (struct vki_dirent64 *) (buf + i);
    946          if (VG_(strcmp)(d->d_name, ".") && VG_(strcmp)(d->d_name, "..")) {
    947             HChar *s;
    948             Int fno = VG_(strtoll10)(d->d_name, &s);
    949             if (*s == '\0') {
    950                if (fno != sr_Res(f))
    951                   if (VG_(clo_track_fds))
    952                      ML_(record_fd_open_named)(-1, fno);
    953             } else {
    954                VG_(message)(Vg_DebugMsg,
    955                      "Warning: invalid file name in /proc/self/fd: %s\n",
    956                      d->d_name);
    957             }
    958          }
    959 
     960          /* Move on to the next entry. */
    961          i += d->d_reclen;
    962       }
    963    }
    964 
    965    VG_(close)(sr_Res(f));
    966 
    967 #else
    968 #  error Unknown OS
    969 #endif
    970 }
    971 
    972 static
    973 HChar *strdupcat ( const HChar* cc, const HChar *s1, const HChar *s2,
    974                    ArenaId aid )
    975 {
    976    UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
    977    HChar *result = VG_(arena_malloc) ( aid, cc, len );
    978    VG_(strcpy) ( result, s1 );
    979    VG_(strcat) ( result, s2 );
    980    return result;
    981 }
    982 
    983 static
    984 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
    985                             const HChar *msg, Addr base, SizeT size )
    986 {
    987    HChar *outmsg = strdupcat ( "di.syswrap.pmrs.1",
    988                                "sendmsg", msg, VG_AR_CORE );
    989    PRE_MEM_READ( outmsg, base, size );
    990    VG_(free) ( outmsg );
    991 }
    992 
    993 static
    994 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
    995                              const HChar *msg, Addr base, SizeT size )
    996 {
    997    HChar *outmsg = strdupcat ( "di.syswrap.pmwr.1",
    998                                "recvmsg", msg, VG_AR_CORE );
    999    if ( read )
   1000       PRE_MEM_READ( outmsg, base, size );
   1001    else
   1002       PRE_MEM_WRITE( outmsg, base, size );
   1003    VG_(free) ( outmsg );
   1004 }
   1005 
   1006 static
   1007 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
   1008                               const HChar *fieldName, Addr base, SizeT size )
   1009 {
   1010    if ( !read )
   1011       POST_MEM_WRITE( base, size );
   1012 }
   1013 
   1014 static
   1015 void msghdr_foreachfield (
   1016         ThreadId tid,
   1017         const HChar *name,
   1018         struct vki_msghdr *msg,
   1019         UInt length,
   1020         void (*foreach_func)( ThreadId, Bool, const HChar *, Addr, SizeT ),
   1021         Bool rekv /* "recv" apparently shadows some header decl on OSX108 */
   1022      )
   1023 {
   1024    HChar *fieldName;
   1025 
   1026    if ( !msg )
   1027       return;
   1028 
   1029    fieldName = VG_(malloc) ( "di.syswrap.mfef", VG_(strlen)(name) + 32 );
   1030 
   1031    VG_(sprintf) ( fieldName, "(%s)", name );
   1032 
   1033    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
   1034    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
   1035    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
   1036    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
   1037    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
   1038    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
   1039 
    1040    /* msg_flags is completely ignored for sendmsg; recvmsg doesn't read
    1041       the field, but does write to it. */
   1042    if ( rekv )
   1043       foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );
   1044 
   1045    if ( ML_(safe_to_deref)(&msg->msg_name, sizeof (void *))
   1046         && msg->msg_name ) {
   1047       VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
   1048       foreach_func ( tid, False, fieldName,
   1049                      (Addr)msg->msg_name, msg->msg_namelen );
   1050    }
   1051 
   1052    if ( ML_(safe_to_deref)(&msg->msg_iov, sizeof (void *))
   1053         && msg->msg_iov ) {
   1054       struct vki_iovec *iov = msg->msg_iov;
   1055       UInt i;
   1056 
   1057       VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );
   1058 
   1059       foreach_func ( tid, True, fieldName,
   1060                      (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );
   1061 
   1062       for ( i = 0; i < msg->msg_iovlen; ++i, ++iov ) {
   1063          UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
   1064          VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
   1065          foreach_func ( tid, False, fieldName,
   1066                         (Addr)iov->iov_base, iov_len );
   1067          length = length - iov_len;
   1068       }
   1069    }
   1070 
   1071    if ( ML_(safe_to_deref) (&msg->msg_control, sizeof (void *))
   1072         && msg->msg_control )
   1073    {
   1074       VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
   1075       foreach_func ( tid, False, fieldName,
   1076                      (Addr)msg->msg_control, msg->msg_controllen );
   1077    }
   1078 
   1079    VG_(free) ( fieldName );
   1080 }
   1081 
   1082 static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
   1083 {
   1084    struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);
   1085 
   1086    while (cm) {
   1087       if (cm->cmsg_level == VKI_SOL_SOCKET &&
   1088           cm->cmsg_type == VKI_SCM_RIGHTS ) {
   1089          Int *fds = (Int *) VKI_CMSG_DATA(cm);
   1090          Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
   1091                          / sizeof(int);
   1092          Int i;
   1093 
   1094          for (i = 0; i < fdc; i++)
   1095             if(VG_(clo_track_fds))
   1096                // XXX: must we check the range on these fds with
   1097                //      ML_(fd_allowed)()?
   1098                ML_(record_fd_open_named)(tid, fds[i]);
   1099       }
   1100 
   1101       cm = VKI_CMSG_NXTHDR(msg, cm);
   1102    }
   1103 }
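
        /* Worked example (illustrative): a control message built with
           CMSG_LEN(2 * sizeof(int)) has cmsg_len equal to
           CMSG_ALIGN(sizeof(struct cmsghdr)) + 2*sizeof(int), so the fdc
           computation above recovers exactly 2, and both passed descriptors
           are recorded when fd tracking is enabled. */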
   1104 
   1105 /* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
   1106 static
   1107 void pre_mem_read_sockaddr ( ThreadId tid,
   1108                              const HChar *description,
   1109                              struct vki_sockaddr *sa, UInt salen )
   1110 {
   1111    HChar *outmsg;
   1112    struct vki_sockaddr_un*  saun = (struct vki_sockaddr_un *)sa;
   1113    struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
   1114    struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
   1115 #  ifdef VKI_AF_BLUETOOTH
   1116    struct vki_sockaddr_rc*  rc   = (struct vki_sockaddr_rc *)sa;
   1117 #  endif
   1118 #  ifdef VKI_AF_NETLINK
   1119    struct vki_sockaddr_nl*  nl   = (struct vki_sockaddr_nl *)sa;
   1120 #  endif
   1121 
   1122    /* NULL/zero-length sockaddrs are legal */
   1123    if ( sa == NULL || salen == 0 ) return;
   1124 
   1125    outmsg = VG_(malloc) ( "di.syswrap.pmr_sockaddr.1",
   1126                           VG_(strlen)( description ) + 30 );
   1127 
   1128    VG_(sprintf) ( outmsg, description, "sa_family" );
   1129    PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
   1130 
   1131    switch (sa->sa_family) {
   1132 
   1133       case VKI_AF_UNIX:
   1134          VG_(sprintf) ( outmsg, description, "sun_path" );
   1135          PRE_MEM_RASCIIZ( outmsg, (Addr) saun->sun_path );
   1136          // GrP fixme max of sun_len-2? what about nul char?
   1137          break;
   1138 
   1139       case VKI_AF_INET:
   1140          VG_(sprintf) ( outmsg, description, "sin_port" );
   1141          PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
   1142          VG_(sprintf) ( outmsg, description, "sin_addr" );
   1143          PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
   1144          break;
   1145 
   1146       case VKI_AF_INET6:
   1147          VG_(sprintf) ( outmsg, description, "sin6_port" );
   1148          PRE_MEM_READ( outmsg,
   1149             (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
   1150          VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
   1151          PRE_MEM_READ( outmsg,
   1152             (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
   1153          VG_(sprintf) ( outmsg, description, "sin6_addr" );
   1154          PRE_MEM_READ( outmsg,
   1155             (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
   1156          VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
   1157          PRE_MEM_READ( outmsg,
   1158             (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
   1159          break;
   1160 
   1161 #     ifdef VKI_AF_BLUETOOTH
   1162       case VKI_AF_BLUETOOTH:
   1163          VG_(sprintf) ( outmsg, description, "rc_bdaddr" );
   1164          PRE_MEM_READ( outmsg, (Addr) &rc->rc_bdaddr, sizeof (rc->rc_bdaddr) );
   1165          VG_(sprintf) ( outmsg, description, "rc_channel" );
   1166          PRE_MEM_READ( outmsg, (Addr) &rc->rc_channel, sizeof (rc->rc_channel) );
   1167          break;
   1168 #     endif
   1169 
   1170 #     ifdef VKI_AF_NETLINK
   1171       case VKI_AF_NETLINK:
   1172          VG_(sprintf)(outmsg, description, "nl_pid");
   1173          PRE_MEM_READ(outmsg, (Addr)&nl->nl_pid, sizeof(nl->nl_pid));
   1174          VG_(sprintf)(outmsg, description, "nl_groups");
   1175          PRE_MEM_READ(outmsg, (Addr)&nl->nl_groups, sizeof(nl->nl_groups));
   1176          break;
   1177 #     endif
   1178 
   1179 #     ifdef VKI_AF_UNSPEC
   1180       case VKI_AF_UNSPEC:
   1181          break;
   1182 #     endif
   1183 
   1184       default:
   1185          /* No specific information about this address family.
   1186             Let's just check the full data following the family.
    1187             Note that this can give a false positive if this (unknown)
   1188             struct sockaddr_???? has padding bytes between its elements. */
   1189          VG_(sprintf) ( outmsg, description, "sa_data" );
   1190          PRE_MEM_READ( outmsg, (Addr)&sa->sa_family + sizeof(sa->sa_family),
   1191                        salen -  sizeof(sa->sa_family));
   1192          break;
   1193    }
   1194 
   1195    VG_(free) ( outmsg );
   1196 }
   1197 
   1198 /* Dereference a pointer to a UInt. */
   1199 static UInt deref_UInt ( ThreadId tid, Addr a, const HChar* s )
   1200 {
   1201    UInt* a_p = (UInt*)a;
   1202    PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
   1203    if (a_p == NULL)
   1204       return 0;
   1205    else
   1206       return *a_p;
   1207 }
   1208 
   1209 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
   1210                                   const HChar* buf_s, const HChar* buflen_s )
   1211 {
   1212    if (VG_(tdict).track_pre_mem_write) {
   1213       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
   1214       if (buflen_in > 0) {
   1215          VG_(tdict).track_pre_mem_write(
   1216             Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
   1217       }
   1218    }
   1219 }
   1220 
   1221 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
   1222                                    Addr buf_p, Addr buflen_p, const HChar* s )
   1223 {
   1224    if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
   1225       UInt buflen_out = deref_UInt( tid, buflen_p, s);
   1226       if (buflen_out > 0 && buf_p != (Addr)NULL) {
   1227          VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
   1228       }
   1229    }
   1230 }
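
        /* Illustrative sketch (hypothetical call sites): these two helpers are
           meant to bracket a value-result (buf, buflen) syscall argument such
           as getsockname()'s (name, namelen).  The variable names and string
           labels below are made up for the example. */
        #if 0
           /* PRE: the kernel may write up to *namelen_p bytes at name_p. */
           ML_(buf_and_len_pre_check)( tid, name_p, namelen_p,
                                       "socketcall.getsockname(name)",
                                       "socketcall.getsockname(namelen_in)" );

           /* ... the syscall happens ... */

           /* POST: on success, mark the (possibly shorter) written area
              defined, using the updated length in *namelen_p. */
           ML_(buf_and_len_post_check)( tid, res, name_p, namelen_p,
                                        "socketcall.getsockname(namelen_out)" );
        #endif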
   1231 
   1232 /* ---------------------------------------------------------------------
   1233    Data seg end, for brk()
   1234    ------------------------------------------------------------------ */
   1235 
   1236 /*   +--------+------------+
   1237      | anon   |    resvn   |
   1238      +--------+------------+
   1239 
   1240      ^     ^  ^
   1241      |     |  boundary is page aligned
   1242      |     VG_(brk_limit) -- no alignment constraint
   1243      VG_(brk_base) -- page aligned -- does not move
   1244 
   1245      Both the anon part and the reservation part are always at least
   1246      one page.
   1247 */
   1248 
   1249 /* Set the new data segment end to NEWBRK.  If this succeeds, return
   1250    NEWBRK, else return the current data segment end. */
   1251 
   1252 static Addr do_brk ( Addr newbrk, ThreadId tid )
   1253 {
   1254    NSegment const* aseg;
   1255    Addr newbrkP;
   1256    SizeT delta;
   1257    Bool debug = False;
   1258 
   1259    if (debug)
   1260       VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
   1261 		  VG_(brk_base), VG_(brk_limit), newbrk);
   1262 
   1263    if (0) VG_(am_show_nsegments)(0, "in_brk");
   1264 
   1265    if (newbrk < VG_(brk_base))
   1266       /* Clearly impossible. */
   1267       goto bad;
   1268 
   1269    if (newbrk < VG_(brk_limit)) {
   1270       /* shrinking the data segment.  Be lazy and don't munmap the
   1271          excess area. */
   1272       NSegment const * seg = VG_(am_find_nsegment)(newbrk);
   1273       vg_assert(seg);
   1274 
   1275       if (seg->hasT)
   1276          VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
   1277                                     "do_brk(shrink)" );
   1278       /* Since we're being lazy and not unmapping pages, we have to
   1279          zero out the area, so that if the area later comes back into
   1280          circulation, it will be filled with zeroes, as if it really
   1281          had been unmapped and later remapped.  Be a bit paranoid and
   1282          try hard to ensure we're not going to segfault by doing the
   1283          write - check both ends of the range are in the same segment
   1284          and that segment is writable. */
   1285       NSegment const * seg2;
   1286 
   1287       seg2 = VG_(am_find_nsegment)( VG_(brk_limit) - 1 );
   1288       vg_assert(seg2);
   1289 
   1290       if (seg == seg2 && seg->hasW)
   1291          VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
   1292 
   1293       VG_(brk_limit) = newbrk;
   1294       return newbrk;
   1295    }
   1296 
   1297    /* otherwise we're expanding the brk segment. */
   1298    if (VG_(brk_limit) > VG_(brk_base))
   1299       aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   1300    else
   1301       aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
   1302 
   1303    /* These should be assured by setup_client_dataseg in m_main. */
   1304    vg_assert(aseg);
   1305    vg_assert(aseg->kind == SkAnonC);
   1306 
   1307    if (newbrk <= aseg->end + 1) {
   1308       /* still fits within the anon segment. */
   1309       VG_(brk_limit) = newbrk;
   1310       return newbrk;
   1311    }
   1312 
   1313    newbrkP = VG_PGROUNDUP(newbrk);
   1314    delta = newbrkP - (aseg->end + 1);
   1315    vg_assert(delta > 0);
   1316    vg_assert(VG_IS_PAGE_ALIGNED(delta));
   1317 
   1318    Bool overflow;
   1319    if (! VG_(am_extend_into_adjacent_reservation_client)( aseg->start, delta,
   1320                                                           &overflow)) {
   1321       if (overflow)
   1322          VG_(umsg)("brk segment overflow in thread #%u: can't grow to %#lx\n",
   1323                    tid, newbrkP);
   1324       else
   1325          VG_(umsg)("Cannot map memory to grow brk segment in thread #%u "
   1326                    "to %#lx\n", tid, newbrkP);
   1327       goto bad;
   1328    }
   1329 
   1330    VG_(brk_limit) = newbrk;
   1331    return newbrk;
   1332 
   1333   bad:
   1334    return VG_(brk_limit);
   1335 }
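
        /* Illustrative sketch (hedged): a brk() wrapper built on do_brk never
           reports an error at this level -- on failure do_brk simply returns
           the unchanged VG_(brk_limit), and that is the value the client
           sees.  ARG1 and SET_STATUS_Success are the usual syswrap macros;
           the exact wrapper lives with the other PRE/POST handlers. */
        #if 0
           Addr brk_new = do_brk(ARG1 /* requested new end */, tid);
           SET_STATUS_Success( brk_new );  /* == ARG1 if it worked, old limit if not */
        #endif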
   1336 
   1337 
   1338 /* ---------------------------------------------------------------------
   1339    Vet file descriptors for sanity
   1340    ------------------------------------------------------------------ */
   1341 /*
   1342 > - what does the "Bool soft" parameter mean?
   1343 
   1344 (Tom Hughes, 3 Oct 05):
   1345 
   1346 Whether or not to consider a file descriptor invalid if it is above
   1347 the current soft limit.
   1348 
   1349 Basically if we are testing whether a newly created file descriptor is
   1350 valid (in a post handler) then we set soft to true, and if we are
   1351 testing whether a file descriptor that is about to be used (in a pre
   1352 handler) is valid [viz, an already-existing fd] then we set it to false.
   1353 
   1354 The point is that if the (virtual) soft limit is lowered then any
   1355 existing descriptors can still be read/written/closed etc (so long as
   1356 they are below the valgrind reserved descriptors) but no new
   1357 descriptors can be created above the new soft limit.
   1358 
   1359 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
   1360 */
   1361 
   1362 /* Return true if we're allowed to use or create this fd */
   1363 Bool ML_(fd_allowed)(Int fd, const HChar *syscallname, ThreadId tid,
   1364                      Bool isNewFd)
   1365 {
   1366    Bool allowed = True;
   1367 
   1368    /* hard limits always apply */
   1369    if (fd < 0 || fd >= VG_(fd_hard_limit))
   1370       allowed = False;
   1371 
   1372    /* hijacking the output fds is never allowed */
   1373    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
   1374       allowed = False;
   1375 
   1376    /* if creating a new fd (rather than using an existing one), the
   1377       soft limit must also be observed */
   1378    if (isNewFd && fd >= VG_(fd_soft_limit))
   1379       allowed = False;
   1380 
   1381    /* this looks like it ought to be included, but causes problems: */
   1382    /*
   1383    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
   1384       allowed = False;
   1385    */
   1386    /* The difficulty is as follows: consider a program P which expects
   1387       to be able to mess with (redirect) its own stderr (fd 2).
   1388       Usually to deal with P we would issue command line flags to send
   1389       logging somewhere other than stderr, so as not to disrupt P.
   1390       The problem is that -d unilaterally hijacks stderr with no
   1391       consultation with P.  And so, if this check is enabled, P will
   1392       work OK normally but fail if -d is issued.
   1393 
   1394       Basically -d is a hack and you take your chances when using it.
   1395       It's very useful for low level debugging -- particularly at
   1396       startup -- and having its presence change the behaviour of the
   1397       client is exactly what we don't want.  */
   1398 
   1399    /* croak? */
   1400    if ((!allowed) && VG_(showing_core_errors)() ) {
   1401       VG_(message)(Vg_UserMsg,
   1402          "Warning: invalid file descriptor %d in syscall %s()\n",
   1403          fd, syscallname);
   1404       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
   1405 	 VG_(message)(Vg_UserMsg,
   1406             "   Use --log-fd=<number> to select an alternative log fd.\n");
   1407       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
   1408 	 VG_(message)(Vg_UserMsg,
   1409             "   Use --xml-fd=<number> to select an alternative XML "
   1410             "output fd.\n");
   1411       // DDD: consider always printing this stack trace, it's useful.
   1412       // Also consider also making this a proper core error, ie.
   1413       // suppressible and all that.
   1414       if (VG_(clo_verbosity) > 1) {
   1415          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   1416       }
   1417    }
   1418 
   1419    return allowed;
   1420 }
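
        /* Illustrative sketch (hypothetical call sites): isNewFd is False when
           vetting a descriptor the client is about to use (PRE handler; only
           the hard limit and the reserved Valgrind fds apply) and True when
           vetting one the kernel has just created (POST handler; the soft
           limit applies as well). */
        #if 0
           /* PRE(sys_read): existing fd, so isNewFd == False. */
           if (!ML_(fd_allowed)(ARG1, "read", tid, False))
              SET_STATUS_Failure( VKI_EBADF );

           /* POST(sys_open): freshly created fd, so isNewFd == True. */
           if (!ML_(fd_allowed)(RES, "open", tid, True)) {
              VG_(close)(RES);
              SET_STATUS_Failure( VKI_EMFILE );
           }
        #endif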
   1421 
   1422 
   1423 /* ---------------------------------------------------------------------
   1424    Deal with a bunch of socket-related syscalls
   1425    ------------------------------------------------------------------ */
   1426 
   1427 /* ------ */
   1428 
   1429 void
   1430 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
   1431                                   UWord arg0, UWord arg1,
   1432                                   UWord arg2, UWord arg3 )
   1433 {
   1434    /* int socketpair(int d, int type, int protocol, int sv[2]); */
   1435    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
   1436                   arg3, 2*sizeof(int) );
   1437 }
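        /* For orientation (added, illustrative only): the client call being
           checked here is typically of the form
              int sv[2];
              socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
           so the wrapper marks the 2*sizeof(int) result array at arg3 as
           about to be written before the kernel fills it in. */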
   1438 
   1439 SysRes
   1440 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
   1441                                    SysRes res,
   1442                                    UWord arg0, UWord arg1,
   1443                                    UWord arg2, UWord arg3 )
   1444 {
   1445    SysRes r = res;
   1446    Int fd1 = ((Int*)arg3)[0];
   1447    Int fd2 = ((Int*)arg3)[1];
   1448    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1449    POST_MEM_WRITE( arg3, 2*sizeof(int) );
   1450    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
   1451        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
   1452       VG_(close)(fd1);
   1453       VG_(close)(fd2);
   1454       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1455    } else {
   1456       POST_MEM_WRITE( arg3, 2*sizeof(int) );
   1457       if (VG_(clo_track_fds)) {
   1458          ML_(record_fd_open_nameless)(tid, fd1);
   1459          ML_(record_fd_open_nameless)(tid, fd2);
   1460       }
   1461    }
   1462    return r;
   1463 }
   1464 
   1465 /* ------ */
   1466 
   1467 SysRes
   1468 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
   1469 {
   1470    SysRes r = res;
   1471    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1472    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
   1473       VG_(close)(sr_Res(res));
   1474       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1475    } else {
   1476       if (VG_(clo_track_fds))
   1477          ML_(record_fd_open_nameless)(tid, sr_Res(res));
   1478    }
   1479    return r;
   1480 }
   1481 
   1482 /* ------ */
   1483 
   1484 void
   1485 ML_(generic_PRE_sys_bind) ( ThreadId tid,
   1486                             UWord arg0, UWord arg1, UWord arg2 )
   1487 {
   1488    /* int bind(int sockfd, struct sockaddr *my_addr,
   1489                int addrlen); */
   1490    pre_mem_read_sockaddr(
   1491       tid, "socketcall.bind(my_addr.%s)",
   1492       (struct vki_sockaddr *) arg1, arg2
   1493    );
   1494 }
   1495 
   1496 /* ------ */
   1497 
   1498 void
   1499 ML_(generic_PRE_sys_accept) ( ThreadId tid,
   1500                               UWord arg0, UWord arg1, UWord arg2 )
   1501 {
   1502    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
   1503    Addr addr_p     = arg1;
   1504    Addr addrlen_p  = arg2;
   1505    if (addr_p != (Addr)NULL)
   1506       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
   1507                                    "socketcall.accept(addr)",
   1508                                    "socketcall.accept(addrlen_in)" );
   1509 }
   1510 
   1511 SysRes
   1512 ML_(generic_POST_sys_accept) ( ThreadId tid,
   1513                                SysRes res,
   1514                                UWord arg0, UWord arg1, UWord arg2 )
   1515 {
   1516    SysRes r = res;
   1517    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1518    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
   1519       VG_(close)(sr_Res(res));
   1520       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1521    } else {
   1522       Addr addr_p     = arg1;
   1523       Addr addrlen_p  = arg2;
   1524       if (addr_p != (Addr)NULL)
   1525          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
   1526                                        "socketcall.accept(addrlen_out)" );
   1527       if (VG_(clo_track_fds))
   1528           ML_(record_fd_open_nameless)(tid, sr_Res(res));
   1529    }
   1530    return r;
   1531 }
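        /* Added note (hedged): addr/addrlen form a value-result pair.  A
           client typically calls
              struct sockaddr_storage ss;
              socklen_t len = sizeof(ss);
              int newfd = accept(s, (struct sockaddr *)&ss, &len);
           so the PRE check above validates the incoming len and buffer,
           and the POST check marks the returned address bytes and the
           updated len as written. */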
   1532 
   1533 /* ------ */
   1534 
   1535 void
   1536 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
   1537                               UWord arg0, UWord arg1, UWord arg2,
   1538                               UWord arg3, UWord arg4, UWord arg5 )
   1539 {
   1540    /* int sendto(int s, const void *msg, int len,
   1541                  unsigned int flags,
   1542                  const struct sockaddr *to, int tolen); */
   1543    PRE_MEM_READ( "socketcall.sendto(msg)",
   1544                  arg1, /* msg */
   1545                  arg2  /* len */ );
   1546    pre_mem_read_sockaddr(
   1547       tid, "socketcall.sendto(to.%s)",
   1548       (struct vki_sockaddr *) arg4, arg5
   1549    );
   1550 }
   1551 
   1552 /* ------ */
   1553 
   1554 void
   1555 ML_(generic_PRE_sys_send) ( ThreadId tid,
   1556                             UWord arg0, UWord arg1, UWord arg2 )
   1557 {
   1558    /* int send(int s, const void *msg, size_t len, int flags); */
   1559    PRE_MEM_READ( "socketcall.send(msg)",
   1560                   arg1, /* msg */
   1561                   arg2  /* len */ );
   1562 
   1563 }
   1564 
   1565 /* ------ */
   1566 
   1567 void
   1568 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
   1569                                 UWord arg0, UWord arg1, UWord arg2,
   1570                                 UWord arg3, UWord arg4, UWord arg5 )
   1571 {
   1572    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
   1573                    struct sockaddr *from, int *fromlen); */
   1574    Addr buf_p      = arg1;
   1575    Int  len        = arg2;
   1576    Addr from_p     = arg4;
   1577    Addr fromlen_p  = arg5;
   1578    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
   1579    if (from_p != (Addr)NULL)
   1580       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
   1581                                    "socketcall.recvfrom(from)",
   1582                                    "socketcall.recvfrom(fromlen_in)" );
   1583 }
   1584 
   1585 void
   1586 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
   1587                                  SysRes res,
   1588                                  UWord arg0, UWord arg1, UWord arg2,
   1589                                  UWord arg3, UWord arg4, UWord arg5 )
   1590 {
   1591    Addr buf_p      = arg1;
   1592    Int  len        = arg2;
   1593    Addr from_p     = arg4;
   1594    Addr fromlen_p  = arg5;
   1595 
   1596    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1597    if (from_p != (Addr)NULL)
   1598       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
   1599                                     "socketcall.recvfrom(fromlen_out)" );
   1600    POST_MEM_WRITE( buf_p, len );
   1601 }
   1602 
   1603 /* ------ */
   1604 
   1605 void
   1606 ML_(generic_PRE_sys_recv) ( ThreadId tid,
   1607                             UWord arg0, UWord arg1, UWord arg2 )
   1608 {
   1609    /* int recv(int s, void *buf, int len, unsigned int flags); */
   1610    /* man 2 recv says:
   1611       The  recv call is normally used only on a connected socket
   1612       (see connect(2)) and is identical to recvfrom with a  NULL
   1613       from parameter.
   1614    */
   1615    PRE_MEM_WRITE( "socketcall.recv(buf)",
   1616                   arg1, /* buf */
   1617                   arg2  /* len */ );
   1618 }
   1619 
   1620 void
   1621 ML_(generic_POST_sys_recv) ( ThreadId tid,
   1622                              UWord res,
   1623                              UWord arg0, UWord arg1, UWord arg2 )
   1624 {
   1625    if (res >= 0 && arg1 != 0) {
   1626       POST_MEM_WRITE( arg1, /* buf */
   1627                       arg2  /* len */ );
   1628    }
   1629 }
   1630 
   1631 /* ------ */
   1632 
   1633 void
   1634 ML_(generic_PRE_sys_connect) ( ThreadId tid,
   1635                                UWord arg0, UWord arg1, UWord arg2 )
   1636 {
   1637    /* int connect(int sockfd,
   1638                   struct sockaddr *serv_addr, int addrlen ); */
   1639    pre_mem_read_sockaddr( tid,
   1640                           "socketcall.connect(serv_addr.%s)",
   1641                           (struct vki_sockaddr *) arg1, arg2);
   1642 }
   1643 
   1644 /* ------ */
   1645 
   1646 void
   1647 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
   1648                                   UWord arg0, UWord arg1, UWord arg2,
   1649                                   UWord arg3, UWord arg4 )
   1650 {
   1651    /* int setsockopt(int s, int level, int optname,
   1652                      const void *optval, int optlen); */
   1653    PRE_MEM_READ( "socketcall.setsockopt(optval)",
   1654                  arg3, /* optval */
   1655                  arg4  /* optlen */ );
   1656 }
   1657 
   1658 /* ------ */
   1659 
   1660 void
   1661 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
   1662                                    UWord arg0, UWord arg1, UWord arg2 )
   1663 {
   1664    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
   1665    Addr name_p     = arg1;
   1666    Addr namelen_p  = arg2;
   1667    /* Nb: name_p cannot be NULL */
   1668    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
   1669                                 "socketcall.getsockname(name)",
   1670                                 "socketcall.getsockname(namelen_in)" );
   1671 }
   1672 
   1673 void
   1674 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
   1675                                     SysRes res,
   1676                                     UWord arg0, UWord arg1, UWord arg2 )
   1677 {
   1678    Addr name_p     = arg1;
   1679    Addr namelen_p  = arg2;
   1680    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1681    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
   1682                                  "socketcall.getsockname(namelen_out)" );
   1683 }
   1684 
   1685 /* ------ */
   1686 
   1687 void
   1688 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
   1689                                    UWord arg0, UWord arg1, UWord arg2 )
   1690 {
   1691    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
   1692    Addr name_p     = arg1;
   1693    Addr namelen_p  = arg2;
   1694    /* Nb: name_p cannot be NULL */
   1695    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
   1696                                 "socketcall.getpeername(name)",
   1697                                 "socketcall.getpeername(namelen_in)" );
   1698 }
   1699 
   1700 void
   1701 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
   1702                                     SysRes res,
   1703                                     UWord arg0, UWord arg1, UWord arg2 )
   1704 {
   1705    Addr name_p     = arg1;
   1706    Addr namelen_p  = arg2;
   1707    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1708    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
   1709                                  "socketcall.getpeername(namelen_out)" );
   1710 }
   1711 
   1712 /* ------ */
   1713 
   1714 void
   1715 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, const HChar *name,
   1716                                struct vki_msghdr *msg )
   1717 {
   1718    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg, False );
   1719 }
   1720 
   1721 /* ------ */
   1722 
   1723 void
   1724 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, const HChar *name,
   1725                                struct vki_msghdr *msg )
   1726 {
   1727    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg, True );
   1728 }
   1729 
   1730 void
   1731 ML_(generic_POST_sys_recvmsg) ( ThreadId tid, const HChar *name,
   1732                                 struct vki_msghdr *msg, UInt length )
   1733 {
   1734    msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg, True );
   1735    check_cmsg_for_fds( tid, msg );
   1736 }
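        /* Background (added note): check_cmsg_for_fds() is needed because
           recvmsg() can deliver file descriptors in ancillary data, e.g. a
           client receiving an fd passed over a unix socket does roughly
              struct cmsghdr *cm = CMSG_FIRSTHDR(msg);
              if (cm && cm->cmsg_level == SOL_SOCKET
                     && cm->cmsg_type == SCM_RIGHTS)
                 memcpy(&newfd, CMSG_DATA(cm), sizeof(newfd));
           and any descriptor arriving this way must go through the usual
           fd tracking, just like one returned directly by a syscall. */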
   1737 
   1738 
   1739 /* ---------------------------------------------------------------------
   1740    Deal with a bunch of IPC related syscalls
   1741    ------------------------------------------------------------------ */
   1742 
   1743 /* ------ */
   1744 
   1745 void
   1746 ML_(generic_PRE_sys_semop) ( ThreadId tid,
   1747                              UWord arg0, UWord arg1, UWord arg2 )
   1748 {
   1749    /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
   1750    PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
   1751 }
   1752 
   1753 /* ------ */
   1754 
   1755 void
   1756 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
   1757                                   UWord arg0, UWord arg1,
   1758                                   UWord arg2, UWord arg3 )
   1759 {
   1760    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
   1761                      struct timespec *timeout); */
   1762    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
   1763    if (arg3 != 0)
   1764       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
   1765 }
   1766 
   1767 /* ------ */
   1768 
   1769 static
   1770 UInt get_sem_count( Int semid )
   1771 {
   1772    struct vki_semid_ds buf;
   1773    union vki_semun arg;
   1774    SysRes res;
   1775 
   1776    /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
   1777       (experimental) otherwise complains that the use in the return
   1778       statement below is uninitialised. */
   1779    buf.sem_nsems = 0;
   1780 
   1781    arg.buf = &buf;
   1782 
   1783 #  if defined(__NR_semctl)
   1784    res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
   1785 #  elif defined(__NR_semsys) /* Solaris */
   1786    res = VG_(do_syscall5)(__NR_semsys, VKI_SEMCTL, semid, 0, VKI_IPC_STAT,
   1787                           *(UWord *)&arg);
   1788 #  else
   1789    res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
   1790                           VKI_IPC_STAT, (UWord)&arg);
   1791 #  endif
   1792    if (sr_isError(res))
   1793       return 0;
   1794 
   1795    return buf.sem_nsems;
   1796 }
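        /* Added note: for semctl(GETALL)/semctl(SETALL) the client does not
           pass the size of arg.array; it is implicitly the number of
           semaphores in the set.  Hence the IPC_STAT query above, so that
           the wrappers below can check
              sizeof(unsigned short) * nsems
           bytes.  On failure we return 0, which simply disables the
           address-range check. */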
   1797 
   1798 void
   1799 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
   1800                               UWord arg0, UWord arg1,
   1801                               UWord arg2, UWord arg3 )
   1802 {
   1803    /* int semctl(int semid, int semnum, int cmd, ...); */
   1804    union vki_semun arg = *(union vki_semun *)&arg3;
   1805    UInt nsems;
   1806    switch (arg2 /* cmd */) {
   1807 #if defined(VKI_IPC_INFO)
   1808    case VKI_IPC_INFO:
   1809    case VKI_SEM_INFO:
   1810    case VKI_IPC_INFO|VKI_IPC_64:
   1811    case VKI_SEM_INFO|VKI_IPC_64:
   1812       PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
   1813                      (Addr)arg.buf, sizeof(struct vki_seminfo) );
   1814       break;
   1815 #endif
   1816 
   1817    case VKI_IPC_STAT:
   1818 #if defined(VKI_SEM_STAT)
   1819    case VKI_SEM_STAT:
   1820 #endif
   1821       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
   1822                      (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1823       break;
   1824 
   1825 #if defined(VKI_IPC_64)
   1826    case VKI_IPC_STAT|VKI_IPC_64:
   1827 #if defined(VKI_SEM_STAT)
   1828    case VKI_SEM_STAT|VKI_IPC_64:
   1829 #endif
   1830 #endif
   1831 #if defined(VKI_IPC_STAT64)
   1832    case VKI_IPC_STAT64:
   1833 #endif
   1834 #if defined(VKI_IPC_64) || defined(VKI_IPC_STAT64)
   1835       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
   1836                      (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1837       break;
   1838 #endif
   1839 
   1840    case VKI_IPC_SET:
   1841       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
   1842                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1843       break;
   1844 
   1845 #if defined(VKI_IPC_64)
   1846    case VKI_IPC_SET|VKI_IPC_64:
   1847 #endif
   1848 #if defined(VKI_IPC_SET64)
   1849    case VKI_IPC_SET64:
   1850 #endif
   1851 #if defined(VKI_IPC_64) || defined(VKI_IPC_SET64)
   1852       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
   1853                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1854       break;
   1855 #endif
   1856 
   1857    case VKI_GETALL:
   1858 #if defined(VKI_IPC_64)
   1859    case VKI_GETALL|VKI_IPC_64:
   1860 #endif
   1861       nsems = get_sem_count( arg0 );
   1862       PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
   1863                      (Addr)arg.array, sizeof(unsigned short) * nsems );
   1864       break;
   1865 
   1866    case VKI_SETALL:
   1867 #if defined(VKI_IPC_64)
   1868    case VKI_SETALL|VKI_IPC_64:
   1869 #endif
   1870       nsems = get_sem_count( arg0 );
   1871       PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
   1872                     (Addr)arg.array, sizeof(unsigned short) * nsems );
   1873       break;
   1874    }
   1875 }
   1876 
   1877 void
   1878 ML_(generic_POST_sys_semctl) ( ThreadId tid,
   1879                                UWord res,
   1880                                UWord arg0, UWord arg1,
   1881                                UWord arg2, UWord arg3 )
   1882 {
   1883    union vki_semun arg = *(union vki_semun *)&arg3;
   1884    UInt nsems;
   1885    switch (arg2 /* cmd */) {
   1886 #if defined(VKI_IPC_INFO)
   1887    case VKI_IPC_INFO:
   1888    case VKI_SEM_INFO:
   1889    case VKI_IPC_INFO|VKI_IPC_64:
   1890    case VKI_SEM_INFO|VKI_IPC_64:
   1891       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
   1892       break;
   1893 #endif
   1894 
   1895    case VKI_IPC_STAT:
   1896 #if defined(VKI_SEM_STAT)
   1897    case VKI_SEM_STAT:
   1898 #endif
   1899       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1900       break;
   1901 
   1902 #if defined(VKI_IPC_64)
   1903    case VKI_IPC_STAT|VKI_IPC_64:
   1904    case VKI_SEM_STAT|VKI_IPC_64:
   1905 #endif
   1906 #if defined(VKI_IPC_STAT64)
   1907    case VKI_IPC_STAT64:
   1908 #endif
   1909 #if defined(VKI_IPC_64) || defined(VKI_IPC_STAT64)
   1910       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1911       break;
   1912 #endif
   1913 
   1914    case VKI_GETALL:
   1915 #if defined(VKI_IPC_64)
   1916    case VKI_GETALL|VKI_IPC_64:
   1917 #endif
   1918       nsems = get_sem_count( arg0 );
   1919       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
   1920       break;
   1921    }
   1922 }
   1923 
   1924 /* ------ */
   1925 
   1926 /* ------ */
   1927 
   1928 static
   1929 SizeT get_shm_size ( Int shmid )
   1930 {
   1931 #if defined(__NR_shmctl)
   1932 #  ifdef VKI_IPC_64
   1933    struct vki_shmid64_ds buf;
   1934 #    if defined(VGP_amd64_linux) || defined(VGP_arm64_linux)
   1935      /* See bug 222545 comment 7 */
   1936      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
   1937                                      VKI_IPC_STAT, (UWord)&buf);
   1938 #    else
   1939      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
   1940                                      VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
   1941 #    endif
   1942 #  else /* !def VKI_IPC_64 */
   1943    struct vki_shmid_ds buf;
   1944    SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
   1945 #  endif /* def VKI_IPC_64 */
   1946 #elif defined(__NR_shmsys) /* Solaris */
   1947    struct vki_shmid_ds buf;
   1948    SysRes __res = VG_(do_syscall4)(__NR_shmsys, VKI_SHMCTL, shmid, VKI_IPC_STAT,
   1949                          (UWord)&buf);
   1950 #else
   1951    struct vki_shmid_ds buf;
   1952    SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
   1953                                  VKI_IPC_STAT, 0, (UWord)&buf);
   1954 #endif
   1955    if (sr_isError(__res))
   1956       return 0;
   1957 
   1958    return (SizeT) buf.shm_segsz;
   1959 }
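        /* Added note: shmat() itself does not tell us how large the attached
           segment is, but both the address check in the PRE handler and the
           aspacem/tool notifications in the POST handler below need the
           length, hence this IPC_STAT query (0 on failure, which effectively
           skips those steps). */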
   1960 
   1961 UWord
   1962 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
   1963                              UWord arg0, UWord arg1, UWord arg2 )
   1964 {
   1965    /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
   1966    SizeT  segmentSize = get_shm_size ( arg0 );
   1967    UWord tmp;
   1968    Bool  ok;
   1969    if (arg1 == 0) {
   1970       /* arm-linux only: work around the fact that
   1971          VG_(am_get_advisory_client_simple) produces something that is
   1972          VKI_PAGE_SIZE aligned, whereas what we want is something
   1973          VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
   1974          increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
   1975          then round the result up to the next VKI_SHMLBA boundary.
   1976          See bug 222545 comment 15.  So far, arm-linux is the only
   1977          platform where this is known to be necessary. */
   1978       vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
   1979       if (VKI_SHMLBA > VKI_PAGE_SIZE) {
   1980          segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
   1981       }
   1982       tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
   1983       if (ok) {
   1984          if (VKI_SHMLBA > VKI_PAGE_SIZE) {
   1985             arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
   1986          } else {
   1987             arg1 = tmp;
   1988          }
   1989       }
   1990    }
   1991    else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
   1992       arg1 = 0;
   1993    return arg1;
   1994 }
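        /* Worked example (added; hedged, arm-linux specific): with
           VKI_SHMLBA == 4*VKI_PAGE_SIZE, a request for a one-page segment is
           padded to
              segmentSize + (VKI_SHMLBA - VKI_PAGE_SIZE)  ==  4 pages
           before asking aspacem, and the advisory is then rounded with
              VG_ROUNDUP(tmp, VKI_SHMLBA)
           so the attach address satisfies the kernel's SHMLBA alignment. */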
   1995 
   1996 void
   1997 ML_(generic_POST_sys_shmat) ( ThreadId tid,
   1998                               UWord res,
   1999                               UWord arg0, UWord arg1, UWord arg2 )
   2000 {
   2001    SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
   2002    if ( segmentSize > 0 ) {
   2003       UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
   2004       Bool d;
   2005 
   2006       if (arg2 & VKI_SHM_RDONLY)
   2007          prot &= ~VKI_PROT_WRITE;
   2008       /* It isn't exactly correct to pass 0 for the fd and offset
   2009          here.  The kernel seems to think the corresponding section
   2010          does have dev/ino numbers:
   2011 
   2012          04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
   2013 
   2014          However there is no obvious way to find them.  In order to
   2015          cope with the discrepancy, aspacem's sync checker omits the
   2016          dev/ino correspondence check in cases where V does not know
   2017          the dev/ino. */
   2018       d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
   2019 
   2020       /* we don't distinguish whether it's read-only or
   2021        * read-write -- it doesn't matter really. */
   2022       VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
   2023                               0/*di_handle*/ );
   2024       if (d)
   2025          VG_(discard_translations)( (Addr)res,
   2026                                     (ULong)VG_PGROUNDUP(segmentSize),
   2027                                     "ML_(generic_POST_sys_shmat)" );
   2028    }
   2029 }
   2030 
   2031 /* ------ */
   2032 
   2033 Bool
   2034 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
   2035 {
   2036    /* int shmdt(const void *shmaddr); */
   2037    return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
   2038 }
   2039 
   2040 void
   2041 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
   2042 {
   2043    NSegment const* s = VG_(am_find_nsegment)(arg0);
   2044 
   2045    if (s != NULL) {
   2046       Addr  s_start = s->start;
   2047       SizeT s_len   = s->end+1 - s->start;
   2048       Bool  d;
   2049 
   2050       vg_assert(s->kind == SkShmC);
   2051       vg_assert(s->start == arg0);
   2052 
   2053       d = VG_(am_notify_munmap)(s_start, s_len);
   2054       s = NULL; /* s is now invalid */
   2055       VG_TRACK( die_mem_munmap, s_start, s_len );
   2056       if (d)
   2057          VG_(discard_translations)( s_start,
   2058                                     (ULong)s_len,
   2059                                     "ML_(generic_POST_sys_shmdt)" );
   2060    }
   2061 }
   2062 /* ------ */
   2063 
   2064 void
   2065 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
   2066                               UWord arg0, UWord arg1, UWord arg2 )
   2067 {
   2068    /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
   2069    switch (arg1 /* cmd */) {
   2070 #if defined(VKI_IPC_INFO)
   2071    case VKI_IPC_INFO:
   2072       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
   2073                      arg2, sizeof(struct vki_shminfo) );
   2074       break;
   2075 #if defined(VKI_IPC_64)
   2076    case VKI_IPC_INFO|VKI_IPC_64:
   2077       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
   2078                      arg2, sizeof(struct vki_shminfo64) );
   2079       break;
   2080 #endif
   2081 #endif
   2082 
   2083 #if defined(VKI_SHM_INFO)
   2084    case VKI_SHM_INFO:
   2085 #if defined(VKI_IPC_64)
   2086    case VKI_SHM_INFO|VKI_IPC_64:
   2087 #endif
   2088       PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
   2089                      arg2, sizeof(struct vki_shm_info) );
   2090       break;
   2091 #endif
   2092 
   2093    case VKI_IPC_STAT:
   2094 #if defined(VKI_SHM_STAT)
   2095    case VKI_SHM_STAT:
   2096 #endif
   2097       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
   2098                      arg2, sizeof(struct vki_shmid_ds) );
   2099       break;
   2100 
   2101 #if defined(VKI_IPC_64)
   2102    case VKI_IPC_STAT|VKI_IPC_64:
   2103    case VKI_SHM_STAT|VKI_IPC_64:
   2104       PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
   2105                      arg2, sizeof(struct vki_shmid64_ds) );
   2106       break;
   2107 #endif
   2108 
   2109    case VKI_IPC_SET:
   2110       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
   2111                     arg2, sizeof(struct vki_shmid_ds) );
   2112       break;
   2113 
   2114 #if defined(VKI_IPC_64)
   2115    case VKI_IPC_SET|VKI_IPC_64:
   2116       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
   2117                     arg2, sizeof(struct vki_shmid64_ds) );
   2118       break;
   2119 #endif
   2120    }
   2121 }
   2122 
   2123 void
   2124 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
   2125                                UWord res,
   2126                                UWord arg0, UWord arg1, UWord arg2 )
   2127 {
   2128    switch (arg1 /* cmd */) {
   2129 #if defined(VKI_IPC_INFO)
   2130    case VKI_IPC_INFO:
   2131       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
   2132       break;
   2133    case VKI_IPC_INFO|VKI_IPC_64:
   2134       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
   2135       break;
   2136 #endif
   2137 
   2138 #if defined(VKI_SHM_INFO)
   2139    case VKI_SHM_INFO:
   2140    case VKI_SHM_INFO|VKI_IPC_64:
   2141       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
   2142       break;
   2143 #endif
   2144 
   2145    case VKI_IPC_STAT:
   2146 #if defined(VKI_SHM_STAT)
   2147    case VKI_SHM_STAT:
   2148 #endif
   2149       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
   2150       break;
   2151 
   2152 #if defined(VKI_IPC_64)
   2153    case VKI_IPC_STAT|VKI_IPC_64:
   2154    case VKI_SHM_STAT|VKI_IPC_64:
   2155       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
   2156       break;
   2157 #endif
   2158 
   2159 
   2160    }
   2161 }
   2162 
   2163 /* ---------------------------------------------------------------------
   2164    Generic handler for mmap
   2165    ------------------------------------------------------------------ */
   2166 
   2167 /*
   2168  * Although mmap is specified by POSIX and the arguments are generally
   2169  * consistent across platforms, the precise details of the low-level
   2170  * argument passing conventions differ. For example:
   2171  *
   2172  * - On x86-linux there is mmap (aka old_mmap) which takes the
   2173  *   arguments in a memory block and the offset in bytes; and
   2174  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
   2175  *   way and the offset in pages.
   2176  *
   2177  * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
   2178  *   arguments in the normal way and the offset in bytes; and
   2179  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
   2180  *   way and the offset in pages.
   2181  *
   2182  * - On amd64-linux everything is simple and there is just the one
   2183  *   call, mmap (aka sys_mmap)  which takes the arguments in the
   2184  *   normal way and the offset in bytes.
   2185  *
   2186  * - On s390x-linux there is mmap (aka old_mmap) which takes the
   2187  *   arguments in a memory block and the offset in bytes. mmap2
   2188  *   is also available (but not exported via unistd.h) with
   2189  *   arguments in a memory block and the offset in pages.
   2190  *
   2191  * To cope with all this we provide a generic handler function here
   2192  * and then each platform implements one or more system call handlers
   2193  * which call this generic routine after extracting and normalising
   2194  * the arguments.
   2195  */
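        /* Sketch (added, illustrative only -- not a wrapper in this file): an
           x86-linux mmap2 handler would convert the page-unit offset to bytes
           before delegating, roughly
              SET_STATUS_from_SysRes(
                 ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
                                            (Off64T)ARG6 * VKI_PAGE_SIZE ) );
           whereas amd64-linux passes ARG6 (already in bytes) straight
           through. */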
   2196 
   2197 SysRes
   2198 ML_(generic_PRE_sys_mmap) ( ThreadId tid,
   2199                             UWord arg1, UWord arg2, UWord arg3,
   2200                             UWord arg4, UWord arg5, Off64T arg6 )
   2201 {
   2202    Addr       advised;
   2203    SysRes     sres;
   2204    MapRequest mreq;
   2205    Bool       mreq_ok;
   2206 
   2207 #  if defined(VGO_darwin)
   2208    // Nb: we can't use this on Darwin, it has races:
   2209    // * needs to RETRY if advisory succeeds but map fails
   2210    //   (could have been some other thread in a nonblocking call)
   2211    // * needs to not use fixed-position mmap() on Darwin
   2212    //   (mmap will cheerfully smash whatever's already there, which might
   2213    //   be a new mapping from some other thread in a nonblocking call)
   2214    VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
   2215 #  endif
   2216 
   2217    if (arg2 == 0) {
   2218       /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
   2219          shall be established. */
   2220       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2221    }
   2222 
   2223    if (!VG_IS_PAGE_ALIGNED(arg1)) {
   2224       /* zap any misaligned addresses. */
   2225       /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
   2226          to fail.   Here, we catch them all. */
   2227       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2228    }
   2229 
   2230    if (!VG_IS_PAGE_ALIGNED(arg6)) {
   2231       /* zap any misaligned offsets. */
   2232       /* SuSV3 says: The off argument is constrained to be aligned and
   2233          sized according to the value returned by sysconf() when
   2234          passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
   2235       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2236    }
   2237 
   2238    /* Figure out what kind of allocation constraints there are
   2239       (fixed/hint/any), and ask aspacem what we should do. */
   2240    mreq.start = arg1;
   2241    mreq.len   = arg2;
   2242    if (arg4 & VKI_MAP_FIXED) {
   2243       mreq.rkind = MFixed;
   2244    } else
   2245 #if defined(VKI_MAP_ALIGN) /* Solaris specific */
   2246    if (arg4 & VKI_MAP_ALIGN) {
   2247       mreq.rkind = MAlign;
   2248       if (mreq.start == 0) {
   2249          mreq.start = VKI_PAGE_SIZE;
   2250       }
   2251       /* VKI_MAP_FIXED and VKI_MAP_ALIGN don't like each other. */
   2252       arg4 &= ~VKI_MAP_ALIGN;
   2253    } else
   2254 #endif
   2255    if (arg1 != 0) {
   2256       mreq.rkind = MHint;
   2257    } else {
   2258       mreq.rkind = MAny;
   2259    }
   2260 
   2261    /* Enquire ... */
   2262    advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   2263    if (!mreq_ok) {
   2264       /* Our request was bounced, so we'd better fail. */
   2265       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2266    }
   2267 
   2268 #  if defined(VKI_MAP_32BIT)
   2269    /* MAP_32BIT is royally unportable, so if the client asks for it, try our
   2270       best to make it work (but without complicating aspacemgr).
   2271       If the user requested MAP_32BIT, the mmap-ed space must be in the
   2272       first 2GB of the address space. So, return ENOMEM if aspacemgr
   2273       advisory is above the first 2GB. If MAP_FIXED is also requested,
   2274       MAP_32BIT has to be ignored.
   2275       Assumption about aspacemgr behaviour: aspacemgr scans the address space
   2276       from low addresses to find a free segment. No special effort is made
   2277       to keep the first 2GB 'free' for this MAP_32BIT. So, this will often
   2278       fail once the program has already allocated significant memory. */
   2279    if ((arg4 & VKI_MAP_32BIT) && !(arg4 & VKI_MAP_FIXED)) {
   2280       if (advised + arg2 >= 0x80000000)
   2281          return VG_(mk_SysRes_Error)( VKI_ENOMEM );
   2282    }
   2283 #  endif
   2284 
   2285    /* Otherwise we're OK (so far).  Install aspacem's choice of
   2286       address, and let the mmap go through.  */
   2287    sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
   2288                                     arg4 | VKI_MAP_FIXED,
   2289                                     arg5, arg6);
   2290 
   2291 #  if defined(VKI_MAP_32BIT)
   2292    /* No recovery attempt if the advisory was not accepted. */
   2293    if ((arg4 & VKI_MAP_32BIT) && !(arg4 & VKI_MAP_FIXED)
   2294        && sr_isError(sres)) {
   2295       return VG_(mk_SysRes_Error)( VKI_ENOMEM );
   2296    }
   2297 #  endif
   2298 
   2299    /* A refinement: it may be that the kernel refused aspacem's choice
   2300       of address.  If we were originally asked for a hinted mapping,
   2301       there is still a last chance: try again at any address.
   2302       Hence: */
   2303    if (mreq.rkind == MHint && sr_isError(sres)) {
   2304       mreq.start = 0;
   2305       mreq.len   = arg2;
   2306       mreq.rkind = MAny;
   2307       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   2308       if (!mreq_ok) {
   2309          /* Our request was bounced, so we'd better fail. */
   2310          return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2311       }
   2312       /* and try again with the kernel */
   2313       sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
   2314                                        arg4 | VKI_MAP_FIXED,
   2315                                        arg5, arg6);
   2316    }
   2317 
   2318    /* Yet another refinement: sometimes valgrind chooses an address
   2319       which is not acceptable to the kernel. This happens at least
   2320       when mmap-ing huge pages, using the flag MAP_HUGETLB.
   2321       valgrind aspacem does not know about huge pages, and modifying
   2322       it to handle huge pages is not straightforward (e.g. we would need
   2323       to understand special file system mount options).
   2324       So, just redo the mmap without giving any constraint to
   2325       the kernel. If that succeeds, check with aspacem that the returned
   2326       address is acceptable.
   2327       This gives a similar effect to the client having hinted at
   2328       that address.
   2329       The aspacem state will be correctly updated afterwards.
   2330       We cannot, however, do this last refinement when the user asked
   2331       for a fixed mapping, since a specific address was requested. */
   2332    if (sr_isError(sres) && !(arg4 & VKI_MAP_FIXED)) {
   2333       advised = 0;
   2334       /* try mmap with NULL address and without VKI_MAP_FIXED
   2335          to let the kernel decide. */
   2336       sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
   2337                                        arg4,
   2338                                        arg5, arg6);
   2339       if (!sr_isError(sres)) {
   2340          /* The kernel is supposed to know what it is doing, but let's
   2341             do a last sanity check anyway, as if the chosen address had
   2342             been initially hinted by the client. The whole point of this
   2343             last try was to allow mmap of huge pages to succeed without
   2344             making aspacem understand them; on the other hand the kernel
   2345             does not know about valgrind reservations, so this mapping
   2346             can end up either in free space or on top of a reservation. */
   2347          mreq.start = (Addr)sr_Res(sres);
   2348          mreq.len   = arg2;
   2349          mreq.rkind = MHint;
   2350          advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   2351          vg_assert(mreq_ok && advised == mreq.start);
   2352       }
   2353    }
   2354 
   2355    if (!sr_isError(sres)) {
   2356       ULong di_handle;
   2357       /* Notify aspacem. */
   2358       notify_core_of_mmap(
   2359          (Addr)sr_Res(sres), /* addr kernel actually assigned */
   2360          arg2, /* length */
   2361          arg3, /* prot */
   2362          arg4, /* the original flags value */
   2363          arg5, /* fd */
   2364          arg6  /* offset */
   2365       );
   2366       /* Load symbols? */
   2367       di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
   2368                                        False/*allow_SkFileV*/, (Int)arg5 );
   2369       /* Notify the tool. */
   2370       notify_tool_of_mmap(
   2371          (Addr)sr_Res(sres), /* addr kernel actually assigned */
   2372          arg2, /* length */
   2373          arg3, /* prot */
   2374          di_handle /* so the tool can refer to the read debuginfo later,
   2375                       if it wants. */
   2376       );
   2377    }
   2378 
   2379    /* Stay sane */
   2380    if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
   2381       vg_assert(sr_Res(sres) == arg1);
   2382 
   2383    return sres;
   2384 }
   2385 
   2386 
   2387 /* ---------------------------------------------------------------------
   2388    The Main Entertainment ... syscall wrappers
   2389    ------------------------------------------------------------------ */
   2390 
   2391 /* Note: the PRE() and POST() wrappers are for the actual functions
   2392    implementing the system calls in the OS kernel.  These mostly have
   2393    names like sys_write();  a few have names like old_mmap().  See the
   2394    comment for ML_(syscall_table)[] for important info about the __NR_foo
   2395    constants and their relationship to the sys_foo() functions.
   2396 
   2397    Some notes about names used for syscalls and args:
   2398    - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
   2399      ambiguity.
   2400 
   2401    - For error messages, we generally use a somewhat generic name
   2402      for the syscall (eg. "write" rather than "sys_write").  This should be
   2403      good enough for the average user to understand what is happening,
   2404      without confusing them with names like "sys_write".
   2405 
   2406    - Also, for error messages the arg names are mostly taken from the man
   2407      pages (even though many of those man pages are really for glibc
   2408      functions of the same name), rather than from the OS kernel source,
   2409      for the same reason -- a user presented with a "bogus foo(bar)" arg
   2410      will most likely look at the "foo" man page to see which is the "bar"
   2411      arg.
   2412 
   2413    Note that we use our own vki_* types.  The one exception is in
   2414    PRE_REG_READn calls, where pointer types haven't been changed, because
   2415    they don't need to be -- eg. for "foo*" to be used, the type foo need not
   2416    be visible.
   2417 
   2418    XXX: some of these are arch-specific, and should be factored out.
   2419 */
   2420 
   2421 #define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
   2422 #define POST(name)     DEFN_POST_TEMPLATE(generic, name)
   2423 
   2424 // Macros to support 64-bit syscall args split into two 32 bit values
   2425 #if defined(VG_LITTLEENDIAN)
   2426 #define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
   2427 #define MERGE64_FIRST(name) name##_low
   2428 #define MERGE64_SECOND(name) name##_high
   2429 #elif defined(VG_BIGENDIAN)
   2430 #define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
   2431 #define MERGE64_FIRST(name) name##_high
   2432 #define MERGE64_SECOND(name) name##_low
   2433 #else
   2434 #error Unknown endianness
   2435 #endif
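        // Example (added note): on a 32-bit target a 64-bit file offset
        // arrives split across two registers; a wrapper reassembles it as
        //    (Long)MERGE64(ARG4,ARG5)
        // and names the two halves in PRE_REG_READ5 via
        // MERGE64_FIRST(offset)/MERGE64_SECOND(offset).  See sys_pwrite64
        // and sys_pread64 below.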
   2436 
   2437 PRE(sys_exit)
   2438 {
   2439    ThreadState* tst;
   2440    /* simple; just make this thread exit */
   2441    PRINT("exit( %ld )", SARG1);
   2442    PRE_REG_READ1(void, "exit", int, status);
   2443    tst = VG_(get_ThreadState)(tid);
   2444    /* Set the thread's status to be exiting, then claim that the
   2445       syscall succeeded. */
   2446    tst->exitreason = VgSrc_ExitThread;
   2447    tst->os_state.exitcode = ARG1;
   2448    SET_STATUS_Success(0);
   2449 }
   2450 
   2451 PRE(sys_ni_syscall)
   2452 {
   2453    PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
   2454       VG_SYSNUM_STRING(SYSNO));
   2455    PRE_REG_READ0(long, "ni_syscall");
   2456    SET_STATUS_Failure( VKI_ENOSYS );
   2457 }
   2458 
   2459 PRE(sys_iopl)
   2460 {
   2461    PRINT("sys_iopl ( %lu )", ARG1);
   2462    PRE_REG_READ1(long, "iopl", unsigned long, level);
   2463 }
   2464 
   2465 PRE(sys_fsync)
   2466 {
   2467    *flags |= SfMayBlock;
   2468    PRINT("sys_fsync ( %lu )", ARG1);
   2469    PRE_REG_READ1(long, "fsync", unsigned int, fd);
   2470 }
   2471 
   2472 PRE(sys_fdatasync)
   2473 {
   2474    *flags |= SfMayBlock;
   2475    PRINT("sys_fdatasync ( %lu )", ARG1);
   2476    PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
   2477 }
   2478 
   2479 PRE(sys_msync)
   2480 {
   2481    *flags |= SfMayBlock;
   2482    PRINT("sys_msync ( %#lx, %lu, %#lx )", ARG1, ARG2, ARG3);
   2483    PRE_REG_READ3(long, "msync",
   2484                  unsigned long, start, vki_size_t, length, int, flags);
   2485    PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
   2486 }
   2487 
   2488 // Nb: getpmsg() and putpmsg() are special additional syscalls used in early
   2489 // versions of LiS (Linux Streams).  They are not part of the kernel.
   2490 // Therefore, we have to provide this type ourselves, rather than getting it
   2491 // from the kernel sources.
   2492 struct vki_pmsg_strbuf {
   2493    int     maxlen;         /* no. of bytes in buffer */
   2494    int     len;            /* no. of bytes returned */
   2495    vki_caddr_t buf;        /* pointer to data */
   2496 };
   2497 PRE(sys_getpmsg)
   2498 {
   2499    /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
   2500    struct vki_pmsg_strbuf *ctrl;
   2501    struct vki_pmsg_strbuf *data;
   2502    *flags |= SfMayBlock;
   2503    PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", SARG1, ARG2, ARG3,
   2504          ARG4, ARG5);
   2505    PRE_REG_READ5(int, "getpmsg",
   2506                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
   2507                  int *, bandp, int *, flagsp);
   2508    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2509    data = (struct vki_pmsg_strbuf *)ARG3;
   2510    if (ctrl && ctrl->maxlen > 0)
   2511       PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
   2512    if (data && data->maxlen > 0)
   2513       PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
   2514    if (ARG4)
   2515       PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
   2516    if (ARG5)
   2517       PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
   2518 }
   2519 POST(sys_getpmsg)
   2520 {
   2521    struct vki_pmsg_strbuf *ctrl;
   2522    struct vki_pmsg_strbuf *data;
   2523    vg_assert(SUCCESS);
   2524    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2525    data = (struct vki_pmsg_strbuf *)ARG3;
   2526    if (RES == 0 && ctrl && ctrl->len > 0) {
   2527       POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
   2528    }
   2529    if (RES == 0 && data && data->len > 0) {
   2530       POST_MEM_WRITE( (Addr)data->buf, data->len);
   2531    }
   2532 }
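        /* Added note (hedged): in these STREAMS-style strbufs, maxlen is the
           capacity supplied by the caller, which is why the PRE handler marks
           buf..buf+maxlen as about-to-be-written, while len is what the call
           actually returned, which is why the POST handler marks only
           buf..buf+len as defined. */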
   2533 
   2534 PRE(sys_putpmsg)
   2535 {
   2536    /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
   2537    struct vki_pmsg_strbuf *ctrl;
   2538    struct vki_pmsg_strbuf *data;
   2539    *flags |= SfMayBlock;
   2540    PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", SARG1, ARG2, ARG3,
   2541          SARG4, SARG5);
   2542    PRE_REG_READ5(int, "putpmsg",
   2543                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
   2544                  int, band, int, flags);
   2545    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2546    data = (struct vki_pmsg_strbuf *)ARG3;
   2547    if (ctrl && ctrl->len > 0)
   2548       PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
   2549    if (data && data->len > 0)
   2550       PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
   2551 }
   2552 
   2553 PRE(sys_getitimer)
   2554 {
   2555    struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2556    PRINT("sys_getitimer ( %ld, %#lx )", SARG1, ARG2);
   2557    PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
   2558 
   2559    PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
   2560    PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
   2561 }
   2562 
   2563 POST(sys_getitimer)
   2564 {
   2565    if (ARG2 != (Addr)NULL) {
   2566       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2567       POST_timeval_WRITE( &(value->it_interval) );
   2568       POST_timeval_WRITE( &(value->it_value) );
   2569    }
   2570 }
   2571 
   2572 PRE(sys_setitimer)
   2573 {
   2574    PRINT("sys_setitimer ( %ld, %#lx, %#lx )", SARG1, ARG2, ARG3);
   2575    PRE_REG_READ3(long, "setitimer",
   2576                  int, which,
   2577                  struct itimerval *, value, struct itimerval *, ovalue);
   2578    if (ARG2 != (Addr)NULL) {
   2579       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2580       PRE_timeval_READ( "setitimer(&value->it_interval)",
   2581                          &(value->it_interval));
   2582       PRE_timeval_READ( "setitimer(&value->it_value)",
   2583                          &(value->it_value));
   2584    }
   2585    if (ARG3 != (Addr)NULL) {
   2586       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
   2587       PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
   2588                          &(ovalue->it_interval));
   2589       PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
   2590                          &(ovalue->it_value));
   2591    }
   2592 }
   2593 
   2594 POST(sys_setitimer)
   2595 {
   2596    if (ARG3 != (Addr)NULL) {
   2597       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
   2598       POST_timeval_WRITE( &(ovalue->it_interval) );
   2599       POST_timeval_WRITE( &(ovalue->it_value) );
   2600    }
   2601 }
   2602 
   2603 PRE(sys_chroot)
   2604 {
   2605    PRINT("sys_chroot ( %#lx )", ARG1);
   2606    PRE_REG_READ1(long, "chroot", const char *, path);
   2607    PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
   2608 }
   2609 
   2610 PRE(sys_madvise)
   2611 {
   2612    *flags |= SfMayBlock;
   2613    PRINT("sys_madvise ( %#lx, %lu, %ld )", ARG1, ARG2, SARG3);
   2614    PRE_REG_READ3(long, "madvise",
   2615                  unsigned long, start, vki_size_t, length, int, advice);
   2616 }
   2617 
   2618 #if HAVE_MREMAP
   2619 PRE(sys_mremap)
   2620 {
   2621    // Nb: this is different to the glibc version described in the man pages,
   2622    // which lacks the fifth 'new_address' argument.
   2623    if (ARG4 & VKI_MREMAP_FIXED) {
   2624       PRINT("sys_mremap ( %#lx, %lu, %lu, %#lx, %#lx )",
   2625             ARG1, ARG2, ARG3, ARG4, ARG5);
   2626       PRE_REG_READ5(unsigned long, "mremap",
   2627                     unsigned long, old_addr, unsigned long, old_size,
   2628                     unsigned long, new_size, unsigned long, flags,
   2629                     unsigned long, new_addr);
   2630    } else {
   2631       PRINT("sys_mremap ( %#lx, %lu, %lu, 0x%lx )",
   2632             ARG1, ARG2, ARG3, ARG4);
   2633       PRE_REG_READ4(unsigned long, "mremap",
   2634                     unsigned long, old_addr, unsigned long, old_size,
   2635                     unsigned long, new_size, unsigned long, flags);
   2636    }
   2637    SET_STATUS_from_SysRes(
   2638       do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
   2639    );
   2640 }
   2641 #endif /* HAVE_MREMAP */
   2642 
   2643 PRE(sys_nice)
   2644 {
   2645    PRINT("sys_nice ( %ld )", SARG1);
   2646    PRE_REG_READ1(long, "nice", int, inc);
   2647 }
   2648 
   2649 PRE(sys_mlock)
   2650 {
   2651    *flags |= SfMayBlock;
   2652    PRINT("sys_mlock ( %#lx, %lu )", ARG1, ARG2);
   2653    PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
   2654 }
   2655 
   2656 PRE(sys_munlock)
   2657 {
   2658    *flags |= SfMayBlock;
   2659    PRINT("sys_munlock ( %#lx, %lu )", ARG1, ARG2);
   2660    PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
   2661 }
   2662 
   2663 PRE(sys_mlockall)
   2664 {
   2665    *flags |= SfMayBlock;
   2666    PRINT("sys_mlockall ( %lx )", ARG1);
   2667    PRE_REG_READ1(long, "mlockall", int, flags);
   2668 }
   2669 
   2670 PRE(sys_setpriority)
   2671 {
   2672    PRINT("sys_setpriority ( %ld, %ld, %ld )", SARG1, SARG2, SARG3);
   2673    PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
   2674 }
   2675 
   2676 PRE(sys_getpriority)
   2677 {
   2678    PRINT("sys_getpriority ( %ld, %ld )", SARG1, SARG2);
   2679    PRE_REG_READ2(long, "getpriority", int, which, int, who);
   2680 }
   2681 
   2682 PRE(sys_pwrite64)
   2683 {
   2684    *flags |= SfMayBlock;
   2685 #if VG_WORDSIZE == 4
   2686    PRINT("sys_pwrite64 ( %lu, %#lx, %lu, %lld )",
   2687          ARG1, ARG2, ARG3, (Long)MERGE64(ARG4,ARG5));
   2688    PRE_REG_READ5(ssize_t, "pwrite64",
   2689                  unsigned int, fd, const char *, buf, vki_size_t, count,
   2690                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
   2691 #elif VG_WORDSIZE == 8
   2692    PRINT("sys_pwrite64 ( %lu, %#lx, %lu, %ld )",
   2693          ARG1, ARG2, ARG3, SARG4);
   2694    PRE_REG_READ4(ssize_t, "pwrite64",
   2695                  unsigned int, fd, const char *, buf, vki_size_t, count,
   2696                  Word, offset);
   2697 #else
   2698 #  error Unexpected word size
   2699 #endif
   2700    PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
   2701 }
   2702 
   2703 PRE(sys_sync)
   2704 {
   2705    *flags |= SfMayBlock;
   2706    PRINT("sys_sync ( )");
   2707    PRE_REG_READ0(long, "sync");
   2708 }
   2709 
   2710 PRE(sys_fstatfs)
   2711 {
   2712    FUSE_COMPATIBLE_MAY_BLOCK();
   2713    PRINT("sys_fstatfs ( %lu, %#lx )", ARG1, ARG2);
   2714    PRE_REG_READ2(long, "fstatfs",
   2715                  unsigned int, fd, struct statfs *, buf);
   2716    PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
   2717 }
   2718 
   2719 POST(sys_fstatfs)
   2720 {
   2721    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
   2722 }
   2723 
   2724 PRE(sys_fstatfs64)
   2725 {
   2726    FUSE_COMPATIBLE_MAY_BLOCK();
   2727    PRINT("sys_fstatfs64 ( %lu, %lu, %#lx )", ARG1, ARG2, ARG3);
   2728    PRE_REG_READ3(long, "fstatfs64",
   2729                  unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
   2730    PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
   2731 }
   2732 POST(sys_fstatfs64)
   2733 {
   2734    POST_MEM_WRITE( ARG3, ARG2 );
   2735 }
   2736 
   2737 PRE(sys_getsid)
   2738 {
   2739    PRINT("sys_getsid ( %ld )", SARG1);
   2740    PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
   2741 }
   2742 
   2743 PRE(sys_pread64)
   2744 {
   2745    *flags |= SfMayBlock;
   2746 #if VG_WORDSIZE == 4
   2747    PRINT("sys_pread64 ( %lu, %#lx, %lu, %lld )",
   2748          ARG1, ARG2, ARG3, (Long)MERGE64(ARG4,ARG5));
   2749    PRE_REG_READ5(ssize_t, "pread64",
   2750                  unsigned int, fd, char *, buf, vki_size_t, count,
   2751                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
   2752 #elif VG_WORDSIZE == 8
   2753    PRINT("sys_pread64 ( %lu, %#lx, %lu, %ld )",
   2754          ARG1, ARG2, ARG3, SARG4);
   2755    PRE_REG_READ4(ssize_t, "pread64",
   2756                  unsigned int, fd, char *, buf, vki_size_t, count,
   2757                  Word, offset);
   2758 #else
   2759 #  error Unexpected word size
   2760 #endif
   2761    PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
   2762 }
   2763 POST(sys_pread64)
   2764 {
   2765    vg_assert(SUCCESS);
   2766    if (RES > 0) {
   2767       POST_MEM_WRITE( ARG2, RES );
   2768    }
   2769 }
   2770 
   2771 PRE(sys_mknod)
   2772 {
   2773    FUSE_COMPATIBLE_MAY_BLOCK();
   2774    PRINT("sys_mknod ( %#lx(%s), %#lx, %#lx )", ARG1, (HChar*)ARG1, ARG2, ARG3 );
   2775    PRE_REG_READ3(long, "mknod",
   2776                  const char *, pathname, int, mode, unsigned, dev);
   2777    PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
   2778 }
   2779 
   2780 PRE(sys_flock)
   2781 {
   2782    *flags |= SfMayBlock;
   2783    PRINT("sys_flock ( %lu, %lu )", ARG1, ARG2 );
   2784    PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
   2785 }
   2786 
   2787 // Pre-read a char** argument, i.e. a NULL-terminated vector of strings.
   2788 void ML_(pre_argv_envp)(Addr a, ThreadId tid, const HChar *s1, const HChar *s2)
   2789 {
   2790    while (True) {
   2791       Addr a_deref;
   2792       Addr* a_p = (Addr*)a;
   2793       PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
   2794       a_deref = *a_p;
   2795       if (0 == a_deref)
   2796          break;
   2797       PRE_MEM_RASCIIZ( s2, a_deref );
   2798       a += sizeof(char*);
   2799    }
   2800 }
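        /* Added note (illustrative): for a client doing
              char *argv[] = { "prog", "arg1", NULL };
              execve("prog", argv, envp);
           the loop above checks every pointer slot of the vector as readable
           (including the terminating NULL slot) and each non-NULL entry as a
           readable, NUL-terminated string. */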
   2801 
   2802 static Bool i_am_the_only_thread ( void )
   2803 {
   2804    Int c = VG_(count_living_threads)();
   2805    vg_assert(c >= 1); /* stay sane */
   2806    return c == 1;
   2807 }
   2808 
   2809 /* Wait until all other threads disappear. */
   2810 void VG_(reap_threads)(ThreadId self)
   2811 {
   2812    while (!i_am_the_only_thread()) {
   2813       /* Let other thread(s) run */
   2814       VG_(vg_yield)();
   2815       VG_(poll_signals)(self);
   2816    }
   2817    vg_assert(i_am_the_only_thread());
   2818 }
   2819 
   2820 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
   2821 // but it seems to work nonetheless...
   2822 PRE(sys_execve)
   2823 {
   2824    HChar*       path = NULL;       /* path to executable */
   2825    HChar**      envp = NULL;
   2826    HChar**      argv = NULL;
   2827    HChar**      arg2copy;
   2828    HChar*       launcher_basename = NULL;
   2829    ThreadState* tst;
   2830    Int          i, j, tot_args;
   2831    SysRes       res;
   2832    Bool         setuid_allowed, trace_this_child;
   2833 
   2834    PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
   2835    PRE_REG_READ3(vki_off_t, "execve",
   2836                  char *, filename, char **, argv, char **, envp);
   2837    PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
   2838    if (ARG2 != 0)
   2839       ML_(pre_argv_envp)( ARG2, tid, "execve(argv)", "execve(argv[i])" );
   2840    if (ARG3 != 0)
   2841       ML_(pre_argv_envp)( ARG3, tid, "execve(envp)", "execve(envp[i])" );
   2842 
   2843    vg_assert(VG_(is_valid_tid)(tid));
   2844    tst = VG_(get_ThreadState)(tid);
   2845 
   2846    /* Erk.  If the exec fails, then the following will have made a
   2847       mess of things which makes it hard for us to continue.  The
   2848       right thing to do is piece everything together again in
   2849       POST(execve), but that's close to impossible.  Instead, we make
   2850       an effort to check that the execve will work before actually
   2851       doing it. */
   2852 
   2853    /* Check that the name at least begins in client-accessible storage. */
   2854    if (ARG1 == 0 /* obviously bogus */
   2855        || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
   2856       SET_STATUS_Failure( VKI_EFAULT );
   2857       return;
   2858    }
   2859 
   2860    // debug-only printing
   2861    if (0) {
   2862       VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
   2863       if (ARG2) {
   2864          VG_(printf)("ARG2 = ");
   2865          Int q;
   2866          HChar** vec = (HChar**)ARG2;
   2867          for (q = 0; vec[q]; q++)
   2868             VG_(printf)("%p(%s) ", vec[q], vec[q]);
   2869          VG_(printf)("\n");
   2870       } else {
   2871          VG_(printf)("ARG2 = null\n");
   2872       }
   2873    }
   2874 
   2875    // Decide whether or not we want to follow along
   2876    { // Make 'child_argv' be a pointer to the child's arg vector
   2877      // (skipping the exe name)
   2878      const HChar** child_argv = (const HChar**)ARG2;
   2879      if (child_argv && child_argv[0] == NULL)
   2880         child_argv = NULL;
   2881      trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
   2882    }
   2883 
   2884    // Do the important checks:  that it is a file, is executable, permissions
   2885    // are ok, etc.  We allow setuid executables to run only in the case when
   2886    // we are not simulating them, that is, when they are to be run natively.
   2887    setuid_allowed = trace_this_child  ? False  : True;
   2888    res = VG_(pre_exec_check)((const HChar *)ARG1, NULL, setuid_allowed);
   2889    if (sr_isError(res)) {
   2890       SET_STATUS_Failure( sr_Err(res) );
   2891       return;
   2892    }
   2893 
   2894    /* If we're tracing the child, and the launcher name looks bogus
   2895       (possibly because launcher.c couldn't figure it out, see
   2896       comments therein) then we have no option but to fail. */
   2897    if (trace_this_child
   2898        && (VG_(name_of_launcher) == NULL
   2899            || VG_(name_of_launcher)[0] != '/')) {
   2900       SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
   2901       return;
   2902    }
   2903 
   2904    /* After this point, we can't recover if the execve fails. */
   2905    VG_(debugLog)(1, "syswrap", "Exec of %s\n", (HChar*)ARG1);
   2906 
   2907 
   2908    // Terminate gdbserver if it is active.
   2909    if (VG_(clo_vgdb)  != Vg_VgdbNo) {
   2910       // If the child will not be traced, we need to terminate gdbserver
   2911       // to clean up the gdbserver resources (e.g. the FIFO files).
   2912       // If the child will be traced, we also terminate gdbserver: the new
   2913       // Valgrind will start a fresh gdbserver after exec.
   2914       VG_(gdbserver) (0);
   2915    }
   2916 
   2917    /* Resistance is futile.  Nuke all other threads.  POSIX mandates
   2918       this. (Really, nuke them all, since the new process will make
   2919       its own new thread.) */
   2920    VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
   2921    VG_(reap_threads)(tid);
   2922 
   2923    // Set up the child's exe path.
   2924    //
   2925    if (trace_this_child) {
   2926 
   2927       // We want to exec the launcher.  Get its pre-remembered path.
   2928       path = VG_(name_of_launcher);
   2929       // VG_(name_of_launcher) should have been acquired by m_main at
   2930       // startup.
   2931       vg_assert(path);
   2932 
   2933       launcher_basename = VG_(strrchr)(path, '/');
   2934       if (launcher_basename == NULL || launcher_basename[1] == 0) {
   2935          launcher_basename = path;  // hmm, very dubious
   2936       } else {
   2937          launcher_basename++;
   2938       }
   2939 
   2940    } else {
   2941       path = (HChar*)ARG1;
   2942    }
   2943 
   2944    // Set up the child's environment.
   2945    //
   2946    // Remove the valgrind-specific stuff from the environment so the
   2947    // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
   2948    // This is done unconditionally, since if we are tracing the child,
   2949    // the child valgrind will set up the appropriate client environment.
   2950    // Nb: we make a copy of the environment before trying to mangle it
   2951    // as it might be in read-only memory (this was bug #101881).
   2952    //
   2953    // Then, if tracing the child, set VALGRIND_LIB for it.
   2954    //
   2955    if (ARG3 == 0) {
   2956       envp = NULL;
   2957    } else {
   2958       envp = VG_(env_clone)( (HChar**)ARG3 );
   2959       if (envp == NULL) goto hosed;
   2960       VG_(env_remove_valgrind_env_stuff)( envp, True /*ro_strings*/, NULL );
   2961    }
   2962 
   2963    if (trace_this_child) {
   2964       // Set VALGRIND_LIB in ARG3 (the environment)
   2965       VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
   2966    }
   2967 
   2968    // Set up the child's args.  If not tracing it, they are
   2969    // simply ARG2.  Otherwise, they are
   2970    //
   2971    // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
   2972    //
   2973    // except that the first VG_(args_for_valgrind_noexecpass) args
   2974    // are omitted.
   2975    //
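           // For example (purely illustrative; the exact options depend on
           // how the outer Valgrind was invoked): if we were started as
           //    valgrind --tool=memcheck ./prog
           // and the client calls execve("/bin/ls", {"ls","-l",NULL}, envp),
           // the traced child is launched roughly as
           //    valgrind --tool=memcheck /bin/ls -l
           //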
   2976    if (!trace_this_child) {
   2977       argv = (HChar**)ARG2;
   2978    } else {
   2979       vg_assert( VG_(args_for_valgrind) );
   2980       vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
   2981       vg_assert( VG_(args_for_valgrind_noexecpass)
   2982                    <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
   2983       /* how many args in total will there be? */
   2984       // launcher basename
   2985       tot_args = 1;
   2986       // V's args
   2987       tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
   2988       tot_args -= VG_(args_for_valgrind_noexecpass);
   2989       // name of client exe
   2990       tot_args++;
   2991       // args for client exe, skipping [0]
   2992       arg2copy = (HChar**)ARG2;
   2993       if (arg2copy && arg2copy[0]) {
   2994          for (i = 1; arg2copy[i]; i++)
   2995             tot_args++;
   2996       }
   2997       // allocate
   2998       argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
   2999                           (tot_args+1) * sizeof(HChar*) );
   3000       // copy
   3001       j = 0;
   3002       argv[j++] = launcher_basename;
   3003       for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
   3004          if (i < VG_(args_for_valgrind_noexecpass))
   3005             continue;
   3006          argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
   3007       }
   3008       argv[j++] = (HChar*)ARG1;
   3009       if (arg2copy && arg2copy[0])
   3010          for (i = 1; arg2copy[i]; i++)
   3011             argv[j++] = arg2copy[i];
   3012       argv[j++] = NULL;
   3013       // check
   3014       vg_assert(j == tot_args+1);
   3015    }
   3016 
   3017    /* restore the DATA rlimit for the child */
   3018    VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
   3019 
   3020    /*
   3021       Set the signal state up for exec.
   3022 
   3023       We need to set the real signal state to make sure the exec'd
   3024       process gets SIG_IGN properly.
   3025 
   3026       Also set our real sigmask to match the client's sigmask so that
   3027       the exec'd child will get the right mask.  First we need to
   3028       clear out any pending signals so that they don't get delivered,
   3029       which would confuse things.
   3030 
   3031       XXX This is a bug - the signals should remain pending, and be
   3032       delivered to the new process after exec.  There's also a
   3033       race-condition, since if someone delivers us a signal between
   3034       the sigprocmask and the execve, we'll still get the signal. Oh
   3035       well.
   3036    */
   3037    {
   3038       vki_sigset_t allsigs;
   3039       vki_siginfo_t info;
   3040 
   3041       /* What this loop does: it queries SCSS (the signal state that
   3042          the client _thinks_ the kernel is in) by calling
   3043          VG_(do_sys_sigaction), and modifies the real kernel signal
   3044          state accordingly. */
   3045       for (i = 1; i < VG_(max_signal); i++) {
   3046          vki_sigaction_fromK_t sa_f;
   3047          vki_sigaction_toK_t   sa_t;
   3048          VG_(do_sys_sigaction)(i, NULL, &sa_f);
   3049          VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
   3050          if (sa_t.ksa_handler == VKI_SIG_IGN)
   3051             VG_(sigaction)(i, &sa_t, NULL);
   3052          else {
   3053             sa_t.ksa_handler = VKI_SIG_DFL;
   3054             VG_(sigaction)(i, &sa_t, NULL);
   3055          }
   3056       }
   3057 
   3058       VG_(sigfillset)(&allsigs);
   3059       while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
   3060          ;
   3061 
   3062       VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
   3063    }
   3064 
   3065    if (0) {
   3066       HChar **cpp;
   3067       VG_(printf)("exec: %s\n", path);
   3068       for (cpp = argv; cpp && *cpp; cpp++)
   3069          VG_(printf)("argv: %s\n", *cpp);
   3070       if (0)
   3071          for (cpp = envp; cpp && *cpp; cpp++)
   3072             VG_(printf)("env: %s\n", *cpp);
   3073    }
   3074 
   3075    SET_STATUS_from_SysRes(
   3076       VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
   3077    );
   3078 
   3079    /* If we got here, then the execve failed.  We've already made way
   3080       too much of a mess to continue, so we have to abort. */
   3081   hosed:
   3082    vg_assert(FAILURE);
   3083    VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %lu\n",
   3084                 ARG1, (HChar*)ARG1, ARG2, ARG3, ERR);
   3085    VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
   3086                             "execve() failing, so I'm dying.\n");
   3087    VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
   3088                             "or work out how to recover.\n");
   3089    VG_(exit)(101);
   3090 }
   3091 
   3092 PRE(sys_access)
   3093 {
   3094    PRINT("sys_access ( %#lx(%s), %ld )", ARG1, (HChar*)ARG1, SARG2);
   3095    PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
   3096    PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
   3097 }
   3098 
   3099 PRE(sys_alarm)
   3100 {
   3101    PRINT("sys_alarm ( %lu )", ARG1);
   3102    PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
   3103 }
   3104 
   3105 PRE(sys_brk)
   3106 {
   3107    Addr brk_limit = VG_(brk_limit);
   3108    Addr brk_new;
   3109 
   3110    /* libc   says: int   brk(void *end_data_segment);
   3111       kernel says: void* brk(void* end_data_segment);  (more or less)
   3112 
   3113       libc returns 0 on success, and -1 (and sets errno) on failure.
   3114       Nb: if you ask to shrink the dataseg end below what it
   3115       currently is, that always succeeds, even if the dataseg end
   3116       doesn't actually change (eg. brk(0)).  Unless it seg faults.
   3117 
   3118       Kernel returns the new dataseg end.  If the brk() failed, this
   3119       will be unchanged from the old one.  That's why calling (kernel)
   3120       brk(0) gives the current dataseg end (libc brk() just returns
   3121       zero in that case).
   3122 
   3123       Both will seg fault if you shrink it back into a text segment.
   3124    */
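           /* Illustrative example (hypothetical addresses): if the current
              break is 0x804c000, then kernel brk(0) returns 0x804c000, and
              kernel brk(0x804d000) returns 0x804d000 on success but
              0x804c000 if the request was refused; libc brk() would instead
              return 0 or -1/ENOMEM for that second call. */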
   3125    PRINT("sys_brk ( %#lx )", ARG1);
   3126    PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
   3127 
   3128    brk_new = do_brk(ARG1, tid);
   3129    SET_STATUS_Success( brk_new );
   3130 
   3131    if (brk_new == ARG1) {
   3132       /* brk() succeeded */
   3133       if (brk_new < brk_limit) {
   3134          /* successfully shrunk the data segment. */
   3135          VG_TRACK( die_mem_brk, (Addr)ARG1,
   3136 		   brk_limit-ARG1 );
   3137       } else
   3138       if (brk_new > brk_limit) {
   3139          /* successfully grew the data segment */
   3140          VG_TRACK( new_mem_brk, brk_limit,
   3141                    ARG1-brk_limit, tid );
   3142       }
   3143    } else {
   3144       /* brk() failed */
   3145       vg_assert(brk_limit == brk_new);
   3146    }
   3147 }
   3148 
   3149 PRE(sys_chdir)
   3150 {
   3151    FUSE_COMPATIBLE_MAY_BLOCK();
   3152    PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   3153    PRE_REG_READ1(long, "chdir", const char *, path);
   3154    PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
   3155 }
   3156 
   3157 PRE(sys_chmod)
   3158 {
   3159    FUSE_COMPATIBLE_MAY_BLOCK();
   3160    PRINT("sys_chmod ( %#lx(%s), %lu )", ARG1, (HChar*)ARG1, ARG2);
   3161    PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
   3162    PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
   3163 }
   3164 
   3165 PRE(sys_chown)
   3166 {
   3167    FUSE_COMPATIBLE_MAY_BLOCK();
   3168    PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   3169    PRE_REG_READ3(long, "chown",
   3170                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
   3171    PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
   3172 }
   3173 
   3174 PRE(sys_lchown)
   3175 {
   3176    FUSE_COMPATIBLE_MAY_BLOCK();
   3177    PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   3178    PRE_REG_READ3(long, "lchown",
   3179                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
   3180    PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
   3181 }
   3182 
   3183 PRE(sys_close)
   3184 {
   3185    FUSE_COMPATIBLE_MAY_BLOCK();
   3186    PRINT("sys_close ( %lu )", ARG1);
   3187    PRE_REG_READ1(long, "close", unsigned int, fd);
   3188 
   3189    /* Detect and negate attempts by the client to close Valgrind's log fd */
   3190    if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
   3191         /* If doing -d style logging (which is to fd=2), don't
   3192            allow that to be closed either. */
   3193         || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
   3194       SET_STATUS_Failure( VKI_EBADF );
   3195 }
   3196 
   3197 POST(sys_close)
   3198 {
   3199    if (VG_(clo_track_fds)) ML_(record_fd_close)(ARG1);
   3200 }
   3201 
   3202 PRE(sys_dup)
   3203 {
   3204    PRINT("sys_dup ( %lu )", ARG1);
   3205    PRE_REG_READ1(long, "dup", unsigned int, oldfd);
   3206 }
   3207 
   3208 POST(sys_dup)
   3209 {
   3210    vg_assert(SUCCESS);
   3211    if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
   3212       VG_(close)(RES);
   3213       SET_STATUS_Failure( VKI_EMFILE );
   3214    } else {
   3215       if (VG_(clo_track_fds))
   3216          ML_(record_fd_open_named)(tid, RES);
   3217    }
   3218 }
   3219 
   3220 PRE(sys_dup2)
   3221 {
   3222    PRINT("sys_dup2 ( %lu, %lu )", ARG1, ARG2);
   3223    PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
   3224    if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
   3225       SET_STATUS_Failure( VKI_EBADF );
   3226 }
   3227 
   3228 POST(sys_dup2)
   3229 {
   3230    vg_assert(SUCCESS);
   3231    if (VG_(clo_track_fds))
   3232       ML_(record_fd_open_named)(tid, RES);
   3233 }
   3234 
   3235 PRE(sys_fchdir)
   3236 {
   3237    FUSE_COMPATIBLE_MAY_BLOCK();
   3238    PRINT("sys_fchdir ( %lu )", ARG1);
   3239    PRE_REG_READ1(long, "fchdir", unsigned int, fd);
   3240 }
   3241 
   3242 PRE(sys_fchown)
   3243 {
   3244    FUSE_COMPATIBLE_MAY_BLOCK();
   3245    PRINT("sys_fchown ( %lu, %lu, %lu )", ARG1, ARG2, ARG3);
   3246    PRE_REG_READ3(long, "fchown",
   3247                  unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
   3248 }
   3249 
   3250 PRE(sys_fchmod)
   3251 {
   3252    FUSE_COMPATIBLE_MAY_BLOCK();
   3253    PRINT("sys_fchmod ( %lu, %lu )", ARG1, ARG2);
   3254    PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
   3255 }
   3256 
   3257 PRE(sys_newfstat)
   3258 {
   3259    FUSE_COMPATIBLE_MAY_BLOCK();
   3260    PRINT("sys_newfstat ( %lu, %#lx )", ARG1, ARG2);
   3261    PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
   3262    PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
   3263 }
   3264 
   3265 POST(sys_newfstat)
   3266 {
   3267    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   3268 }
   3269 
   3270 #if !defined(VGO_solaris)
   3271 static vki_sigset_t fork_saved_mask;
   3272 
   3273 // In Linux, the sys_fork() function varies across architectures, but we
   3274 // ignore the various args it gets, and so it looks arch-neutral.  Hmm.
   3275 PRE(sys_fork)
   3276 {
   3277    Bool is_child;
   3278    Int child_pid;
   3279    vki_sigset_t mask;
   3280 
   3281    PRINT("sys_fork ( )");
   3282    PRE_REG_READ0(long, "fork");
   3283 
   3284    /* Block all signals during fork, so that we can fix things up in
   3285       the child without being interrupted. */
   3286    VG_(sigfillset)(&mask);
   3287    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
   3288 
   3289    VG_(do_atfork_pre)(tid);
   3290 
   3291    SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
   3292 
   3293    if (!SUCCESS) return;
   3294 
   3295 #if defined(VGO_linux)
   3296    // RES is 0 for child, non-0 (the child's PID) for parent.
   3297    is_child = ( RES == 0 ? True : False );
   3298    child_pid = ( is_child ? -1 : RES );
   3299 #elif defined(VGO_darwin)
   3300    // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
   3301    is_child = RESHI;
   3302    child_pid = RES;
   3303 #else
   3304 #  error Unknown OS
   3305 #endif
   3306 
   3307    if (is_child) {
   3308       VG_(do_atfork_child)(tid);
   3309 
   3310       /* restore signal mask */
   3311       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   3312 
   3313       /* If --child-silent-after-fork=yes was specified, set the
   3314          output file descriptors to 'impossible' values.  This is
   3315          noticed by send_bytes_to_logging_sink in m_libcprint.c, which
   3316          duly stops writing any further output. */
   3317       if (VG_(clo_child_silent_after_fork)) {
   3318          if (!VG_(log_output_sink).is_socket)
   3319             VG_(log_output_sink).fd = -1;
   3320          if (!VG_(xml_output_sink).is_socket)
   3321             VG_(xml_output_sink).fd = -1;
   3322       }
   3323 
   3324    } else {
   3325       VG_(do_atfork_parent)(tid);
   3326 
   3327       PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);
   3328 
   3329       /* restore signal mask */
   3330       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   3331    }
   3332 }
   3333 #endif // !defined(VGO_solaris)
   3334 
   3335 PRE(sys_ftruncate)
   3336 {
   3337    *flags |= SfMayBlock;
   3338    PRINT("sys_ftruncate ( %lu, %lu )", ARG1, ARG2);
   3339    PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
   3340 }
   3341 
   3342 PRE(sys_truncate)
   3343 {
   3344    *flags |= SfMayBlock;
   3345    PRINT("sys_truncate ( %#lx(%s), %lu )", ARG1, (HChar*)ARG1, ARG2);
   3346    PRE_REG_READ2(long, "truncate",
   3347                  const char *, path, unsigned long, length);
   3348    PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
   3349 }
   3350 
   3351 PRE(sys_ftruncate64)
   3352 {
   3353    *flags |= SfMayBlock;
   3354 #if VG_WORDSIZE == 4
   3355    PRINT("sys_ftruncate64 ( %lu, %llu )", ARG1, MERGE64(ARG2,ARG3));
   3356    PRE_REG_READ3(long, "ftruncate64",
   3357                  unsigned int, fd,
   3358                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
   3359 #else
   3360    PRINT("sys_ftruncate64 ( %lu, %lu )", ARG1, ARG2);
   3361    PRE_REG_READ2(long, "ftruncate64",
   3362                  unsigned int,fd, UWord,length);
   3363 #endif
   3364 }
   3365 
   3366 PRE(sys_truncate64)
   3367 {
   3368    *flags |= SfMayBlock;
   3369 #if VG_WORDSIZE == 4
   3370    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
   3371    PRE_REG_READ3(long, "truncate64",
   3372                  const char *, path,
   3373                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
   3374 #else
   3375    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
   3376    PRE_REG_READ2(long, "truncate64",
   3377                  const char *,path, UWord,length);
   3378 #endif
   3379    PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
   3380 }
   3381 
   3382 PRE(sys_getdents)
   3383 {
   3384    *flags |= SfMayBlock;
   3385    PRINT("sys_getdents ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
   3386    PRE_REG_READ3(long, "getdents",
   3387                  unsigned int, fd, struct vki_dirent *, dirp,
   3388                  unsigned int, count);
   3389    PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
   3390 }
   3391 
   3392 POST(sys_getdents)
   3393 {
   3394    vg_assert(SUCCESS);
   3395    if (RES > 0)
   3396       POST_MEM_WRITE( ARG2, RES );
   3397 }
   3398 
   3399 PRE(sys_getdents64)
   3400 {
   3401    *flags |= SfMayBlock;
   3402    PRINT("sys_getdents64 ( %lu, %#lx, %lu )",ARG1, ARG2, ARG3);
   3403    PRE_REG_READ3(long, "getdents64",
   3404                  unsigned int, fd, struct vki_dirent64 *, dirp,
   3405                  unsigned int, count);
   3406    PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
   3407 }
   3408 
   3409 POST(sys_getdents64)
   3410 {
   3411    vg_assert(SUCCESS);
   3412    if (RES > 0)
   3413       POST_MEM_WRITE( ARG2, RES );
   3414 }
   3415 
   3416 PRE(sys_getgroups)
   3417 {
   3418    PRINT("sys_getgroups ( %ld, %#lx )", SARG1, ARG2);
   3419    PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
   3420    if (ARG1 > 0)
   3421       PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
   3422 }
   3423 
   3424 POST(sys_getgroups)
   3425 {
   3426    vg_assert(SUCCESS);
   3427    if (ARG1 > 0 && RES > 0)
   3428       POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
   3429 }
   3430 
   3431 PRE(sys_getcwd)
   3432 {
   3433    // Comment from linux/fs/dcache.c:
   3434    //   NOTE! The user-level library version returns a character pointer.
   3435    //   The kernel system call just returns the length of the buffer filled
   3436    //   (which includes the ending '\0' character), or a negative error
   3437    //   value.
   3438    // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
   3439    PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
   3440    PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
   3441    PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
   3442 }
   3443 
   3444 POST(sys_getcwd)
   3445 {
   3446    vg_assert(SUCCESS);
   3447    if (RES != (Addr)NULL)
   3448       POST_MEM_WRITE( ARG1, RES );
   3449 }
   3450 
   3451 PRE(sys_geteuid)
   3452 {
   3453    PRINT("sys_geteuid ( )");
   3454    PRE_REG_READ0(long, "geteuid");
   3455 }
   3456 
   3457 PRE(sys_getegid)
   3458 {
   3459    PRINT("sys_getegid ( )");
   3460    PRE_REG_READ0(long, "getegid");
   3461 }
   3462 
   3463 PRE(sys_getgid)
   3464 {
   3465    PRINT("sys_getgid ( )");
   3466    PRE_REG_READ0(long, "getgid");
   3467 }
   3468 
   3469 PRE(sys_getpid)
   3470 {
   3471    PRINT("sys_getpid ()");
   3472    PRE_REG_READ0(long, "getpid");
   3473 }
   3474 
   3475 PRE(sys_getpgid)
   3476 {
   3477    PRINT("sys_getpgid ( %ld )", SARG1);
   3478    PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
   3479 }
   3480 
   3481 PRE(sys_getpgrp)
   3482 {
   3483    PRINT("sys_getpgrp ()");
   3484    PRE_REG_READ0(long, "getpgrp");
   3485 }
   3486 
   3487 PRE(sys_getppid)
   3488 {
   3489    PRINT("sys_getppid ()");
   3490    PRE_REG_READ0(long, "getppid");
   3491 }
   3492 
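        /* Summary of the fixups below: the client is shown Valgrind's notion
           of the fd limits -- VG_(fd_soft_limit) and VG_(fd_hard_limit) --
           rather than whatever the kernel reported, and RLIMIT_DATA and
           RLIMIT_STACK are reported from the values saved for the client
           (see also PRE(sys_setrlimit)). */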
   3493 static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
   3494 {
   3495    POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
   3496 
   3497 #ifdef _RLIMIT_POSIX_FLAG
   3498    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
   3499    // Unset it here to make the switch case below work correctly.
   3500    a1 &= ~_RLIMIT_POSIX_FLAG;
   3501 #endif
   3502 
   3503    switch (a1) {
   3504    case VKI_RLIMIT_NOFILE:
   3505       ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
   3506       ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
   3507       break;
   3508 
   3509    case VKI_RLIMIT_DATA:
   3510       *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
   3511       break;
   3512 
   3513    case VKI_RLIMIT_STACK:
   3514       *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);
   3515       break;
   3516    }
   3517 }
   3518 
   3519 PRE(sys_old_getrlimit)
   3520 {
   3521    PRINT("sys_old_getrlimit ( %lu, %#lx )", ARG1, ARG2);
   3522    PRE_REG_READ2(long, "old_getrlimit",
   3523                  unsigned int, resource, struct rlimit *, rlim);
   3524    PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   3525 }
   3526 
   3527 POST(sys_old_getrlimit)
   3528 {
   3529    common_post_getrlimit(tid, ARG1, ARG2);
   3530 }
   3531 
   3532 PRE(sys_getrlimit)
   3533 {
   3534    PRINT("sys_getrlimit ( %lu, %#lx )", ARG1, ARG2);
   3535    PRE_REG_READ2(long, "getrlimit",
   3536                  unsigned int, resource, struct rlimit *, rlim);
   3537    PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   3538 }
   3539 
   3540 POST(sys_getrlimit)
   3541 {
   3542    common_post_getrlimit(tid, ARG1, ARG2);
   3543 }
   3544 
   3545 PRE(sys_getrusage)
   3546 {
   3547    PRINT("sys_getrusage ( %ld, %#lx )", SARG1, ARG2);
   3548    PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
   3549    PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
   3550 }
   3551 
   3552 POST(sys_getrusage)
   3553 {
   3554    vg_assert(SUCCESS);
   3555    if (RES == 0)
   3556       POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
   3557 }
   3558 
   3559 PRE(sys_gettimeofday)
   3560 {
   3561    PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
   3562    PRE_REG_READ2(long, "gettimeofday",
   3563                  struct timeval *, tv, struct timezone *, tz);
   3564    // GrP fixme does darwin write to *tz anymore?
   3565    if (ARG1 != 0)
   3566       PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
   3567    if (ARG2 != 0)
   3568       PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
   3569 }
   3570 
   3571 POST(sys_gettimeofday)
   3572 {
   3573    vg_assert(SUCCESS);
   3574    if (RES == 0) {
   3575       if (ARG1 != 0)
   3576          POST_timeval_WRITE( ARG1 );
   3577       if (ARG2 != 0)
   3578 	 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
   3579    }
   3580 }
   3581 
   3582 PRE(sys_settimeofday)
   3583 {
   3584    PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
   3585    PRE_REG_READ2(long, "settimeofday",
   3586                  struct timeval *, tv, struct timezone *, tz);
   3587    if (ARG1 != 0)
   3588       PRE_timeval_READ( "settimeofday(tv)", ARG1 );
   3589    if (ARG2 != 0) {
   3590       PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
   3591       /* maybe should warn if tz->tz_dsttime is non-zero? */
   3592    }
   3593 }
   3594 
   3595 PRE(sys_getuid)
   3596 {
   3597    PRINT("sys_getuid ( )");
   3598    PRE_REG_READ0(long, "getuid");
   3599 }
   3600 
   3601 void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
   3602 {
   3603    /* We don't have any specific information on it, so
   3604       try to do something reasonable based on direction and
   3605       size bits.  The encoding scheme is described in
   3606       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
   3607 
   3608       According to Simon Hausmann, _IOC_READ means the kernel
   3609       writes a value to the ioctl argument passed from user space,
   3610       and the other way around with _IOC_WRITE. */
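           /* Illustrative sketch only (the authoritative layout is in the
              headers named above and differs slightly between architectures):
              on most Linux targets a request packs the command number in bits
              0..7, the 'type' (magic) byte in bits 8..15, the argument size
              in bits 16..29 and the direction in bits 30..31, which is what
              _VKI_IOC_DIR() and _VKI_IOC_SIZE() below pull apart. */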
   3611 
   3612 #if defined(VGO_solaris)
   3613    /* The majority of Solaris ioctl requests do not honour direction hints. */
   3614    UInt dir  = _VKI_IOC_NONE;
   3615 #else
   3616    UInt dir  = _VKI_IOC_DIR(request);
   3617 #endif
   3618    UInt size = _VKI_IOC_SIZE(request);
   3619 
   3620    if (SimHintiS(SimHint_lax_ioctls, VG_(clo_sim_hints))) {
   3621       /*
   3622        * Be very lax about ioctl handling; the only
   3623        * assumption is that the size is correct. Doesn't
   3624        * require the full buffer to be initialized when
   3625        * writing.  Without this, using some device
   3626        * drivers with a large number of strange ioctl
   3627        * commands becomes very tiresome.
   3628        */
   3629    } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
   3630       static UWord unknown_ioctl[10];
   3631       static Int moans = sizeof(unknown_ioctl) / sizeof(unknown_ioctl[0]);
   3632 
   3633       if (moans > 0 && !VG_(clo_xml)) {
   3634          /* Check if we have not already moaned for this request. */
   3635          UInt i;
   3636          for (i = 0; i < sizeof(unknown_ioctl)/sizeof(unknown_ioctl[0]); i++) {
   3637             if (unknown_ioctl[i] == request)
   3638                break;
   3639             if (unknown_ioctl[i] == 0) {
   3640                unknown_ioctl[i] = request;
   3641                moans--;
   3642                VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
   3643                          " with no size/direction hints.\n", request);
   3644                VG_(umsg)("   This could cause spurious value errors to appear.\n");
   3645                VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
   3646                          "guidance on writing a proper wrapper.\n" );
   3647                //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   3648                return;
   3649             }
   3650          }
   3651       }
   3652    } else {
   3653       //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
   3654       //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   3655       if ((dir & _VKI_IOC_WRITE) && size > 0)
   3656          PRE_MEM_READ( "ioctl(generic)", arg, size);
   3657       if ((dir & _VKI_IOC_READ) && size > 0)
   3658          PRE_MEM_WRITE( "ioctl(generic)", arg, size);
   3659    }
   3660 }
   3661 
   3662 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
   3663 {
   3664    /* We don't have any specific information on it, so
   3665       try to do something reasonable based on direction and
   3666       size bits.  The encoding scheme is described in
   3667       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
   3668 
   3669       According to Simon Hausmann, _IOC_READ means the kernel
   3670       writes a value to the ioctl argument passed from user space,
   3671       and the other way around with _IOC_WRITE. */
   3672 
   3673    UInt dir  = _VKI_IOC_DIR(request);
   3674    UInt size = _VKI_IOC_SIZE(request);
   3675    if (size > 0 && (dir & _VKI_IOC_READ)
   3676        && res == 0
   3677        && arg != (Addr)NULL)
   3678    {
   3679       POST_MEM_WRITE(arg, size);
   3680    }
   3681 }
   3682 
   3683 /*
   3684    If we're sending a SIGKILL to one of our own threads, then simulate
   3685    it rather than really sending the signal, so that the target thread
   3686    gets a chance to clean up.  Returns True if we did the killing (or
   3687    no killing is necessary), and False if the caller should use the
   3688    normal kill syscall.
   3689 
   3690    "pid" is any pid argument which can be passed to kill; group kills
   3691    (< -1, 0), and owner kills (-1) are ignored, on the grounds that
   3692    they'll most likely hit all the threads and we won't need to worry
   3693    about cleanup.  In truth, we can't fully emulate these multicast
   3694    kills.
   3695 
   3696    "tgid" is a thread group id.  If it is not -1, then the target
   3697    thread must be in that thread group.
   3698  */
   3699 Bool ML_(do_sigkill)(Int pid, Int tgid)
   3700 {
   3701    ThreadState *tst;
   3702    ThreadId tid;
   3703 
   3704    if (pid <= 0)
   3705       return False;
   3706 
   3707    tid = VG_(lwpid_to_vgtid)(pid);
   3708    if (tid == VG_INVALID_THREADID)
   3709       return False;		/* none of our threads */
   3710 
   3711    tst = VG_(get_ThreadState)(tid);
   3712    if (tst == NULL || tst->status == VgTs_Empty)
   3713       return False;		/* hm, shouldn't happen */
   3714 
   3715    if (tgid != -1 && tst->os_state.threadgroup != tgid)
   3716       return False;		/* not the right thread group */
   3717 
   3718    /* Check to see that the target isn't already exiting. */
   3719    if (!VG_(is_exiting)(tid)) {
   3720       if (VG_(clo_trace_signals))
   3721 	 VG_(message)(Vg_DebugMsg,
   3722                       "Thread %u being killed with SIGKILL\n",
   3723                       tst->tid);
   3724 
   3725       tst->exitreason = VgSrc_FatalSig;
   3726       tst->os_state.fatalsig = VKI_SIGKILL;
   3727 
   3728       if (!VG_(is_running_thread)(tid))
   3729 	 VG_(get_thread_out_of_syscall)(tid);
   3730    }
   3731 
   3732    return True;
   3733 }
   3734 
   3735 PRE(sys_kill)
   3736 {
   3737    PRINT("sys_kill ( %ld, %ld )", SARG1, SARG2);
   3738    PRE_REG_READ2(long, "kill", int, pid, int, signal);
   3739    if (!ML_(client_signal_OK)(ARG2)) {
   3740       SET_STATUS_Failure( VKI_EINVAL );
   3741       return;
   3742    }
   3743 
   3744    /* If we're sending SIGKILL, check to see if the target is one of
   3745       our threads and handle it specially. */
   3746    if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
   3747       SET_STATUS_Success(0);
   3748    else
   3749       /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
   3750          affecting how posix-compliant the call is.  I guess it is
   3751          harmless to pass the 3rd arg on other platforms; hence pass
   3752          it on all. */
   3753       SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
   3754 
   3755    if (VG_(clo_trace_signals))
   3756       VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
   3757 		   SARG2, SARG1);
   3758 
   3759    /* This kill might have given us a pending signal.  Ask for a check once
   3760       the syscall is done. */
   3761    *flags |= SfPollAfter;
   3762 }
   3763 
   3764 PRE(sys_link)
   3765 {
   3766    *flags |= SfMayBlock;
   3767    PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   3768    PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
   3769    PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
   3770    PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
   3771 }
   3772 
   3773 PRE(sys_newlstat)
   3774 {
   3775    PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   3776    PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
   3777    PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
   3778    PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
   3779 }
   3780 
   3781 POST(sys_newlstat)
   3782 {
   3783    vg_assert(SUCCESS);
   3784    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   3785 }
   3786 
   3787 PRE(sys_mkdir)
   3788 {
   3789    *flags |= SfMayBlock;
   3790    PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1, (HChar*)ARG1, SARG2);
   3791    PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
   3792    PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
   3793 }
   3794 
   3795 PRE(sys_mprotect)
   3796 {
   3797    PRINT("sys_mprotect ( %#lx, %lu, %lu )", ARG1, ARG2, ARG3);
   3798    PRE_REG_READ3(long, "mprotect",
   3799                  unsigned long, addr, vki_size_t, len, unsigned long, prot);
   3800 
   3801    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
   3802       SET_STATUS_Failure( VKI_ENOMEM );
   3803    }
   3804 #if defined(VKI_PROT_GROWSDOWN)
   3805    else
   3806    if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
   3807       /* Deal with mprotects on growable stack areas.
   3808 
   3809          The critical files to understand all this are mm/mprotect.c
   3810          in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
   3811          glibc.
   3812 
   3813          The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
   3814          round the start/end address of mprotect to the start/end of
   3815          the underlying vma and glibc uses that as an easy way to
   3816          change the protection of the stack by calling mprotect on the
   3817          last page of the stack with PROT_GROWSDOWN set.
   3818 
   3819          The sanity check provided by the kernel is that the vma must
   3820          have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
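              /* Illustrative sketch (hypothetical addresses): if the stack vma
                 is [0xbffdf000, 0xbfffffff] and glibc calls
                    mprotect(0xbfffe000, 0x2000, PROT_READ|PROT_WRITE
                                                 |PROT_GROWSDOWN)
                 then the kernel applies the change from the vma's low end
                 upwards.  We mimic that below by widening ARG1/ARG2 to cover
                 [aseg->start .. ARG1+ARG2) and clearing the GROWSDOWN bit
                 before the call proceeds. */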
   3821       UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
   3822       NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
   3823       NSegment const *rseg;
   3824 
   3825       vg_assert(aseg);
   3826 
   3827       if (grows == VKI_PROT_GROWSDOWN) {
   3828          rseg = VG_(am_next_nsegment)( aseg, False/*backwards*/ );
   3829          if (rseg &&
   3830              rseg->kind == SkResvn &&
   3831              rseg->smode == SmUpper &&
   3832              rseg->end+1 == aseg->start) {
   3833             Addr end = ARG1 + ARG2;
   3834             ARG1 = aseg->start;
   3835             ARG2 = end - aseg->start;
   3836             ARG3 &= ~VKI_PROT_GROWSDOWN;
   3837          } else {
   3838             SET_STATUS_Failure( VKI_EINVAL );
   3839          }
   3840       } else if (grows == VKI_PROT_GROWSUP) {
   3841          rseg = VG_(am_next_nsegment)( aseg, True/*forwards*/ );
   3842          if (rseg &&
   3843              rseg->kind == SkResvn &&
   3844              rseg->smode == SmLower &&
   3845              aseg->end+1 == rseg->start) {
   3846             ARG2 = aseg->end - ARG1 + 1;
   3847             ARG3 &= ~VKI_PROT_GROWSUP;
   3848          } else {
   3849             SET_STATUS_Failure( VKI_EINVAL );
   3850          }
   3851       } else {
   3852          /* both GROWSUP and GROWSDOWN */
   3853          SET_STATUS_Failure( VKI_EINVAL );
   3854       }
   3855    }
   3856 #endif   // defined(VKI_PROT_GROWSDOWN)
   3857 }
   3858 
   3859 POST(sys_mprotect)
   3860 {
   3861    Addr a    = ARG1;
   3862    SizeT len = ARG2;
   3863    Int  prot = ARG3;
   3864 
   3865    ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
   3866 }
   3867 
   3868 PRE(sys_munmap)
   3869 {
   3870    if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);
   3871    PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
   3872    PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
   3873 
   3874    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
   3875       SET_STATUS_Failure( VKI_EINVAL );
   3876 }
   3877 
   3878 POST(sys_munmap)
   3879 {
   3880    Addr  a   = ARG1;
   3881    SizeT len = ARG2;
   3882 
   3883    ML_(notify_core_and_tool_of_munmap)( a, len );
   3884 }
   3885 
   3886 PRE(sys_mincore)
   3887 {
   3888    PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
   3889    PRE_REG_READ3(long, "mincore",
   3890                  unsigned long, start, vki_size_t, length,
   3891                  unsigned char *, vec);
   3892    PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
   3893 }
   3894 POST(sys_mincore)
   3895 {
   3896    POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
   3897 }
   3898 
   3899 PRE(sys_nanosleep)
   3900 {
   3901    *flags |= SfMayBlock|SfPostOnFail;
   3902    PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
   3903    PRE_REG_READ2(long, "nanosleep",
   3904                  struct timespec *, req, struct timespec *, rem);
   3905    PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
   3906    if (ARG2 != 0)
   3907       PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
   3908 }
   3909 
   3910 POST(sys_nanosleep)
   3911 {
   3912    vg_assert(SUCCESS || FAILURE);
   3913    if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
   3914       POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
   3915 }
   3916 
   3917 #if defined(VGO_linux) || defined(VGO_solaris)
   3918 /* Handles the case where the open is of /proc/self/auxv or
   3919    /proc/<pid>/auxv, and just gives out a copy of the fd for the
   3920    fake file we cooked up at startup (in m_main).  Also, seeks the
   3921    cloned fd back to the start.
   3922    Returns True if auxv open was handled (status is set). */
   3923 Bool ML_(handle_auxv_open)(SyscallStatus *status, const HChar *filename,
   3924                            int flags)
   3925 {
   3926    HChar  name[30];   // large enough
   3927 
   3928    if (!ML_(safe_to_deref)((const void *) filename, 1))
   3929       return False;
   3930 
   3931    /* Opening /proc/<pid>/auxv or /proc/self/auxv? */
   3932    VG_(sprintf)(name, "/proc/%d/auxv", VG_(getpid)());
   3933    if (!VG_STREQ(filename, name) && !VG_STREQ(filename, "/proc/self/auxv"))
   3934       return False;
   3935 
   3936    /* Allow the file to be opened only for reading. */
   3937    if (flags & (VKI_O_WRONLY | VKI_O_RDWR)) {
   3938       SET_STATUS_Failure(VKI_EACCES);
   3939       return True;
   3940    }
   3941 
   3942 #  if defined(VGO_solaris)
   3943    VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_auxv_fd));
   3944    SysRes sres = VG_(open)(name, flags, 0);
   3945    SET_STATUS_from_SysRes(sres);
   3946 #  else
   3947    SysRes sres = VG_(dup)(VG_(cl_auxv_fd));
   3948    SET_STATUS_from_SysRes(sres);
   3949    if (!sr_isError(sres)) {
   3950       OffT off = VG_(lseek)(sr_Res(sres), 0, VKI_SEEK_SET);
   3951       if (off < 0)
   3952          SET_STATUS_Failure(VKI_EMFILE);
   3953    }
   3954 #  endif
   3955 
   3956    return True;
   3957 }
   3958 #endif // defined(VGO_linux) || defined(VGO_solaris)
   3959 
   3960 PRE(sys_open)
   3961 {
   3962    if (ARG2 & VKI_O_CREAT) {
   3963       // 3-arg version
   3964       PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1, (HChar*)ARG1, SARG2, SARG3);
   3965       PRE_REG_READ3(long, "open",
   3966                     const char *, filename, int, flags, int, mode);
   3967    } else {
   3968       // 2-arg version
   3969       PRINT("sys_open ( %#lx(%s), %ld )",ARG1, (HChar*)ARG1, SARG2);
   3970       PRE_REG_READ2(long, "open",
   3971                     const char *, filename, int, flags);
   3972    }
   3973    PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
   3974 
   3975 #if defined(VGO_linux)
   3976    /* Handle the case where the open is of /proc/self/cmdline or
   3977       /proc/<pid>/cmdline, and just give it a copy of the fd for the
   3978       fake file we cooked up at startup (in m_main).  Also, seek the
   3979       cloned fd back to the start. */
   3980    {
   3981       HChar  name[30];   // large enough
   3982       HChar* arg1s = (HChar*) ARG1;
   3983       SysRes sres;
   3984 
   3985       VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
   3986       if (ML_(safe_to_deref)( arg1s, 1 ) &&
   3987           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
   3988          )
   3989       {
   3990          sres = VG_(dup)( VG_(cl_cmdline_fd) );
   3991          SET_STATUS_from_SysRes( sres );
   3992          if (!sr_isError(sres)) {
   3993             OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
   3994             if (off < 0)
   3995                SET_STATUS_Failure( VKI_EMFILE );
   3996          }
   3997          return;
   3998       }
   3999    }
   4000 
   4001    /* Handle also the case of /proc/self/auxv or /proc/<pid>/auxv. */
   4002    if (ML_(handle_auxv_open)(status, (const HChar *)ARG1, ARG2))
   4003       return;
   4004 #endif // defined(VGO_linux)
   4005 
   4006    /* Otherwise handle normally */
   4007    *flags |= SfMayBlock;
   4008 }
   4009 
   4010 POST(sys_open)
   4011 {
   4012    vg_assert(SUCCESS);
   4013    if (!ML_(fd_allowed)(RES, "open", tid, True)) {
   4014       VG_(close)(RES);
   4015       SET_STATUS_Failure( VKI_EMFILE );
   4016    } else {
   4017       if (VG_(clo_track_fds))
   4018          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
   4019    }
   4020 }
   4021 
   4022 PRE(sys_read)
   4023 {
   4024    *flags |= SfMayBlock;
   4025    PRINT("sys_read ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
   4026    PRE_REG_READ3(ssize_t, "read",
   4027                  unsigned int, fd, char *, buf, vki_size_t, count);
   4028 
   4029    if (!ML_(fd_allowed)(ARG1, "read", tid, False))
   4030       SET_STATUS_Failure( VKI_EBADF );
   4031    else
   4032       PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
   4033 }
   4034 
   4035 POST(sys_read)
   4036 {
   4037    vg_assert(SUCCESS);
   4038    POST_MEM_WRITE( ARG2, RES );
   4039 }
   4040 
   4041 PRE(sys_write)
   4042 {
   4043    Bool ok;
   4044    *flags |= SfMayBlock;
   4045    PRINT("sys_write ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
   4046    PRE_REG_READ3(ssize_t, "write",
   4047                  unsigned int, fd, const char *, buf, vki_size_t, count);
   4048    /* check to see if it is allowed.  If not, try for an exemption from
   4049       --sim-hints=enable-outer (used for self hosting). */
   4050    ok = ML_(fd_allowed)(ARG1, "write", tid, False);
   4051    if (!ok && ARG1 == 2/*stderr*/
   4052            && SimHintiS(SimHint_enable_outer, VG_(clo_sim_hints)))
   4053       ok = True;
   4054 #if defined(VGO_solaris)
   4055    if (!ok && VG_(vfork_fildes_addr) != NULL &&
   4056        *VG_(vfork_fildes_addr) >= 0 && *VG_(vfork_fildes_addr) == ARG1)
   4057       ok = True;
   4058 #endif
   4059    if (!ok)
   4060       SET_STATUS_Failure( VKI_EBADF );
   4061    else
   4062       PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
   4063 }
   4064 
   4065 PRE(sys_creat)
   4066 {
   4067    *flags |= SfMayBlock;
   4068    PRINT("sys_creat ( %#lx(%s), %ld )", ARG1, (HChar*)ARG1, SARG2);
   4069    PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
   4070    PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
   4071 }
   4072 
   4073 POST(sys_creat)
   4074 {
   4075    vg_assert(SUCCESS);
   4076    if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
   4077       VG_(close)(RES);
   4078       SET_STATUS_Failure( VKI_EMFILE );
   4079    } else {
   4080       if (VG_(clo_track_fds))
   4081          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
   4082    }
   4083 }
   4084 
   4085 PRE(sys_poll)
   4086 {
   4087    /* struct pollfd {
   4088         int fd;           -- file descriptor
   4089         short events;     -- requested events
   4090         short revents;    -- returned events
   4091       };
   4092       int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
   4093    */
   4094    UInt i;
   4095    struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
   4096    *flags |= SfMayBlock;
   4097    PRINT("sys_poll ( %#lx, %lu, %ld )\n", ARG1, ARG2, SARG3);
   4098    PRE_REG_READ3(long, "poll",
   4099                  struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
   4100 
   4101    for (i = 0; i < ARG2; i++) {
   4102       PRE_MEM_READ( "poll(ufds.fd)",
   4103                     (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
   4104       PRE_MEM_READ( "poll(ufds.events)",
   4105                     (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
   4106       PRE_MEM_WRITE( "poll(ufds.revents)",
   4107                      (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
   4108    }
   4109 }
   4110 
   4111 POST(sys_poll)
   4112 {
   4113    if (RES >= 0) {
   4114       UInt i;
   4115       struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
   4116       for (i = 0; i < ARG2; i++)
   4117 	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
   4118    }
   4119 }
   4120 
   4121 PRE(sys_readlink)
   4122 {
   4123    FUSE_COMPATIBLE_MAY_BLOCK();
   4124    Word saved = SYSNO;
   4125 
   4126    PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
   4127    PRE_REG_READ3(long, "readlink",
   4128                  const char *, path, char *, buf, int, bufsiz);
   4129    PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
   4130    PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
   4131 
   4132    {
   4133 #if defined(VGO_linux)
   4134       /*
   4135        * Handle the case where readlink is looking at /proc/self/exe or
   4136        * /proc/<pid>/exe.
   4137        */
   4138       HChar  name[30];   // large enough
   4139       HChar* arg1s = (HChar*) ARG1;
   4140       VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
   4141       if (ML_(safe_to_deref)(arg1s, 1) &&
   4142           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
   4143          )
   4144       {
   4145          VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
   4146          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
   4147                                                          ARG2, ARG3));
   4148       } else
   4149 #elif defined(VGO_solaris)
   4150       /* Same for Solaris, but /proc/self/path/a.out and
   4151          /proc/<pid>/path/a.out. */
   4152       HChar  name[30];   // large enough
   4153       HChar* arg1s = (HChar*) ARG1;
   4154       VG_(sprintf)(name, "/proc/%d/path/a.out", VG_(getpid)());
   4155       if (ML_(safe_to_deref)(arg1s, 1) &&
   4156           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/path/a.out"))
   4157          )
   4158       {
   4159          VG_(sprintf)(name, "/proc/self/path/%d", VG_(cl_exec_fd));
   4160          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
   4161                                                          ARG2, ARG3));
   4162       } else
   4163 #endif
   4164       {
   4165          /* Normal case */
   4166          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
   4167       }
   4168    }
   4169 
   4170    if (SUCCESS && RES > 0)
   4171       POST_MEM_WRITE( ARG2, RES );
   4172 }
   4173 
   4174 PRE(sys_readv)
   4175 {
   4176    Int i;
   4177    struct vki_iovec * vec;
   4178    *flags |= SfMayBlock;
   4179    PRINT("sys_readv ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
   4180    PRE_REG_READ3(ssize_t, "readv",
   4181                  unsigned long, fd, const struct iovec *, vector,
   4182                  unsigned long, count);
   4183    if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
   4184       SET_STATUS_Failure( VKI_EBADF );
   4185    } else {
   4186       if ((Int)ARG3 >= 0)
   4187          PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
   4188 
   4189       if (ARG2 != 0) {
   4190          /* ToDo: don't do any of the following if the vector is invalid */
   4191          vec = (struct vki_iovec *)ARG2;
   4192          for (i = 0; i < (Int)ARG3; i++)
   4193             PRE_MEM_WRITE( "readv(vector[...])",
   4194                            (Addr)vec[i].iov_base, vec[i].iov_len );
   4195       }
   4196    }
   4197 }
   4198 
   4199 POST(sys_readv)
   4200 {
   4201    vg_assert(SUCCESS);
   4202    if (RES > 0) {
   4203       Int i;
   4204       struct vki_iovec * vec = (struct vki_iovec *)ARG2;
   4205       Int remains = RES;
   4206 
   4207       /* RES holds the number of bytes read. */
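              /* E.g. (illustrative): with three 4-byte iovecs and RES == 10,
                 the first two buffers are marked fully written and only the
                 first two bytes of the third. */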
   4208       for (i = 0; i < (Int)ARG3; i++) {
   4209 	 Int nReadThisBuf = vec[i].iov_len;
   4210 	 if (nReadThisBuf > remains) nReadThisBuf = remains;
   4211 	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
   4212 	 remains -= nReadThisBuf;
   4213 	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
   4214       }
   4215    }
   4216 }
   4217 
   4218 PRE(sys_rename)
   4219 {
   4220    FUSE_COMPATIBLE_MAY_BLOCK();
   4221    PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   4222    PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
   4223    PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
   4224    PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
   4225 }
   4226 
   4227 PRE(sys_rmdir)
   4228 {
   4229    *flags |= SfMayBlock;
   4230    PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   4231    PRE_REG_READ1(long, "rmdir", const char *, pathname);
   4232    PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
   4233 }
   4234 
   4235 PRE(sys_select)
   4236 {
   4237    *flags |= SfMayBlock;
   4238    PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", SARG1, ARG2, ARG3,
   4239          ARG4, ARG5);
   4240    PRE_REG_READ5(long, "select",
   4241                  int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
   4242                  vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
   4243    // XXX: this possibly understates how much memory is read: ARG1/8
           //      rounds down, so a trailing partial byte of each fd_set (when
           //      n is not a multiple of 8) is not checked.
   4244    if (ARG2 != 0)
   4245       PRE_MEM_READ( "select(readfds)",
   4246 		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
   4247    if (ARG3 != 0)
   4248       PRE_MEM_READ( "select(writefds)",
   4249 		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
   4250    if (ARG4 != 0)
   4251       PRE_MEM_READ( "select(exceptfds)",
   4252 		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
   4253    if (ARG5 != 0)
   4254       PRE_timeval_READ( "select(timeout)", ARG5 );
   4255 }
   4256 
   4257 PRE(sys_setgid)
   4258 {
   4259    PRINT("sys_setgid ( %lu )", ARG1);
   4260    PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
   4261 }
   4262 
   4263 PRE(sys_setsid)
   4264 {
   4265    PRINT("sys_setsid ( )");
   4266    PRE_REG_READ0(long, "setsid");
   4267 }
   4268 
   4269 PRE(sys_setgroups)
   4270 {
   4271    PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
   4272    PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
   4273    if (ARG1 > 0)
   4274       PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
   4275 }
   4276 
   4277 PRE(sys_setpgid)
   4278 {
   4279    PRINT("setpgid ( %ld, %ld )", SARG1, SARG2);
   4280    PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
   4281 }
   4282 
   4283 PRE(sys_setregid)
   4284 {
   4285    PRINT("sys_setregid ( %lu, %lu )", ARG1, ARG2);
   4286    PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
   4287 }
   4288 
   4289 PRE(sys_setreuid)
   4290 {
   4291    PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
   4292    PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
   4293 }
   4294 
   4295 PRE(sys_setrlimit)
   4296 {
   4297    UWord arg1 = ARG1;
   4298    PRINT("sys_setrlimit ( %lu, %#lx )", ARG1, ARG2);
   4299    PRE_REG_READ2(long, "setrlimit",
   4300                  unsigned int, resource, struct rlimit *, rlim);
   4301    PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   4302 
   4303 #ifdef _RLIMIT_POSIX_FLAG
   4304    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
   4305    // Unset it here to make the if statements below work correctly.
   4306    arg1 &= ~_RLIMIT_POSIX_FLAG;
   4307 #endif
   4308 
   4309    if (!VG_(am_is_valid_for_client)(ARG2, sizeof(struct vki_rlimit),
   4310                                     VKI_PROT_READ)) {
   4311       SET_STATUS_Failure( VKI_EFAULT );
   4312    }
   4313    else if (((struct vki_rlimit *)ARG2)->rlim_cur
   4314             > ((struct vki_rlimit *)ARG2)->rlim_max) {
   4315       SET_STATUS_Failure( VKI_EINVAL );
   4316    }
   4317    else if (arg1 == VKI_RLIMIT_NOFILE) {
   4318       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
   4319           ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
   4320          SET_STATUS_Failure( VKI_EPERM );
   4321       }
   4322       else {
   4323          VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
   4324          SET_STATUS_Success( 0 );
   4325       }
   4326    }
   4327    else if (arg1 == VKI_RLIMIT_DATA) {
   4328       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
   4329           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
   4330          SET_STATUS_Failure( VKI_EPERM );
   4331       }
   4332       else {
   4333          VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
   4334          SET_STATUS_Success( 0 );
   4335       }
   4336    }
   4337    else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
   4338       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
   4339           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
   4340          SET_STATUS_Failure( VKI_EPERM );
   4341       }
   4342       else {
   4343          /* Change the value of client_stack_szB to the rlim_cur value but
   4344             only if it is smaller than the size of the allocated stack for the
   4345             client.
   4346             TODO: All platforms should set VG_(clstk_max_size) as part of their
   4347                   setup_client_stack(). */
   4348          if ((VG_(clstk_max_size) == 0)
   4349              || (((struct vki_rlimit *) ARG2)->rlim_cur <= VG_(clstk_max_size)))
   4350             VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
   4351 
   4352          VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
   4353          SET_STATUS_Success( 0 );
   4354       }
   4355    }
   4356 }
   4357 
   4358 PRE(sys_setuid)
   4359 {
   4360    PRINT("sys_setuid ( %lu )", ARG1);
   4361    PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
   4362 }
   4363 
   4364 PRE(sys_newstat)
   4365 {
   4366    FUSE_COMPATIBLE_MAY_BLOCK();
   4367    PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   4368    PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
   4369    PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
   4370    PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
   4371 }
   4372 
   4373 POST(sys_newstat)
   4374 {
   4375    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   4376 }
   4377 
   4378 PRE(sys_statfs)
   4379 {
   4380    FUSE_COMPATIBLE_MAY_BLOCK();
   4381    PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   4382    PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
   4383    PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
   4384    PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
   4385 }
   4386 POST(sys_statfs)
   4387 {
   4388    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
   4389 }
   4390 
   4391 PRE(sys_statfs64)
   4392 {
   4393    PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
   4394    PRE_REG_READ3(long, "statfs64",
   4395                  const char *, path, vki_size_t, size, struct statfs64 *, buf);
   4396    PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
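            /* The buffer size is supplied by the caller (ARG2), so that is
               how many bytes may be written to the buffer. */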
   4397    PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
   4398 }
   4399 POST(sys_statfs64)
   4400 {
   4401    POST_MEM_WRITE( ARG3, ARG2 );
   4402 }
   4403 
   4404 PRE(sys_symlink)
   4405 {
   4406    *flags |= SfMayBlock;
   4407    PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   4408    PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
   4409    PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
   4410    PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
   4411 }
   4412 
   4413 PRE(sys_time)
   4414 {
   4415    /* time_t time(time_t *t); */
   4416    PRINT("sys_time ( %#lx )",ARG1);
    4417    PRE_REG_READ1(long, "time", vki_time_t *, t);
   4418    if (ARG1 != 0) {
   4419       PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
   4420    }
   4421 }
   4422 
   4423 POST(sys_time)
   4424 {
   4425    if (ARG1 != 0) {
   4426       POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
   4427    }
   4428 }
   4429 
   4430 PRE(sys_times)
   4431 {
   4432    PRINT("sys_times ( %#lx )", ARG1);
   4433    PRE_REG_READ1(long, "times", struct tms *, buf);
   4434    if (ARG1 != 0) {
   4435       PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
   4436    }
   4437 }
   4438 
   4439 POST(sys_times)
   4440 {
   4441    if (ARG1 != 0) {
   4442       POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
   4443    }
   4444 }
   4445 
   4446 PRE(sys_umask)
   4447 {
   4448    PRINT("sys_umask ( %ld )", SARG1);
   4449    PRE_REG_READ1(long, "umask", int, mask);
   4450 }
   4451 
   4452 PRE(sys_unlink)
   4453 {
   4454    *flags |= SfMayBlock;
   4455    PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
   4456    PRE_REG_READ1(long, "unlink", const char *, pathname);
   4457    PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
   4458 }
   4459 
   4460 PRE(sys_newuname)
   4461 {
   4462    PRINT("sys_newuname ( %#lx )", ARG1);
   4463    PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
   4464    PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
   4465 }
   4466 
   4467 POST(sys_newuname)
   4468 {
   4469    if (ARG1 != 0) {
   4470       POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
   4471    }
   4472 }
   4473 
   4474 PRE(sys_waitpid)
   4475 {
   4476    *flags |= SfMayBlock;
   4477    PRINT("sys_waitpid ( %ld, %#lx, %ld )", SARG1, ARG2, SARG3);
   4478    PRE_REG_READ3(long, "waitpid",
   4479                  vki_pid_t, pid, unsigned int *, status, int, options);
   4480 
   4481    if (ARG2 != (Addr)NULL)
   4482       PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
   4483 }
   4484 
   4485 POST(sys_waitpid)
   4486 {
   4487    if (ARG2 != (Addr)NULL)
   4488       POST_MEM_WRITE( ARG2, sizeof(int) );
   4489 }
   4490 
   4491 PRE(sys_wait4)
   4492 {
   4493    *flags |= SfMayBlock;
   4494    PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", SARG1, ARG2, SARG3, ARG4);
   4495 
   4496    PRE_REG_READ4(long, "wait4",
   4497                  vki_pid_t, pid, unsigned int *, status, int, options,
   4498                  struct rusage *, rusage);
   4499    if (ARG2 != (Addr)NULL)
   4500       PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
   4501    if (ARG4 != (Addr)NULL)
   4502       PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
   4503 }
   4504 
   4505 POST(sys_wait4)
   4506 {
   4507    if (ARG2 != (Addr)NULL)
   4508       POST_MEM_WRITE( ARG2, sizeof(int) );
   4509    if (ARG4 != (Addr)NULL)
   4510       POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
   4511 }
   4512 
   4513 PRE(sys_writev)
   4514 {
   4515    Int i;
   4516    struct vki_iovec * vec;
   4517    *flags |= SfMayBlock;
   4518    PRINT("sys_writev ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
   4519    PRE_REG_READ3(ssize_t, "writev",
   4520                  unsigned long, fd, const struct iovec *, vector,
   4521                  unsigned long, count);
   4522    if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
   4523       SET_STATUS_Failure( VKI_EBADF );
   4524    } else {
   4525       if ((Int)ARG3 >= 0)
   4526          PRE_MEM_READ( "writev(vector)",
   4527                        ARG2, ARG3 * sizeof(struct vki_iovec) );
   4528       if (ARG2 != 0) {
   4529          /* ToDo: don't do any of the following if the vector is invalid */
   4530          vec = (struct vki_iovec *)ARG2;
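                  /* Each buffer named by an iovec entry must itself be
                     readable for the full iov_len bytes. */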
   4531          for (i = 0; i < (Int)ARG3; i++)
   4532             PRE_MEM_READ( "writev(vector[...])",
   4533                            (Addr)vec[i].iov_base, vec[i].iov_len );
   4534       }
   4535    }
   4536 }
   4537 
   4538 PRE(sys_utimes)
   4539 {
   4540    FUSE_COMPATIBLE_MAY_BLOCK();
   4541    PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   4542    PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
   4543    PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
   4544    if (ARG2 != 0) {
   4545       PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
   4546       PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
   4547    }
   4548 }
   4549 
   4550 PRE(sys_acct)
   4551 {
   4552    PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
   4553    PRE_REG_READ1(long, "acct", const char *, filename);
   4554    PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
   4555 }
   4556 
   4557 PRE(sys_pause)
   4558 {
   4559    *flags |= SfMayBlock;
   4560    PRINT("sys_pause ( )");
   4561    PRE_REG_READ0(long, "pause");
   4562 }
   4563 
   4564 PRE(sys_sigaltstack)
   4565 {
   4566    PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
   4567    PRE_REG_READ2(int, "sigaltstack",
   4568                  const vki_stack_t *, ss, vki_stack_t *, oss);
   4569    if (ARG1 != 0) {
   4570       const vki_stack_t *ss = (vki_stack_t *)ARG1;
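               /* Check only the fields the call actually uses, so any
                  uninitialised padding in the structure does not provoke
                  spurious errors. */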
   4571       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
   4572       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
   4573       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
   4574    }
   4575    if (ARG2 != 0) {
   4576       PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
   4577    }
   4578 
    4579    /* Be safe: fail with EFAULT up front if either pointer cannot
               actually be dereferenced. */
    4580    if (ARG1 && !ML_(safe_to_deref)((void*)ARG1, sizeof(vki_stack_t))) {
   4581       SET_STATUS_Failure(VKI_EFAULT);
   4582       return;
   4583    }
    4584    if (ARG2 && !ML_(safe_to_deref)((void*)ARG2, sizeof(vki_stack_t))) {
   4585       SET_STATUS_Failure(VKI_EFAULT);
   4586       return;
   4587    }
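            /* Valgrind itself delivers signals to the client and so must keep
               the alternate-stack state per thread; the request is therefore
               handled by the core rather than forwarded to the kernel. */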
   4588 
   4589    SET_STATUS_from_SysRes(
   4590       VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
   4591                               (vki_stack_t*)ARG2)
   4592    );
   4593 }
   4594 POST(sys_sigaltstack)
   4595 {
   4596    vg_assert(SUCCESS);
   4597    if (RES == 0 && ARG2 != 0)
   4598       POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
   4599 }
   4600 
   4601 PRE(sys_sethostname)
   4602 {
   4603    PRINT("sys_sethostname ( %#lx, %ld )", ARG1, SARG2);
   4604    PRE_REG_READ2(long, "sethostname", char *, name, int, len);
   4605    PRE_MEM_READ( "sethostname(name)", ARG1, ARG2 );
   4606 }
   4607 
   4608 #undef PRE
   4609 #undef POST
   4610 
   4611 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
   4612 
   4613 /*--------------------------------------------------------------------*/
   4614 /*--- end                                                          ---*/
   4615 /*--------------------------------------------------------------------*/
   4616