      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*--------------------------------------------------------------------*/
      4 /*--- Wrappers for generic Unix system calls                       ---*/
      5 /*---                                            syswrap-generic.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of Valgrind, a dynamic binary instrumentation
     10    framework.
     11 
     12    Copyright (C) 2000-2017 Julian Seward
     13       jseward (at) acm.org
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 */
     32 
     33 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
     34 
     35 #include "pub_core_basics.h"
     36 #include "pub_core_vki.h"
     37 #include "pub_core_vkiscnums.h"
     38 #include "pub_core_threadstate.h"
     39 #include "pub_core_debuginfo.h"     // VG_(di_notify_*)
     40 #include "pub_core_aspacemgr.h"
     41 #include "pub_core_transtab.h"      // VG_(discard_translations)
     42 #include "pub_core_xarray.h"
     43 #include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
     44 #include "pub_core_debuglog.h"
     45 #include "pub_core_errormgr.h"
     46 #include "pub_core_gdbserver.h"     // VG_(gdbserver)
     47 #include "pub_core_libcbase.h"
     48 #include "pub_core_libcassert.h"
     49 #include "pub_core_libcfile.h"
     50 #include "pub_core_libcprint.h"
     51 #include "pub_core_libcproc.h"
     52 #include "pub_core_libcsignal.h"
     53 #include "pub_core_machine.h"       // VG_(get_SP)
     54 #include "pub_core_mallocfree.h"
     55 #include "pub_core_options.h"
     56 #include "pub_core_scheduler.h"
     57 #include "pub_core_signals.h"
     58 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     59 #include "pub_core_syscall.h"
     60 #include "pub_core_syswrap.h"
     61 #include "pub_core_tooliface.h"
     62 #include "pub_core_ume.h"
     63 #include "pub_core_stacks.h"
     64 
     65 #include "priv_types_n_macros.h"
     66 #include "priv_syswrap-generic.h"
     67 
     68 #include "config.h"
     69 
     70 
     71 void ML_(guess_and_register_stack) (Addr sp, ThreadState* tst)
     72 {
     73    Bool debug = False;
     74    NSegment const* seg;
     75 
      76    /* We don't really know where the client stack is, because it's
     77       allocated by the client.  The best we can do is look at the
     78       memory mappings and try to derive some useful information.  We
     79       assume that sp starts near its highest possible value, and can
     80       only go down to the start of the mmaped segment. */
     81    seg = VG_(am_find_nsegment)(sp);
     82    if (seg
     83        && VG_(am_is_valid_for_client)(sp, 1, VKI_PROT_READ | VKI_PROT_WRITE)) {
     84       tst->client_stack_highest_byte = (Addr)VG_PGROUNDUP(sp)-1;
     85       tst->client_stack_szB = tst->client_stack_highest_byte - seg->start + 1;
     86 
     87       tst->os_state.stk_id
     88          = VG_(register_stack)(seg->start, tst->client_stack_highest_byte);
     89 
     90       if (debug)
     91 	 VG_(printf)("tid %u: guessed client stack range [%#lx-%#lx]"
     92                      " as stk_id %lu\n",
     93 		     tst->tid, seg->start, tst->client_stack_highest_byte,
     94                      tst->os_state.stk_id);
     95    } else {
     96       VG_(message)(Vg_UserMsg,
     97                    "!? New thread %u starts with SP(%#lx) unmapped\n",
     98 		   tst->tid, sp);
     99       tst->client_stack_highest_byte = 0;
    100       tst->client_stack_szB  = 0;
    101    }
    102 }
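
/* Worked example (hypothetical addresses, assuming 4KB pages): if a
   new thread starts with sp == 0x7fff1234 and the mapping containing
   it begins at 0x7ffd0000, the code above computes

      client_stack_highest_byte = VG_PGROUNDUP(0x7fff1234) - 1
                                = 0x7fff1fff
      client_stack_szB          = 0x7fff1fff - 0x7ffd0000 + 1
                                = 0x22000

   and registers [0x7ffd0000, 0x7fff1fff] as the thread's stack. */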
    103 
    104 /* Returns True iff address range is something the client can
     105    plausibly mess with: all of it either already belongs to the
    106    client or is free or a reservation. */
    107 
    108 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
    109                                    const HChar *syscallname)
    110 {
    111    Bool ret;
    112 
    113    if (size == 0)
    114       return True;
    115 
    116    ret = VG_(am_is_valid_for_client_or_free_or_resvn)
    117             (start,size,VKI_PROT_NONE);
    118 
    119    if (0)
    120       VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
    121 		  syscallname, start, start+size-1, (Int)ret);
    122 
    123    if (!ret && syscallname != NULL) {
    124       VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
    125                                "to modify addresses %#lx-%#lx\n",
    126                                syscallname, start, start+size-1);
    127       if (VG_(clo_verbosity) > 1) {
    128          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
    129       }
    130    }
    131 
    132    return ret;
    133 }
    134 
    135 
    136 Bool ML_(client_signal_OK)(Int sigNo)
    137 {
    138    /* signal 0 is OK for kill */
    139    Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
    140 
    141    //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
    142 
    143    return ret;
    144 }
    145 
    146 
    147 /* Handy small function to help stop wrappers from segfaulting when
    148    presented with bogus client addresses.  Is not used for generating
    149    user-visible errors. */
    150 
    151 Bool ML_(safe_to_deref) ( const void *start, SizeT size )
    152 {
    153    return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
    154 }
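
/* Illustrative sketch of the intended use (the struct and variable
   names here are made up for the example): a PRE() wrapper handed a
   client pointer checks it before looking inside.

      struct vki_timespec *ts = (struct vki_timespec *)(Addr)ARG1;
      if (ML_(safe_to_deref)(ts, sizeof(*ts))) {
         ... only now is it safe for the wrapper itself to read *ts ...
      }

   A wrapper typically still does PRE_MEM_READ on the range
   regardless, so the tool (not the wrapper) reports the bad
   address. */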
    155 
    156 
    157 /* ---------------------------------------------------------------------
    158    Doing mmap, mremap
    159    ------------------------------------------------------------------ */
    160 
    161 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
    162    munmap, mprotect (and mremap??) work at the page level.  So addresses
    163    and lengths must be adjusted for this. */
    164 
    165 /* Mash around start and length so that the area exactly covers
    166    an integral number of pages.  If we don't do that, memcheck's
     167    idea of addressable memory diverges from that of the
    168    kernel's, which causes the leak detector to crash. */
    169 static
    170 void page_align_addr_and_len( Addr* a, SizeT* len)
    171 {
    172    Addr ra;
    173 
    174    ra = VG_PGROUNDDN(*a);
    175    *len = VG_PGROUNDUP(*a + *len) - ra;
    176    *a = ra;
    177 }
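
/* Worked example (assuming 4KB pages): a == 0x40001234, len == 0x2100
   becomes

      ra   = VG_PGROUNDDN(0x40001234)                       = 0x40001000
      *len = VG_PGROUNDUP(0x40001234 + 0x2100) - 0x40001000 = 0x3000
      *a   = 0x40001000

   i.e. the three pages 0x40001000 .. 0x40003fff, the smallest
   page-aligned range covering the original request. */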
    178 
    179 static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
    180                                 UInt flags, Int fd, Off64T offset)
    181 {
    182    Bool d;
    183 
    184    /* 'a' is the return value from a real kernel mmap, hence: */
    185    vg_assert(VG_IS_PAGE_ALIGNED(a));
    186    /* whereas len is whatever the syscall supplied.  So: */
    187    len = VG_PGROUNDUP(len);
    188 
    189    d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
    190 
    191    if (d)
    192       VG_(discard_translations)( a, (ULong)len,
    193                                  "notify_core_of_mmap" );
    194 }
    195 
    196 static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
    197 {
    198    Bool rr, ww, xx;
    199 
    200    /* 'a' is the return value from a real kernel mmap, hence: */
    201    vg_assert(VG_IS_PAGE_ALIGNED(a));
    202    /* whereas len is whatever the syscall supplied.  So: */
    203    len = VG_PGROUNDUP(len);
    204 
    205    rr = toBool(prot & VKI_PROT_READ);
    206    ww = toBool(prot & VKI_PROT_WRITE);
    207    xx = toBool(prot & VKI_PROT_EXEC);
    208 
    209    VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
    210 }
    211 
    212 
    213 /* When a client mmap has been successfully done, this function must
    214    be called.  It notifies both aspacem and the tool of the new
    215    mapping.
    216 
    217    JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
    218    it is called from is POST(sys_io_setup).  In particular,
    219    ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
    220    client mmap.  But it doesn't call this function; instead it does the
    221    relevant notifications itself.  Here, we just pass di_handle=0 to
    222    notify_tool_of_mmap as we have no better information.  But really this
    223    function should be done away with; problem is I don't understand what
    224    POST(sys_io_setup) does or how it works.
    225 
    226    [However, this function is used lots for Darwin, because
    227     ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
    228  */
    229 void
    230 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
    231                                     UInt flags, Int fd, Off64T offset )
    232 {
    233    // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
    234    // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
    235    // Should it?  --njn
    236    notify_core_of_mmap(a, len, prot, flags, fd, offset);
    237    notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
    238 }
    239 
    240 void
    241 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
    242 {
    243    Bool d;
    244 
    245    page_align_addr_and_len(&a, &len);
    246    d = VG_(am_notify_munmap)(a, len);
    247    VG_TRACK( die_mem_munmap, a, len );
    248    VG_(di_notify_munmap)( a, len );
    249    if (d)
    250       VG_(discard_translations)( a, (ULong)len,
    251                                  "ML_(notify_core_and_tool_of_munmap)" );
    252 }
    253 
    254 void
    255 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
    256 {
    257    Bool rr = toBool(prot & VKI_PROT_READ);
    258    Bool ww = toBool(prot & VKI_PROT_WRITE);
    259    Bool xx = toBool(prot & VKI_PROT_EXEC);
    260    Bool d;
    261 
    262    page_align_addr_and_len(&a, &len);
    263    d = VG_(am_notify_mprotect)(a, len, prot);
    264    VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
    265    VG_(di_notify_mprotect)( a, len, prot );
    266    if (d)
    267       VG_(discard_translations)( a, (ULong)len,
    268                                  "ML_(notify_core_and_tool_of_mprotect)" );
    269 }
    270 
    271 
    272 
    273 #if HAVE_MREMAP
    274 /* Expand (or shrink) an existing mapping, potentially moving it at
    275    the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
    276 */
    277 static
    278 SysRes do_mremap( Addr old_addr, SizeT old_len,
    279                   Addr new_addr, SizeT new_len,
    280                   UWord flags, ThreadId tid )
    281 {
    282 #  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
    283 
    284    Bool      ok, d;
    285    NSegment const* old_seg;
    286    Addr      advised;
    287    Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
    288    Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
    289 
    290    if (0)
    291       VG_(printf)("do_remap (old %#lx %lu) (new %#lx %lu) %s %s\n",
    292                   old_addr,old_len,new_addr,new_len,
    293                   flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
    294                   flags & VKI_MREMAP_FIXED ? "FIXED" : "");
    295    if (0)
    296       VG_(am_show_nsegments)(0, "do_remap: before");
    297 
    298    if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
    299       goto eINVAL;
    300 
    301    if (!VG_IS_PAGE_ALIGNED(old_addr))
    302       goto eINVAL;
    303 
    304    old_len = VG_PGROUNDUP(old_len);
    305    new_len = VG_PGROUNDUP(new_len);
    306 
    307    if (new_len == 0)
    308       goto eINVAL;
    309 
    310    /* kernel doesn't reject this, but we do. */
    311    if (old_len == 0)
    312       goto eINVAL;
    313 
    314    /* reject wraparounds */
    315    if (old_addr + old_len < old_addr)
    316       goto eINVAL;
    317    if (f_fixed == True && new_addr + new_len < new_len)
    318       goto eINVAL;
    319 
    320    /* kernel rejects all fixed, no-move requests (which are
    321       meaningless). */
    322    if (f_fixed == True && f_maymove == False)
    323       goto eINVAL;
    324 
    325    /* Stay away from non-client areas. */
    326    if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
    327       goto eINVAL;
    328 
    329    /* In all remaining cases, if the old range does not fall within a
    330       single segment, fail. */
    331    old_seg = VG_(am_find_nsegment)( old_addr );
    332    if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
    333       goto eINVAL;
    334    if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC
    335        && old_seg->kind != SkShmC)
    336       goto eINVAL;
    337 
    338    vg_assert(old_len > 0);
    339    vg_assert(new_len > 0);
    340    vg_assert(VG_IS_PAGE_ALIGNED(old_len));
    341    vg_assert(VG_IS_PAGE_ALIGNED(new_len));
    342    vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
    343 
    344    /* There are 3 remaining cases:
    345 
    346       * maymove == False
    347 
    348         new space has to be at old address, so:
    349             - shrink    -> unmap end
    350             - same size -> do nothing
    351             - grow      -> if can grow in-place, do so, else fail
    352 
    353       * maymove == True, fixed == False
    354 
    355         new space can be anywhere, so:
    356             - shrink    -> unmap end
    357             - same size -> do nothing
    358             - grow      -> if can grow in-place, do so, else
    359                            move to anywhere large enough, else fail
    360 
    361       * maymove == True, fixed == True
    362 
    363         new space must be at new address, so:
    364 
    365             - if new address is not page aligned, fail
    366             - if new address range overlaps old one, fail
    367             - if new address range cannot be allocated, fail
    368             - else move to new address range with new size
     369             - if the relocation itself fails, fail
    370    */
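
   /* Worked example (hypothetical values, 4KB pages): a client call

         mremap(0x5000000, 0x2000, 0x3000, MREMAP_MAYMOVE)

      arrives here with f_maymove == True, f_fixed == False and
      new_len > old_len, so it takes the
      grow_in_place_or_move_anywhere_or_fail path below: first try to
      extend the existing mapping by one page at 0x5002000; if that
      space is not free, ask aspacem for a fresh 0x3000-byte area
      elsewhere and relocate the mapping there; otherwise fail with
      ENOMEM. */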
    371 
    372    if (f_maymove == False) {
    373       /* new space has to be at old address */
    374       if (new_len < old_len)
    375          goto shrink_in_place;
    376       if (new_len > old_len)
    377          goto grow_in_place_or_fail;
    378       goto same_in_place;
    379    }
    380 
    381    if (f_maymove == True && f_fixed == False) {
    382       /* new space can be anywhere */
    383       if (new_len < old_len)
    384          goto shrink_in_place;
    385       if (new_len > old_len)
    386          goto grow_in_place_or_move_anywhere_or_fail;
    387       goto same_in_place;
    388    }
    389 
    390    if (f_maymove == True && f_fixed == True) {
    391       /* new space can only be at the new address */
    392       if (!VG_IS_PAGE_ALIGNED(new_addr))
    393          goto eINVAL;
    394       if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
    395          /* no overlap */
    396       } else {
    397          goto eINVAL;
    398       }
    399       if (new_addr == 0)
    400          goto eINVAL;
    401          /* VG_(am_get_advisory_client_simple) interprets zero to mean
    402             non-fixed, which is not what we want */
    403       advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
    404       if (!ok || advised != new_addr)
    405          goto eNOMEM;
    406       ok = VG_(am_relocate_nooverlap_client)
    407               ( &d, old_addr, old_len, new_addr, new_len );
    408       if (ok) {
    409          VG_TRACK( copy_mem_remap, old_addr, new_addr,
    410                                    MIN_SIZET(old_len,new_len) );
    411          if (new_len > old_len)
    412             VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
    413                       old_seg->hasR, old_seg->hasW, old_seg->hasX,
    414                       0/*di_handle*/ );
    415          VG_TRACK(die_mem_munmap, old_addr, old_len);
    416          if (d) {
    417             VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
    418             VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
    419          }
    420          return VG_(mk_SysRes_Success)( new_addr );
    421       }
    422       goto eNOMEM;
    423    }
    424 
    425    /* end of the 3 cases */
    426    /*NOTREACHED*/ vg_assert(0);
    427 
    428   grow_in_place_or_move_anywhere_or_fail:
    429    {
    430    /* try growing it in-place */
    431    Addr   needA = old_addr + old_len;
    432    SSizeT needL = new_len - old_len;
    433 
    434    vg_assert(needL > 0);
    435    vg_assert(needA > 0);
    436 
    437    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
    438    if (ok) {
    439       /* Fixes bug #129866. */
    440       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
    441    }
    442    if (ok && advised == needA) {
    443       const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
    444       if (new_seg) {
    445          VG_TRACK( new_mem_mmap, needA, needL,
    446                                  new_seg->hasR,
    447                                  new_seg->hasW, new_seg->hasX,
    448                                  0/*di_handle*/ );
    449          return VG_(mk_SysRes_Success)( old_addr );
    450       }
    451    }
    452 
    453    /* that failed.  Look elsewhere. */
    454    advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
    455    if (ok) {
    456       Bool oldR = old_seg->hasR;
    457       Bool oldW = old_seg->hasW;
    458       Bool oldX = old_seg->hasX;
    459       /* assert new area does not overlap old */
    460       vg_assert(advised+new_len-1 < old_addr
    461                 || advised > old_addr+old_len-1);
    462       ok = VG_(am_relocate_nooverlap_client)
    463               ( &d, old_addr, old_len, advised, new_len );
    464       if (ok) {
    465          VG_TRACK( copy_mem_remap, old_addr, advised,
    466                                    MIN_SIZET(old_len,new_len) );
    467          if (new_len > old_len)
    468             VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
    469                       oldR, oldW, oldX, 0/*di_handle*/ );
    470          VG_TRACK(die_mem_munmap, old_addr, old_len);
    471          if (d) {
    472             VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
    473             VG_(discard_translations)( advised, new_len, "do_remap(5)" );
    474          }
    475          return VG_(mk_SysRes_Success)( advised );
    476       }
    477    }
    478    goto eNOMEM;
    479    }
    480    /*NOTREACHED*/ vg_assert(0);
    481 
    482   grow_in_place_or_fail:
    483    {
    484    Addr  needA = old_addr + old_len;
    485    SizeT needL = new_len - old_len;
    486 
    487    vg_assert(needA > 0);
    488 
    489    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
    490    if (ok) {
    491       /* Fixes bug #129866. */
    492       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
    493    }
    494    if (!ok || advised != needA)
    495       goto eNOMEM;
    496    const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
    497    if (!new_seg)
    498       goto eNOMEM;
    499    VG_TRACK( new_mem_mmap, needA, needL,
    500                            new_seg->hasR, new_seg->hasW, new_seg->hasX,
    501                            0/*di_handle*/ );
    502 
    503    return VG_(mk_SysRes_Success)( old_addr );
    504    }
    505    /*NOTREACHED*/ vg_assert(0);
    506 
    507   shrink_in_place:
    508    {
    509    SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
    510    if (sr_isError(sres))
    511       return sres;
    512    VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
    513    if (d)
    514       VG_(discard_translations)( old_addr+new_len, old_len-new_len,
    515                                  "do_remap(7)" );
    516    return VG_(mk_SysRes_Success)( old_addr );
    517    }
    518    /*NOTREACHED*/ vg_assert(0);
    519 
    520   same_in_place:
    521    return VG_(mk_SysRes_Success)( old_addr );
    522    /*NOTREACHED*/ vg_assert(0);
    523 
    524   eINVAL:
    525    return VG_(mk_SysRes_Error)( VKI_EINVAL );
    526   eNOMEM:
    527    return VG_(mk_SysRes_Error)( VKI_ENOMEM );
    528 
    529 #  undef MIN_SIZET
    530 }
    531 #endif /* HAVE_MREMAP */
    532 
    533 
    534 /* ---------------------------------------------------------------------
    535    File-descriptor tracking
    536    ------------------------------------------------------------------ */
    537 
    538 /* One of these is allocated for each open file descriptor.  */
    539 typedef struct OpenFd
    540 {
    541    Int fd;                        /* The file descriptor */
    542    HChar *pathname;               /* NULL if not a regular file or unknown */
    543    ExeContext *where;             /* NULL if inherited from parent */
    544    struct OpenFd *next, *prev;
    545 } OpenFd;
    546 
    547 /* List of allocated file descriptors. */
    548 static OpenFd *allocated_fds = NULL;
    549 
    550 /* Count of open file descriptors. */
    551 static Int fd_count = 0;
    552 
    553 
    554 /* Note the fact that a file descriptor was just closed. */
    555 void ML_(record_fd_close)(Int fd)
    556 {
    557    OpenFd *i = allocated_fds;
    558 
    559    if (fd >= VG_(fd_hard_limit))
    560       return;			/* Valgrind internal */
    561 
    562    while(i) {
    563       if(i->fd == fd) {
    564          if(i->prev)
    565             i->prev->next = i->next;
    566          else
    567             allocated_fds = i->next;
    568          if(i->next)
    569             i->next->prev = i->prev;
    570          if(i->pathname)
    571             VG_(free) (i->pathname);
    572          VG_(free) (i);
    573          fd_count--;
    574          break;
    575       }
    576       i = i->next;
    577    }
    578 }
    579 
    580 /* Note the fact that a file descriptor was just opened.  If the
    581    tid is -1, this indicates an inherited fd.  If the pathname is NULL,
    582    this either indicates a non-standard file (i.e. a pipe or socket or
    583    some such thing) or that we don't know the filename.  If the fd is
    584    already open, then we're probably doing a dup2() to an existing fd,
    585    so just overwrite the existing one. */
    586 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd,
    587                                          const HChar *pathname)
    588 {
    589    OpenFd *i;
    590 
    591    if (fd >= VG_(fd_hard_limit))
    592       return;			/* Valgrind internal */
    593 
    594    /* Check to see if this fd is already open. */
    595    i = allocated_fds;
    596    while (i) {
    597       if (i->fd == fd) {
    598          if (i->pathname) VG_(free)(i->pathname);
    599          break;
    600       }
    601       i = i->next;
    602    }
    603 
    604    /* Not already one: allocate an OpenFd */
    605    if (i == NULL) {
    606       i = VG_(malloc)("syswrap.rfdowgn.1", sizeof(OpenFd));
    607 
    608       i->prev = NULL;
    609       i->next = allocated_fds;
    610       if(allocated_fds) allocated_fds->prev = i;
    611       allocated_fds = i;
    612       fd_count++;
    613    }
    614 
    615    i->fd = fd;
    616    i->pathname = VG_(strdup)("syswrap.rfdowgn.2", pathname);
    617    i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
    618 }
    619 
    620 // Record opening of an fd, and find its name.
    621 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
    622 {
    623    const HChar* buf;
    624    const HChar* name;
    625    if (VG_(resolve_filename)(fd, &buf))
    626       name = buf;
    627    else
    628       name = NULL;
    629 
    630    ML_(record_fd_open_with_given_name)(tid, fd, name);
    631 }
    632 
    633 // Record opening of a nameless fd.
    634 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
    635 {
    636    ML_(record_fd_open_with_given_name)(tid, fd, NULL);
    637 }
    638 
     639 // Return True if a given file descriptor is already recorded.
    640 Bool ML_(fd_recorded)(Int fd)
    641 {
    642    OpenFd *i = allocated_fds;
    643    while (i) {
    644       if (i->fd == fd)
    645          return True;
    646       i = i->next;
    647    }
    648    return False;
    649 }
    650 
    651 /* Returned string must not be modified nor free'd. */
    652 const HChar *ML_(find_fd_recorded_by_fd)(Int fd)
    653 {
    654    OpenFd *i = allocated_fds;
    655 
    656    while (i) {
    657       if (i->fd == fd)
    658          return i->pathname;
    659       i = i->next;
    660    }
    661 
    662    return NULL;
    663 }
    664 
    665 static
    666 HChar *unix_to_name(struct vki_sockaddr_un *sa, UInt len, HChar *name)
    667 {
    668    if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
    669       VG_(sprintf)(name, "<unknown>");
    670    } else {
    671       VG_(sprintf)(name, "%s", sa->sun_path);
    672    }
    673 
    674    return name;
    675 }
    676 
    677 static
    678 HChar *inet_to_name(struct vki_sockaddr_in *sa, UInt len, HChar *name)
    679 {
    680    if (sa == NULL || len == 0) {
    681       VG_(sprintf)(name, "<unknown>");
    682    } else if (sa->sin_port == 0) {
    683       VG_(sprintf)(name, "<unbound>");
    684    } else {
    685       UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
    686       VG_(sprintf)(name, "%u.%u.%u.%u:%u",
    687                    (addr>>24) & 0xFF, (addr>>16) & 0xFF,
    688                    (addr>>8) & 0xFF, addr & 0xFF,
    689                    VG_(ntohs)(sa->sin_port));
    690    }
    691 
    692    return name;
    693 }
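
/* For example, a vki_sockaddr_in holding 127.0.0.1, port 8080 (both
   in network byte order) formats as "127.0.0.1:8080"; a zero port
   gives "<unbound>". */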
    694 
    695 static
    696 void inet6_format(HChar *s, const UChar ip[16])
    697 {
    698    static const unsigned char V4mappedprefix[12] = {0,0,0,0,0,0,0,0,0,0,0xff,0xff};
    699 
    700    if (!VG_(memcmp)(ip, V4mappedprefix, 12)) {
    701       const struct vki_in_addr *sin_addr =
    702           (const struct vki_in_addr *)(ip + 12);
    703       UInt addr = VG_(ntohl)(sin_addr->s_addr);
    704 
    705       VG_(sprintf)(s, "::ffff:%u.%u.%u.%u",
    706                    (addr>>24) & 0xFF, (addr>>16) & 0xFF,
    707                    (addr>>8) & 0xFF, addr & 0xFF);
    708    } else {
    709       Bool compressing = False;
    710       Bool compressed = False;
    711       Int len = 0;
    712       Int i;
    713 
    714       for (i = 0; i < 16; i += 2) {
    715          UInt word = ((UInt)ip[i] << 8) | (UInt)ip[i+1];
    716          if (word == 0 && !compressed) {
    717             compressing = True;
    718          } else {
    719             if (compressing) {
    720                compressing = False;
    721                compressed = True;
    722                s[len++] = ':';
    723             }
    724             if (i > 0) {
    725                s[len++] = ':';
    726             }
    727             len += VG_(sprintf)(s + len, "%x", word);
    728          }
    729       }
    730 
    731       if (compressing) {
    732          s[len++] = ':';
    733          s[len++] = ':';
    734       }
    735 
    736       s[len++] = 0;
    737    }
    738 
    739    return;
    740 }
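
/* Example outputs: the loopback address ::1 formats as "::1",
   2001:db8::1 as "2001:db8::1", and a V4-mapped address such as
   ::ffff:192.0.2.1 as "::ffff:192.0.2.1".  Only the first run of
   zero words is compressed to "::". */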
    741 
    742 static
    743 HChar *inet6_to_name(struct vki_sockaddr_in6 *sa, UInt len, HChar *name)
    744 {
    745    if (sa == NULL || len == 0) {
    746       VG_(sprintf)(name, "<unknown>");
    747    } else if (sa->sin6_port == 0) {
    748       VG_(sprintf)(name, "<unbound>");
    749    } else {
    750       HChar addr[100];    // large enough
    751       inet6_format(addr, (void *)&(sa->sin6_addr));
    752       VG_(sprintf)(name, "[%s]:%u", addr, VG_(ntohs)(sa->sin6_port));
    753    }
    754 
    755    return name;
    756 }
    757 
    758 /*
     759  * Try to get some details about a socket.
    760  */
    761 static void
    762 getsockdetails(Int fd)
    763 {
    764    union u {
    765       struct vki_sockaddr a;
    766       struct vki_sockaddr_in in;
    767       struct vki_sockaddr_in6 in6;
    768       struct vki_sockaddr_un un;
    769    } laddr;
    770    Int llen;
    771 
    772    llen = sizeof(laddr);
    773    VG_(memset)(&laddr, 0, llen);
    774 
    775    if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
    776       switch(laddr.a.sa_family) {
    777       case VKI_AF_INET: {
    778          HChar lname[32];   // large enough
    779          HChar pname[32];   // large enough
    780          struct vki_sockaddr_in paddr;
    781          Int plen = sizeof(struct vki_sockaddr_in);
    782 
    783          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
    784             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
    785                          inet_to_name(&(laddr.in), llen, lname),
    786                          inet_to_name(&paddr, plen, pname));
    787          } else {
    788             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
    789                          fd, inet_to_name(&(laddr.in), llen, lname));
    790          }
    791          return;
    792          }
    793       case VKI_AF_INET6: {
    794          HChar lname[128];  // large enough
    795          HChar pname[128];  // large enough
    796          struct vki_sockaddr_in6 paddr;
    797          Int plen = sizeof(struct vki_sockaddr_in6);
    798 
    799          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
    800             VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> %s\n", fd,
    801                          inet6_to_name(&(laddr.in6), llen, lname),
    802                          inet6_to_name(&paddr, plen, pname));
    803          } else {
    804             VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> unbound\n",
    805                          fd, inet6_to_name(&(laddr.in6), llen, lname));
    806          }
    807          return;
    808          }
    809       case VKI_AF_UNIX: {
    810          static char lname[256];
    811          VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
    812                       unix_to_name(&(laddr.un), llen, lname));
    813          return;
    814          }
    815       default:
    816          VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
    817                       laddr.a.sa_family, fd);
    818          return;
    819       }
    820    }
    821 
    822    VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
    823 }
    824 
    825 
    826 /* Dump out a summary, and a more detailed list, of open file descriptors. */
    827 void VG_(show_open_fds) (const HChar* when)
    828 {
    829    OpenFd *i = allocated_fds;
    830 
    831    VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open %s.\n", fd_count, when);
    832 
    833    while (i) {
    834       if (i->pathname) {
    835          VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
    836                       i->pathname);
    837       } else {
    838          Int val;
    839          Int len = sizeof(val);
    840 
    841          if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
    842              == -1) {
    843             VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
    844          } else {
    845             getsockdetails(i->fd);
    846          }
    847       }
    848 
    849       if(i->where) {
    850          VG_(pp_ExeContext)(i->where);
    851          VG_(message)(Vg_UserMsg, "\n");
    852       } else {
    853          VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
    854          VG_(message)(Vg_UserMsg, "\n");
    855       }
    856 
    857       i = i->next;
    858    }
    859 
    860    VG_(message)(Vg_UserMsg, "\n");
    861 }
    862 
    863 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
    864    have /proc support compiled in, or a non-Linux kernel), then we need to
    865    find out what file descriptors we inherited from our parent process the
    866    hard way - by checking each fd in turn. */
    867 static
    868 void init_preopened_fds_without_proc_self_fd(void)
    869 {
    870    struct vki_rlimit lim;
    871    UInt count;
    872    Int i;
    873 
    874    if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
    875       /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
    876          an arbitrarily high number.  1024 happens to be the limit in
    877          the 2.4 Linux kernels. */
    878       count = 1024;
    879    } else {
    880       count = lim.rlim_cur;
    881    }
    882 
    883    for (i = 0; i < count; i++)
    884       if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
    885          ML_(record_fd_open_named)(-1, i);
    886 }
    887 
    888 /* Initialize the list of open file descriptors with the file descriptors
     889    we inherited from our parent process. */
    890 
    891 void VG_(init_preopened_fds)(void)
    892 {
    893 // DDD: should probably use HAVE_PROC here or similar, instead.
    894 #if defined(VGO_linux)
    895    Int ret;
    896    struct vki_dirent64 d;
    897    SysRes f;
    898 
    899    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
    900    if (sr_isError(f)) {
    901       init_preopened_fds_without_proc_self_fd();
    902       return;
    903    }
    904 
    905    while ((ret = VG_(getdents64)(sr_Res(f), &d, sizeof(d))) != 0) {
    906       if (ret == -1)
    907          goto out;
    908 
    909       if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
    910          HChar* s;
    911          Int fno = VG_(strtoll10)(d.d_name, &s);
    912          if (*s == '\0') {
    913             if (fno != sr_Res(f))
    914                if (VG_(clo_track_fds))
    915                   ML_(record_fd_open_named)(-1, fno);
    916          } else {
    917             VG_(message)(Vg_DebugMsg,
    918                "Warning: invalid file name in /proc/self/fd: %s\n",
    919                d.d_name);
    920          }
    921       }
    922 
    923       VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
    924    }
    925 
    926   out:
    927    VG_(close)(sr_Res(f));
    928 
    929 #elif defined(VGO_darwin)
    930    init_preopened_fds_without_proc_self_fd();
    931 
    932 #elif defined(VGO_solaris)
    933    Int ret;
    934    Char buf[VKI_MAXGETDENTS_SIZE];
    935    SysRes f;
    936 
    937    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
    938    if (sr_isError(f)) {
    939       init_preopened_fds_without_proc_self_fd();
    940       return;
    941    }
    942 
    943    while ((ret = VG_(getdents64)(sr_Res(f), (struct vki_dirent64 *) buf,
    944                                  sizeof(buf))) > 0) {
    945       Int i = 0;
    946       while (i < ret) {
     947          /* Process one entry. */
    948          struct vki_dirent64 *d = (struct vki_dirent64 *) (buf + i);
    949          if (VG_(strcmp)(d->d_name, ".") && VG_(strcmp)(d->d_name, "..")) {
    950             HChar *s;
    951             Int fno = VG_(strtoll10)(d->d_name, &s);
    952             if (*s == '\0') {
    953                if (fno != sr_Res(f))
    954                   if (VG_(clo_track_fds))
    955                      ML_(record_fd_open_named)(-1, fno);
    956             } else {
    957                VG_(message)(Vg_DebugMsg,
    958                      "Warning: invalid file name in /proc/self/fd: %s\n",
    959                      d->d_name);
    960             }
    961          }
    962 
     963          /* Move on to the next entry. */
    964          i += d->d_reclen;
    965       }
    966    }
    967 
    968    VG_(close)(sr_Res(f));
    969 
    970 #else
    971 #  error Unknown OS
    972 #endif
    973 }
    974 
    975 static
    976 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
    977                             const HChar *msg, Addr base, SizeT size )
    978 {
    979    HChar outmsg[VG_(strlen)(msg) + 10]; // large enough
    980    VG_(sprintf)(outmsg, "sendmsg%s", msg);
    981    PRE_MEM_READ( outmsg, base, size );
    982 }
    983 
    984 static
    985 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
    986                              const HChar *msg, Addr base, SizeT size )
    987 {
    988    HChar outmsg[VG_(strlen)(msg) + 10]; // large enough
    989    VG_(sprintf)(outmsg, "recvmsg%s", msg);
    990    if ( read )
    991       PRE_MEM_READ( outmsg, base, size );
    992    else
    993       PRE_MEM_WRITE( outmsg, base, size );
    994 }
    995 
    996 static
    997 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
    998                               const HChar *fieldName, Addr base, SizeT size )
    999 {
   1000    if ( !read )
   1001       POST_MEM_WRITE( base, size );
   1002 }
   1003 
   1004 static
   1005 void msghdr_foreachfield (
   1006         ThreadId tid,
   1007         const HChar *name,
   1008         struct vki_msghdr *msg,
   1009         UInt length,
   1010         void (*foreach_func)( ThreadId, Bool, const HChar *, Addr, SizeT ),
   1011         Bool rekv /* "recv" apparently shadows some header decl on OSX108 */
   1012      )
   1013 {
   1014    HChar fieldName[VG_(strlen)(name) + 32]; // large enough.
   1015    Addr a;
   1016    SizeT s;
   1017 
   1018    if ( !msg )
   1019       return;
   1020 
   1021    VG_(sprintf) ( fieldName, "(%s)", name );
   1022 
   1023    /* FIELDPAIR helps the compiler do one call to foreach_func
   1024       for consecutive (no holes) fields. */
   1025 #define FIELDPAIR(f1,f2) \
   1026    if (offsetof(struct vki_msghdr, f1) + sizeof(msg->f1)                \
   1027        == offsetof(struct vki_msghdr, f2))                              \
   1028       s += sizeof(msg->f2);                                             \
   1029    else {                                                               \
   1030       foreach_func (tid, True, fieldName, a, s);                        \
   1031       a = (Addr)&msg->f2;                                               \
   1032       s = sizeof(msg->f2);                                              \
   1033    }
   1034 
   1035    a = (Addr)&msg->msg_name;
   1036    s = sizeof(msg->msg_name);
   1037    FIELDPAIR(msg_name,    msg_namelen);
   1038    FIELDPAIR(msg_namelen, msg_iov);
   1039    FIELDPAIR(msg_iov,     msg_iovlen);
   1040    FIELDPAIR(msg_iovlen,  msg_control);
   1041    FIELDPAIR(msg_control, msg_controllen);
   1042    foreach_func ( tid, True, fieldName, a, s);
   1043 #undef FIELDPAIR
   1044 
    1045    /* msg_flags is completely ignored for sendmsg; recvmsg doesn't read
   1046       the field, but does write to it. */
   1047    if ( rekv )
   1048       foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );
   1049 
   1050    if ( ML_(safe_to_deref)(&msg->msg_name, sizeof (void *))
   1051         && msg->msg_name ) {
   1052       VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
   1053       foreach_func ( tid, False, fieldName,
   1054                      (Addr)msg->msg_name, msg->msg_namelen );
   1055    }
   1056 
   1057    if ( ML_(safe_to_deref)(&msg->msg_iov, sizeof (void *))
   1058         && msg->msg_iov ) {
   1059       struct vki_iovec *iov = msg->msg_iov;
   1060       UInt i;
   1061 
   1062       if (ML_(safe_to_deref)(&msg->msg_iovlen, sizeof (UInt))) {
   1063          VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );
   1064          foreach_func ( tid, True, fieldName, (Addr)iov,
   1065                         msg->msg_iovlen * sizeof( struct vki_iovec ) );
   1066 
   1067          for ( i = 0; i < msg->msg_iovlen && length > 0; ++i, ++iov ) {
   1068             if (ML_(safe_to_deref)(&iov->iov_len, sizeof (UInt))) {
   1069                UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
   1070                VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
   1071                foreach_func ( tid, False, fieldName,
   1072                               (Addr)iov->iov_base, iov_len );
   1073                length = length - iov_len;
   1074             }
   1075          }
   1076       }
   1077    }
   1078 
   1079    if ( ML_(safe_to_deref) (&msg->msg_control, sizeof (void *))
   1080         && msg->msg_control ) {
   1081       VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
   1082       foreach_func ( tid, False, fieldName,
   1083                      (Addr)msg->msg_control, msg->msg_controllen );
   1084    }
   1085 
   1086 }
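
/* The sendmsg/recvmsg handlers drive the helper above along roughly
   these lines (a sketch, not the exact code):

      ... PRE of sendmsg: every field is an input ...
      msghdr_foreachfield( tid, name, msg, ~0U,
                           pre_mem_read_sendmsg,  False );

      ... PRE of recvmsg: the iovecs and control area are outputs ...
      msghdr_foreachfield( tid, name, msg, length,
                           pre_mem_write_recvmsg, True );

   with the Bool 'rekv' argument selecting whether msg_flags is
   treated as written-to (recvmsg) or ignored (sendmsg). */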
   1087 
   1088 static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
   1089 {
   1090    struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);
   1091 
   1092    while (cm) {
   1093       if (cm->cmsg_level == VKI_SOL_SOCKET
   1094           && cm->cmsg_type == VKI_SCM_RIGHTS ) {
   1095          Int *fds = (Int *) VKI_CMSG_DATA(cm);
   1096          Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
   1097                          / sizeof(int);
   1098          Int i;
   1099 
   1100          for (i = 0; i < fdc; i++)
   1101             if(VG_(clo_track_fds))
   1102                // XXX: must we check the range on these fds with
   1103                //      ML_(fd_allowed)()?
   1104                ML_(record_fd_open_named)(tid, fds[i]);
   1105       }
   1106 
   1107       cm = VKI_CMSG_NXTHDR(msg, cm);
   1108    }
   1109 }
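
/* Worked example: a control message built by a sender with
   CMSG_LEN(3 * sizeof(int)) to pass three descriptors has

      cmsg_len == VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr))
                  + 3 * sizeof(int)

   so 'fdc' above comes out as 3, and (with --track-fds=yes) each of
   the three received descriptors gets recorded. */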
   1110 
   1111 /* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
   1112 static
   1113 void pre_mem_read_sockaddr ( ThreadId tid,
   1114                              const HChar *description,
   1115                              struct vki_sockaddr *sa, UInt salen )
   1116 {
   1117    HChar outmsg[VG_(strlen)( description ) + 30]; // large enough
   1118    struct vki_sockaddr_un*  saun = (struct vki_sockaddr_un *)sa;
   1119    struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
   1120    struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
   1121 #  ifdef VKI_AF_BLUETOOTH
   1122    struct vki_sockaddr_rc*  rc   = (struct vki_sockaddr_rc *)sa;
   1123 #  endif
   1124 #  ifdef VKI_AF_NETLINK
   1125    struct vki_sockaddr_nl*  nl   = (struct vki_sockaddr_nl *)sa;
   1126 #  endif
   1127 
   1128    /* NULL/zero-length sockaddrs are legal */
   1129    if ( sa == NULL || salen == 0 ) return;
   1130 
   1131    VG_(sprintf) ( outmsg, description, "sa_family" );
   1132    PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
   1133 
   1134    /* Don't do any extra checking if we cannot determine the sa_family. */
   1135    if (! ML_(safe_to_deref) (&sa->sa_family, sizeof(vki_sa_family_t)))
   1136       return;
   1137 
   1138    switch (sa->sa_family) {
   1139 
   1140       case VKI_AF_UNIX:
   1141          if (ML_(safe_to_deref) (&saun->sun_path, sizeof (Addr))) {
   1142             VG_(sprintf) ( outmsg, description, "sun_path" );
   1143             PRE_MEM_RASCIIZ( outmsg, (Addr) saun->sun_path );
   1144             // GrP fixme max of sun_len-2? what about nul char?
   1145          }
   1146          break;
   1147 
   1148       case VKI_AF_INET:
   1149          VG_(sprintf) ( outmsg, description, "sin_port" );
   1150          PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
   1151          VG_(sprintf) ( outmsg, description, "sin_addr" );
   1152          PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
   1153          break;
   1154 
   1155       case VKI_AF_INET6:
   1156          VG_(sprintf) ( outmsg, description, "sin6_port" );
   1157          PRE_MEM_READ( outmsg,
   1158             (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
   1159          VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
   1160          PRE_MEM_READ( outmsg,
   1161             (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
   1162          VG_(sprintf) ( outmsg, description, "sin6_addr" );
   1163          PRE_MEM_READ( outmsg,
   1164             (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
   1165          VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
   1166          PRE_MEM_READ( outmsg,
   1167             (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
   1168          break;
   1169 
   1170 #     ifdef VKI_AF_BLUETOOTH
   1171       case VKI_AF_BLUETOOTH:
   1172          VG_(sprintf) ( outmsg, description, "rc_bdaddr" );
   1173          PRE_MEM_READ( outmsg, (Addr) &rc->rc_bdaddr, sizeof (rc->rc_bdaddr) );
   1174          VG_(sprintf) ( outmsg, description, "rc_channel" );
   1175          PRE_MEM_READ( outmsg, (Addr) &rc->rc_channel, sizeof (rc->rc_channel) );
   1176          break;
   1177 #     endif
   1178 
   1179 #     ifdef VKI_AF_NETLINK
   1180       case VKI_AF_NETLINK:
   1181          VG_(sprintf)(outmsg, description, "nl_pid");
   1182          PRE_MEM_READ(outmsg, (Addr)&nl->nl_pid, sizeof(nl->nl_pid));
   1183          VG_(sprintf)(outmsg, description, "nl_groups");
   1184          PRE_MEM_READ(outmsg, (Addr)&nl->nl_groups, sizeof(nl->nl_groups));
   1185          break;
   1186 #     endif
   1187 
   1188 #     ifdef VKI_AF_UNSPEC
   1189       case VKI_AF_UNSPEC:
   1190          break;
   1191 #     endif
   1192 
   1193       default:
   1194          /* No specific information about this address family.
   1195             Let's just check the full data following the family.
    1196             Note that this can give a false positive if this (unknown)
   1197             struct sockaddr_???? has padding bytes between its elements. */
   1198          VG_(sprintf) ( outmsg, description, "sa_data" );
   1199          PRE_MEM_READ( outmsg, (Addr)&sa->sa_family + sizeof(sa->sa_family),
   1200                        salen -  sizeof(sa->sa_family));
   1201          break;
   1202    }
   1203 }
   1204 
   1205 /* Dereference a pointer to a UInt. */
   1206 static UInt deref_UInt ( ThreadId tid, Addr a, const HChar* s )
   1207 {
   1208    UInt* a_p = (UInt*)a;
   1209    PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
   1210    if (a_p == NULL || ! ML_(safe_to_deref) (a_p, sizeof(UInt)))
   1211       return 0;
   1212    else
   1213       return *a_p;
   1214 }
   1215 
   1216 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
   1217                                   const HChar* buf_s, const HChar* buflen_s )
   1218 {
   1219    if (VG_(tdict).track_pre_mem_write) {
   1220       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
   1221       if (buflen_in > 0) {
   1222          VG_(tdict).track_pre_mem_write(
   1223             Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
   1224       }
   1225    }
   1226 }
   1227 
   1228 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
   1229                                    Addr buf_p, Addr buflen_p, const HChar* s )
   1230 {
   1231    if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
   1232       UInt buflen_out = deref_UInt( tid, buflen_p, s);
   1233       if (buflen_out > 0 && buf_p != (Addr)NULL) {
   1234          VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
   1235       }
   1236    }
   1237 }
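
/* Sketch of the buf/len protocol these two helpers implement, using
   made-up argument names (the socket wrappers below use them in
   essentially this shape):

      PRE:   ML_(buf_and_len_pre_check) ( tid, name_ptr, namelen_ptr,
                                          "getsockname(name)",
                                          "getsockname(namelen_in)" );
      POST:  ML_(buf_and_len_post_check)( tid, res, name_ptr, namelen_ptr,
                                          "getsockname(namelen_out)" );

   Before the call, *namelen_ptr says how much of the buffer the
   kernel may write, so that much must be addressable; after a
   successful call it says how much was actually written, and only
   that much is marked as written. */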
   1238 
   1239 /* ---------------------------------------------------------------------
   1240    Data seg end, for brk()
   1241    ------------------------------------------------------------------ */
   1242 
   1243 /*   +--------+------------+
   1244      | anon   |    resvn   |
   1245      +--------+------------+
   1246 
   1247      ^     ^  ^
   1248      |     |  boundary is page aligned
   1249      |     VG_(brk_limit) -- no alignment constraint
   1250      VG_(brk_base) -- page aligned -- does not move
   1251 
   1252      Both the anon part and the reservation part are always at least
   1253      one page.
   1254 */
   1255 
   1256 /* Set the new data segment end to NEWBRK.  If this succeeds, return
   1257    NEWBRK, else return the current data segment end. */
   1258 
   1259 static Addr do_brk ( Addr newbrk, ThreadId tid )
   1260 {
   1261    NSegment const* aseg;
   1262    Addr newbrkP;
   1263    SizeT delta;
   1264    Bool debug = False;
   1265 
   1266    if (debug)
   1267       VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
   1268 		  VG_(brk_base), VG_(brk_limit), newbrk);
   1269 
   1270    if (0) VG_(am_show_nsegments)(0, "in_brk");
   1271 
   1272    if (newbrk < VG_(brk_base))
   1273       /* Clearly impossible. */
   1274       goto bad;
   1275 
   1276    if (newbrk < VG_(brk_limit)) {
   1277       /* shrinking the data segment.  Be lazy and don't munmap the
   1278          excess area. */
   1279       NSegment const * seg = VG_(am_find_nsegment)(newbrk);
   1280       vg_assert(seg);
   1281 
   1282       if (seg->hasT)
   1283          VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
   1284                                     "do_brk(shrink)" );
   1285       /* Since we're being lazy and not unmapping pages, we have to
   1286          zero out the area, so that if the area later comes back into
   1287          circulation, it will be filled with zeroes, as if it really
   1288          had been unmapped and later remapped.  Be a bit paranoid and
   1289          try hard to ensure we're not going to segfault by doing the
   1290          write - check both ends of the range are in the same segment
   1291          and that segment is writable. */
   1292       NSegment const * seg2;
   1293 
   1294       seg2 = VG_(am_find_nsegment)( VG_(brk_limit) - 1 );
   1295       vg_assert(seg2);
   1296 
   1297       if (seg == seg2 && seg->hasW)
   1298          VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
   1299 
   1300       VG_(brk_limit) = newbrk;
   1301       return newbrk;
   1302    }
   1303 
   1304    /* otherwise we're expanding the brk segment. */
   1305    if (VG_(brk_limit) > VG_(brk_base))
   1306       aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   1307    else
   1308       aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
   1309 
   1310    /* These should be assured by setup_client_dataseg in m_main. */
   1311    vg_assert(aseg);
   1312    vg_assert(aseg->kind == SkAnonC);
   1313 
   1314    if (newbrk <= aseg->end + 1) {
   1315       /* still fits within the anon segment. */
   1316       VG_(brk_limit) = newbrk;
   1317       return newbrk;
   1318    }
   1319 
   1320    newbrkP = VG_PGROUNDUP(newbrk);
   1321    delta = newbrkP - (aseg->end + 1);
   1322    vg_assert(delta > 0);
   1323    vg_assert(VG_IS_PAGE_ALIGNED(delta));
   1324 
   1325    Bool overflow = False;
   1326    if (! VG_(am_extend_into_adjacent_reservation_client)( aseg->start, delta,
   1327                                                           &overflow)) {
   1328       if (overflow) {
   1329          static Bool alreadyComplained = False;
   1330          if (!alreadyComplained) {
   1331             alreadyComplained = True;
   1332             if (VG_(clo_verbosity) > 0) {
   1333                VG_(umsg)("brk segment overflow in thread #%u: "
   1334                          "can't grow to %#lx\n",
   1335                          tid, newbrkP);
   1336                VG_(umsg)("(see section Limitations in user manual)\n");
   1337                VG_(umsg)("NOTE: further instances of this message "
   1338                          "will not be shown\n");
   1339             }
   1340          }
   1341       } else {
   1342          if (VG_(clo_verbosity) > 0) {
   1343             VG_(umsg)("Cannot map memory to grow brk segment in thread #%u "
   1344                       "to %#lx\n", tid, newbrkP);
   1345             VG_(umsg)("(see section Limitations in user manual)\n");
   1346          }
   1347       }
   1348       goto bad;
   1349    }
   1350 
   1351    VG_(brk_limit) = newbrk;
   1352    return newbrk;
   1353 
   1354   bad:
   1355    return VG_(brk_limit);
   1356 }
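
/* A hypothetical caller (the real sys_brk wrapper is not reproduced
   here) would use the return value along these lines:

      Addr brk_new = do_brk( ARG1, tid );   ... == ARG1 on success,
                                                else the old VG_(brk_limit)
      SET_STATUS_Success( brk_new );

   so a refused request looks to the client exactly like the kernel
   refusing it: the syscall returns the unchanged break. */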
   1357 
   1358 
   1359 /* ---------------------------------------------------------------------
   1360    Vet file descriptors for sanity
   1361    ------------------------------------------------------------------ */
   1362 /*
   1363 > - what does the "Bool soft" parameter mean?
   1364 
   1365 (Tom Hughes, 3 Oct 05):
   1366 
   1367 Whether or not to consider a file descriptor invalid if it is above
   1368 the current soft limit.
   1369 
   1370 Basically if we are testing whether a newly created file descriptor is
   1371 valid (in a post handler) then we set soft to true, and if we are
   1372 testing whether a file descriptor that is about to be used (in a pre
   1373 handler) is valid [viz, an already-existing fd] then we set it to false.
   1374 
   1375 The point is that if the (virtual) soft limit is lowered then any
   1376 existing descriptors can still be read/written/closed etc (so long as
   1377 they are below the valgrind reserved descriptors) but no new
   1378 descriptors can be created above the new soft limit.
   1379 
   1380 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
   1381 */
   1382 
   1383 /* Return true if we're allowed to use or create this fd */
   1384 Bool ML_(fd_allowed)(Int fd, const HChar *syscallname, ThreadId tid,
   1385                      Bool isNewFd)
   1386 {
   1387    Bool allowed = True;
   1388 
   1389    /* hard limits always apply */
   1390    if (fd < 0 || fd >= VG_(fd_hard_limit))
   1391       allowed = False;
   1392 
   1393    /* hijacking the output fds is never allowed */
   1394    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
   1395       allowed = False;
   1396 
   1397    /* if creating a new fd (rather than using an existing one), the
   1398       soft limit must also be observed */
   1399    if (isNewFd && fd >= VG_(fd_soft_limit))
   1400       allowed = False;
   1401 
   1402    /* this looks like it ought to be included, but causes problems: */
   1403    /*
   1404    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
   1405       allowed = False;
   1406    */
   1407    /* The difficulty is as follows: consider a program P which expects
   1408       to be able to mess with (redirect) its own stderr (fd 2).
   1409       Usually to deal with P we would issue command line flags to send
   1410       logging somewhere other than stderr, so as not to disrupt P.
   1411       The problem is that -d unilaterally hijacks stderr with no
   1412       consultation with P.  And so, if this check is enabled, P will
   1413       work OK normally but fail if -d is issued.
   1414 
   1415       Basically -d is a hack and you take your chances when using it.
   1416       It's very useful for low level debugging -- particularly at
   1417       startup -- and having its presence change the behaviour of the
   1418       client is exactly what we don't want.  */
   1419 
   1420    /* croak? */
   1421    if ((!allowed) && VG_(showing_core_errors)() ) {
   1422       VG_(message)(Vg_UserMsg,
   1423          "Warning: invalid file descriptor %d in syscall %s()\n",
   1424          fd, syscallname);
   1425       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
   1426 	 VG_(message)(Vg_UserMsg,
   1427             "   Use --log-fd=<number> to select an alternative log fd.\n");
   1428       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
   1429 	 VG_(message)(Vg_UserMsg,
   1430             "   Use --xml-fd=<number> to select an alternative XML "
   1431             "output fd.\n");
   1432       // DDD: consider always printing this stack trace, it's useful.
   1433       // Also consider also making this a proper core error, ie.
   1434       // suppressible and all that.
   1435       if (VG_(clo_verbosity) > 1) {
   1436          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   1437       }
   1438    }
   1439 
   1440    return allowed;
   1441 }
   1442 
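        /* Illustrative sketch (not compiled), showing how the isNewFd
           parameter is meant to be used.  A pre handler vets an fd the
           client is about to use, so it passes isNewFd == False (the soft
           limit does not apply); a post handler vets an fd the kernel has
           just created, so it passes isNewFd == True, and if the fd is
           disallowed it closes it and fails the call with EMFILE.  The
           wrapper names below are made up for illustration; the real
           wrappers later in this file (e.g. ML_(generic_POST_sys_socket))
           follow the same pattern. */
        #if 0
        static void example_PRE_uses_existing_fd ( ThreadId tid, Int fd )
        {
           /* About to use an already-existing fd: only the hard limit and
              the reserved Valgrind fds matter. */
           if (!ML_(fd_allowed)(fd, "some_syscall", tid, False/*isNewFd*/)) {
              /* the wrapper would typically fail the call with EBADF */
           }
        }

        static SysRes example_POST_vets_new_fd ( ThreadId tid, SysRes res )
        {
           /* The kernel has just created a new fd: the soft limit must
              also be observed. */
           if (!ML_(fd_allowed)(sr_Res(res), "some_syscall", tid,
                                True/*isNewFd*/)) {
              VG_(close)(sr_Res(res));
              return VG_(mk_SysRes_Error)( VKI_EMFILE );
           }
           return res;
        }
        #endif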
   1443 
   1444 /* ---------------------------------------------------------------------
   1445    Deal with a bunch of socket-related syscalls
   1446    ------------------------------------------------------------------ */
   1447 
   1448 /* ------ */
   1449 
   1450 void
   1451 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
   1452                                   UWord arg0, UWord arg1,
   1453                                   UWord arg2, UWord arg3 )
   1454 {
   1455    /* int socketpair(int d, int type, int protocol, int sv[2]); */
   1456    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
   1457                   arg3, 2*sizeof(int) );
   1458 }
   1459 
   1460 SysRes
   1461 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
   1462                                    SysRes res,
   1463                                    UWord arg0, UWord arg1,
   1464                                    UWord arg2, UWord arg3 )
   1465 {
   1466    SysRes r = res;
   1467    Int fd1 = ((Int*)arg3)[0];
   1468    Int fd2 = ((Int*)arg3)[1];
   1469    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1470    POST_MEM_WRITE( arg3, 2*sizeof(int) );
   1471    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
   1472        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
   1473       VG_(close)(fd1);
   1474       VG_(close)(fd2);
   1475       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1476    } else {
   1477       POST_MEM_WRITE( arg3, 2*sizeof(int) );
   1478       if (VG_(clo_track_fds)) {
   1479          ML_(record_fd_open_nameless)(tid, fd1);
   1480          ML_(record_fd_open_nameless)(tid, fd2);
   1481       }
   1482    }
   1483    return r;
   1484 }
   1485 
   1486 /* ------ */
   1487 
   1488 SysRes
   1489 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
   1490 {
   1491    SysRes r = res;
   1492    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1493    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
   1494       VG_(close)(sr_Res(res));
   1495       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1496    } else {
   1497       if (VG_(clo_track_fds))
   1498          ML_(record_fd_open_nameless)(tid, sr_Res(res));
   1499    }
   1500    return r;
   1501 }
   1502 
   1503 /* ------ */
   1504 
   1505 void
   1506 ML_(generic_PRE_sys_bind) ( ThreadId tid,
   1507                             UWord arg0, UWord arg1, UWord arg2 )
   1508 {
   1509    /* int bind(int sockfd, struct sockaddr *my_addr,
   1510                int addrlen); */
   1511    pre_mem_read_sockaddr(
   1512       tid, "socketcall.bind(my_addr.%s)",
   1513       (struct vki_sockaddr *) arg1, arg2
   1514    );
   1515 }
   1516 
   1517 /* ------ */
   1518 
   1519 void
   1520 ML_(generic_PRE_sys_accept) ( ThreadId tid,
   1521                               UWord arg0, UWord arg1, UWord arg2 )
   1522 {
   1523    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
   1524    Addr addr_p     = arg1;
   1525    Addr addrlen_p  = arg2;
   1526    if (addr_p != (Addr)NULL)
   1527       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
   1528                                    "socketcall.accept(addr)",
   1529                                    "socketcall.accept(addrlen_in)" );
   1530 }
   1531 
   1532 SysRes
   1533 ML_(generic_POST_sys_accept) ( ThreadId tid,
   1534                                SysRes res,
   1535                                UWord arg0, UWord arg1, UWord arg2 )
   1536 {
   1537    SysRes r = res;
   1538    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1539    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
   1540       VG_(close)(sr_Res(res));
   1541       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1542    } else {
   1543       Addr addr_p     = arg1;
   1544       Addr addrlen_p  = arg2;
   1545       if (addr_p != (Addr)NULL)
   1546          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
   1547                                        "socketcall.accept(addrlen_out)" );
   1548       if (VG_(clo_track_fds))
   1549           ML_(record_fd_open_nameless)(tid, sr_Res(res));
   1550    }
   1551    return r;
   1552 }
   1553 
   1554 /* ------ */
   1555 
   1556 void
   1557 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
   1558                               UWord arg0, UWord arg1, UWord arg2,
   1559                               UWord arg3, UWord arg4, UWord arg5 )
   1560 {
   1561    /* int sendto(int s, const void *msg, int len,
   1562                  unsigned int flags,
   1563                  const struct sockaddr *to, int tolen); */
   1564    PRE_MEM_READ( "socketcall.sendto(msg)",
   1565                  arg1, /* msg */
   1566                  arg2  /* len */ );
   1567    pre_mem_read_sockaddr(
   1568       tid, "socketcall.sendto(to.%s)",
   1569       (struct vki_sockaddr *) arg4, arg5
   1570    );
   1571 }
   1572 
   1573 /* ------ */
   1574 
   1575 void
   1576 ML_(generic_PRE_sys_send) ( ThreadId tid,
   1577                             UWord arg0, UWord arg1, UWord arg2 )
   1578 {
   1579    /* int send(int s, const void *msg, size_t len, int flags); */
   1580    PRE_MEM_READ( "socketcall.send(msg)",
   1581                   arg1, /* msg */
   1582                   arg2  /* len */ );
   1583 
   1584 }
   1585 
   1586 /* ------ */
   1587 
   1588 void
   1589 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
   1590                                 UWord arg0, UWord arg1, UWord arg2,
   1591                                 UWord arg3, UWord arg4, UWord arg5 )
   1592 {
   1593    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
   1594                    struct sockaddr *from, int *fromlen); */
   1595    Addr buf_p      = arg1;
   1596    Int  len        = arg2;
   1597    Addr from_p     = arg4;
   1598    Addr fromlen_p  = arg5;
   1599    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
   1600    if (from_p != (Addr)NULL)
   1601       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
   1602                                    "socketcall.recvfrom(from)",
   1603                                    "socketcall.recvfrom(fromlen_in)" );
   1604 }
   1605 
   1606 void
   1607 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
   1608                                  SysRes res,
   1609                                  UWord arg0, UWord arg1, UWord arg2,
   1610                                  UWord arg3, UWord arg4, UWord arg5 )
   1611 {
   1612    Addr buf_p      = arg1;
   1613    Int  len        = arg2;
   1614    Addr from_p     = arg4;
   1615    Addr fromlen_p  = arg5;
   1616 
   1617    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1618    if (from_p != (Addr)NULL)
   1619       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
   1620                                     "socketcall.recvfrom(fromlen_out)" );
   1621    POST_MEM_WRITE( buf_p, len );
   1622 }
   1623 
   1624 /* ------ */
   1625 
   1626 void
   1627 ML_(generic_PRE_sys_recv) ( ThreadId tid,
   1628                             UWord arg0, UWord arg1, UWord arg2 )
   1629 {
   1630    /* int recv(int s, void *buf, int len, unsigned int flags); */
   1631    /* man 2 recv says:
   1632       The  recv call is normally used only on a connected socket
   1633       (see connect(2)) and is identical to recvfrom with a  NULL
   1634       from parameter.
   1635    */
   1636    PRE_MEM_WRITE( "socketcall.recv(buf)",
   1637                   arg1, /* buf */
   1638                   arg2  /* len */ );
   1639 }
   1640 
   1641 void
   1642 ML_(generic_POST_sys_recv) ( ThreadId tid,
   1643                              UWord res,
   1644                              UWord arg0, UWord arg1, UWord arg2 )
   1645 {
   1646    if (res >= 0 && arg1 != 0) {
   1647       POST_MEM_WRITE( arg1, /* buf */
   1648                       arg2  /* len */ );
   1649    }
   1650 }
   1651 
   1652 /* ------ */
   1653 
   1654 void
   1655 ML_(generic_PRE_sys_connect) ( ThreadId tid,
   1656                                UWord arg0, UWord arg1, UWord arg2 )
   1657 {
   1658    /* int connect(int sockfd,
   1659                   struct sockaddr *serv_addr, int addrlen ); */
   1660    pre_mem_read_sockaddr( tid,
   1661                           "socketcall.connect(serv_addr.%s)",
   1662                           (struct vki_sockaddr *) arg1, arg2);
   1663 }
   1664 
   1665 /* ------ */
   1666 
   1667 void
   1668 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
   1669                                   UWord arg0, UWord arg1, UWord arg2,
   1670                                   UWord arg3, UWord arg4 )
   1671 {
   1672    /* int setsockopt(int s, int level, int optname,
   1673                      const void *optval, int optlen); */
   1674    PRE_MEM_READ( "socketcall.setsockopt(optval)",
   1675                  arg3, /* optval */
   1676                  arg4  /* optlen */ );
   1677 }
   1678 
   1679 /* ------ */
   1680 
   1681 void
   1682 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
   1683                                    UWord arg0, UWord arg1, UWord arg2 )
   1684 {
   1685    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
   1686    Addr name_p     = arg1;
   1687    Addr namelen_p  = arg2;
   1688    /* Nb: name_p cannot be NULL */
   1689    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
   1690                                 "socketcall.getsockname(name)",
   1691                                 "socketcall.getsockname(namelen_in)" );
   1692 }
   1693 
   1694 void
   1695 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
   1696                                     SysRes res,
   1697                                     UWord arg0, UWord arg1, UWord arg2 )
   1698 {
   1699    Addr name_p     = arg1;
   1700    Addr namelen_p  = arg2;
   1701    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1702    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
   1703                                  "socketcall.getsockname(namelen_out)" );
   1704 }
   1705 
   1706 /* ------ */
   1707 
   1708 void
   1709 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
   1710                                    UWord arg0, UWord arg1, UWord arg2 )
   1711 {
   1712    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
   1713    Addr name_p     = arg1;
   1714    Addr namelen_p  = arg2;
   1715    /* Nb: name_p cannot be NULL */
   1716    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
   1717                                 "socketcall.getpeername(name)",
   1718                                 "socketcall.getpeername(namelen_in)" );
   1719 }
   1720 
   1721 void
   1722 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
   1723                                     SysRes res,
   1724                                     UWord arg0, UWord arg1, UWord arg2 )
   1725 {
   1726    Addr name_p     = arg1;
   1727    Addr namelen_p  = arg2;
   1728    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1729    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
   1730                                  "socketcall.getpeername(namelen_out)" );
   1731 }
   1732 
   1733 /* ------ */
   1734 
   1735 void
   1736 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, const HChar *name,
   1737                                struct vki_msghdr *msg )
   1738 {
   1739    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg, False );
   1740 }
   1741 
   1742 /* ------ */
   1743 
   1744 void
   1745 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, const HChar *name,
   1746                                struct vki_msghdr *msg )
   1747 {
   1748    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg, True );
   1749 }
   1750 
   1751 void
   1752 ML_(generic_POST_sys_recvmsg) ( ThreadId tid, const HChar *name,
   1753                                 struct vki_msghdr *msg, UInt length )
   1754 {
   1755    msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg, True );
   1756    check_cmsg_for_fds( tid, msg );
   1757 }
   1758 
   1759 
   1760 /* ---------------------------------------------------------------------
   1761    Deal with a bunch of IPC related syscalls
   1762    ------------------------------------------------------------------ */
   1763 
   1764 /* ------ */
   1765 
   1766 void
   1767 ML_(generic_PRE_sys_semop) ( ThreadId tid,
   1768                              UWord arg0, UWord arg1, UWord arg2 )
   1769 {
   1770    /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
   1771    PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
   1772 }
   1773 
   1774 /* ------ */
   1775 
   1776 void
   1777 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
   1778                                   UWord arg0, UWord arg1,
   1779                                   UWord arg2, UWord arg3 )
   1780 {
   1781    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
   1782                      struct timespec *timeout); */
   1783    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
   1784    if (arg3 != 0)
   1785       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
   1786 }
   1787 
   1788 /* ------ */
   1789 
   1790 static
   1791 UInt get_sem_count( Int semid )
   1792 {
   1793    struct vki_semid_ds buf;
   1794    union vki_semun arg;
   1795    SysRes res;
   1796 
   1797    /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
   1798       (experimental) otherwise complains that the use in the return
   1799       statement below is uninitialised. */
   1800    buf.sem_nsems = 0;
   1801 
   1802    arg.buf = &buf;
   1803 
   1804 #  if defined(__NR_semctl)
   1805    res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
   1806 #  elif defined(__NR_semsys) /* Solaris */
   1807    res = VG_(do_syscall5)(__NR_semsys, VKI_SEMCTL, semid, 0, VKI_IPC_STAT,
   1808                           *(UWord *)&arg);
   1809 #  else
   1810    res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
   1811                           VKI_IPC_STAT, (UWord)&arg);
   1812 #  endif
   1813    if (sr_isError(res))
   1814       return 0;
   1815 
   1816    return buf.sem_nsems;
   1817 }
   1818 
   1819 void
   1820 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
   1821                               UWord arg0, UWord arg1,
   1822                               UWord arg2, UWord arg3 )
   1823 {
   1824    /* int semctl(int semid, int semnum, int cmd, ...); */
   1825    union vki_semun arg = *(union vki_semun *)&arg3;
   1826    UInt nsems;
   1827    switch (arg2 /* cmd */) {
   1828 #if defined(VKI_IPC_INFO)
   1829    case VKI_IPC_INFO:
   1830    case VKI_SEM_INFO:
   1831    case VKI_IPC_INFO|VKI_IPC_64:
   1832    case VKI_SEM_INFO|VKI_IPC_64:
   1833       PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
   1834                      (Addr)arg.buf, sizeof(struct vki_seminfo) );
   1835       break;
   1836 #endif
   1837 
   1838    case VKI_IPC_STAT:
   1839 #if defined(VKI_SEM_STAT)
   1840    case VKI_SEM_STAT:
   1841 #endif
   1842       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
   1843                      (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1844       break;
   1845 
   1846 #if defined(VKI_IPC_64)
   1847    case VKI_IPC_STAT|VKI_IPC_64:
   1848 #if defined(VKI_SEM_STAT)
   1849    case VKI_SEM_STAT|VKI_IPC_64:
   1850 #endif
   1851 #endif
   1852 #if defined(VKI_IPC_STAT64)
   1853    case VKI_IPC_STAT64:
   1854 #endif
   1855 #if defined(VKI_IPC_64) || defined(VKI_IPC_STAT64)
   1856       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
   1857                      (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1858       break;
   1859 #endif
   1860 
   1861    case VKI_IPC_SET:
   1862       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
   1863                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1864       break;
   1865 
   1866 #if defined(VKI_IPC_64)
   1867    case VKI_IPC_SET|VKI_IPC_64:
   1868 #endif
   1869 #if defined(VKI_IPC_SET64)
   1870    case VKI_IPC_SET64:
   1871 #endif
   1872 #if defined(VKI_IPC_64) || defined(VKI_IPC_SET64)
   1873       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
   1874                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1875       break;
   1876 #endif
   1877 
   1878    case VKI_GETALL:
   1879 #if defined(VKI_IPC_64)
   1880    case VKI_GETALL|VKI_IPC_64:
   1881 #endif
   1882       nsems = get_sem_count( arg0 );
   1883       PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
   1884                      (Addr)arg.array, sizeof(unsigned short) * nsems );
   1885       break;
   1886 
   1887    case VKI_SETALL:
   1888 #if defined(VKI_IPC_64)
   1889    case VKI_SETALL|VKI_IPC_64:
   1890 #endif
   1891       nsems = get_sem_count( arg0 );
   1892       PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
   1893                     (Addr)arg.array, sizeof(unsigned short) * nsems );
   1894       break;
   1895    }
   1896 }
   1897 
   1898 void
   1899 ML_(generic_POST_sys_semctl) ( ThreadId tid,
   1900                                UWord res,
   1901                                UWord arg0, UWord arg1,
   1902                                UWord arg2, UWord arg3 )
   1903 {
   1904    union vki_semun arg = *(union vki_semun *)&arg3;
   1905    UInt nsems;
   1906    switch (arg2 /* cmd */) {
   1907 #if defined(VKI_IPC_INFO)
   1908    case VKI_IPC_INFO:
   1909    case VKI_SEM_INFO:
   1910    case VKI_IPC_INFO|VKI_IPC_64:
   1911    case VKI_SEM_INFO|VKI_IPC_64:
   1912       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
   1913       break;
   1914 #endif
   1915 
   1916    case VKI_IPC_STAT:
   1917 #if defined(VKI_SEM_STAT)
   1918    case VKI_SEM_STAT:
   1919 #endif
   1920       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1921       break;
   1922 
   1923 #if defined(VKI_IPC_64)
   1924    case VKI_IPC_STAT|VKI_IPC_64:
   1925    case VKI_SEM_STAT|VKI_IPC_64:
   1926 #endif
   1927 #if defined(VKI_IPC_STAT64)
   1928    case VKI_IPC_STAT64:
   1929 #endif
   1930 #if defined(VKI_IPC_64) || defined(VKI_IPC_STAT64)
   1931       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1932       break;
   1933 #endif
   1934 
   1935    case VKI_GETALL:
   1936 #if defined(VKI_IPC_64)
   1937    case VKI_GETALL|VKI_IPC_64:
   1938 #endif
   1939       nsems = get_sem_count( arg0 );
   1940       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
   1941       break;
   1942    }
   1943 }
   1944 
   1945 /* ------ */
   1946 
   1947 /* ------ */
   1948 
   1949 static
   1950 SizeT get_shm_size ( Int shmid )
   1951 {
   1952 #if defined(__NR_shmctl)
   1953 #  ifdef VKI_IPC_64
   1954    struct vki_shmid64_ds buf;
   1955 #    if defined(VGP_amd64_linux) || defined(VGP_arm64_linux)
   1956      /* See bug 222545 comment 7 */
   1957      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
   1958                                      VKI_IPC_STAT, (UWord)&buf);
   1959 #    else
   1960      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
   1961                                      VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
   1962 #    endif
   1963 #  else /* !def VKI_IPC_64 */
   1964    struct vki_shmid_ds buf;
   1965    SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
   1966 #  endif /* def VKI_IPC_64 */
   1967 #elif defined(__NR_shmsys) /* Solaris */
   1968    struct vki_shmid_ds buf;
   1969    SysRes __res = VG_(do_syscall4)(__NR_shmsys, VKI_SHMCTL, shmid, VKI_IPC_STAT,
   1970                          (UWord)&buf);
   1971 #else
   1972    struct vki_shmid_ds buf;
   1973    SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
   1974                                  VKI_IPC_STAT, 0, (UWord)&buf);
   1975 #endif
   1976    if (sr_isError(__res))
   1977       return 0;
   1978 
   1979    return (SizeT) buf.shm_segsz;
   1980 }
   1981 
   1982 UWord
   1983 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
   1984                              UWord arg0, UWord arg1, UWord arg2 )
   1985 {
   1986    /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
   1987    SizeT  segmentSize = get_shm_size ( arg0 );
   1988    UWord tmp;
   1989    Bool  ok;
   1990    if (arg1 == 0) {
   1991       /* arm-linux only: work around the fact that
   1992          VG_(am_get_advisory_client_simple) produces something that is
   1993          VKI_PAGE_SIZE aligned, whereas what we want is something
   1994          VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
   1995          increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
   1996          then round the result up to the next VKI_SHMLBA boundary.
   1997          See bug 222545 comment 15.  So far, arm-linux is the only
   1998          platform where this is known to be necessary. */
   1999       vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
   2000       if (VKI_SHMLBA > VKI_PAGE_SIZE) {
   2001          segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
   2002       }
   2003       tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
   2004       if (ok) {
   2005          if (VKI_SHMLBA > VKI_PAGE_SIZE) {
   2006             arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
   2007          } else {
   2008             arg1 = tmp;
   2009          }
   2010       }
   2011    }
   2012    else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
   2013       arg1 = 0;
   2014    return arg1;
   2015 }
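
        /* Worked example of the SHMLBA fix-up in the PRE handler above,
           assuming (as on some arm systems) VKI_PAGE_SIZE == 0x1000 and
           VKI_SHMLBA == 0x4000: a request for a 0x5000-byte segment is
           inflated to 0x5000 + (0x4000 - 0x1000) == 0x8000 bytes.  If
           aspacem then advises the page-aligned address 0x7f001000,
           VG_ROUNDUP(0x7f001000, 0x4000) == 0x7f004000, and
           0x7f004000 + 0x5000 == 0x7f001000 + 0x8000, so the SHMLBA-aligned
           attach still fits within the advised range. */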
   2016 
   2017 void
   2018 ML_(generic_POST_sys_shmat) ( ThreadId tid,
   2019                               UWord res,
   2020                               UWord arg0, UWord arg1, UWord arg2 )
   2021 {
   2022    SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
   2023    if ( segmentSize > 0 ) {
   2024       UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
   2025       Bool d;
   2026 
   2027       if (arg2 & VKI_SHM_RDONLY)
   2028          prot &= ~VKI_PROT_WRITE;
   2029       /* It isn't exactly correct to pass 0 for the fd and offset
   2030          here.  The kernel seems to think the corresponding section
   2031          does have dev/ino numbers:
   2032 
   2033          04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
   2034 
   2035          However there is no obvious way to find them.  In order to
   2036          cope with the discrepancy, aspacem's sync checker omits the
   2037          dev/ino correspondence check in cases where V does not know
   2038          the dev/ino. */
   2039       d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
   2040 
   2041       /* we don't distinguish whether it's read-only or
   2042        * read-write -- it doesn't matter really. */
   2043       VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
   2044                               0/*di_handle*/ );
   2045       if (d)
   2046          VG_(discard_translations)( (Addr)res,
   2047                                     (ULong)VG_PGROUNDUP(segmentSize),
   2048                                     "ML_(generic_POST_sys_shmat)" );
   2049    }
   2050 }
   2051 
   2052 /* ------ */
   2053 
   2054 Bool
   2055 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
   2056 {
   2057    /* int shmdt(const void *shmaddr); */
   2058    return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
   2059 }
   2060 
   2061 void
   2062 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
   2063 {
   2064    NSegment const* s = VG_(am_find_nsegment)(arg0);
   2065 
   2066    if (s != NULL) {
   2067       Addr  s_start = s->start;
   2068       SizeT s_len   = s->end+1 - s->start;
   2069       Bool  d;
   2070 
   2071       vg_assert(s->kind == SkShmC);
   2072       vg_assert(s->start == arg0);
   2073 
   2074       d = VG_(am_notify_munmap)(s_start, s_len);
   2075       s = NULL; /* s is now invalid */
   2076       VG_TRACK( die_mem_munmap, s_start, s_len );
   2077       if (d)
   2078          VG_(discard_translations)( s_start,
   2079                                     (ULong)s_len,
   2080                                     "ML_(generic_POST_sys_shmdt)" );
   2081    }
   2082 }
   2083 /* ------ */
   2084 
   2085 void
   2086 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
   2087                               UWord arg0, UWord arg1, UWord arg2 )
   2088 {
   2089    /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
   2090    switch (arg1 /* cmd */) {
   2091 #if defined(VKI_IPC_INFO)
   2092    case VKI_IPC_INFO:
   2093       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
   2094                      arg2, sizeof(struct vki_shminfo) );
   2095       break;
   2096 #if defined(VKI_IPC_64)
   2097    case VKI_IPC_INFO|VKI_IPC_64:
   2098       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
   2099                      arg2, sizeof(struct vki_shminfo64) );
   2100       break;
   2101 #endif
   2102 #endif
   2103 
   2104 #if defined(VKI_SHM_INFO)
   2105    case VKI_SHM_INFO:
   2106 #if defined(VKI_IPC_64)
   2107    case VKI_SHM_INFO|VKI_IPC_64:
   2108 #endif
   2109       PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
   2110                      arg2, sizeof(struct vki_shm_info) );
   2111       break;
   2112 #endif
   2113 
   2114    case VKI_IPC_STAT:
   2115 #if defined(VKI_SHM_STAT)
   2116    case VKI_SHM_STAT:
   2117 #endif
   2118       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
   2119                      arg2, sizeof(struct vki_shmid_ds) );
   2120       break;
   2121 
   2122 #if defined(VKI_IPC_64)
   2123    case VKI_IPC_STAT|VKI_IPC_64:
   2124    case VKI_SHM_STAT|VKI_IPC_64:
   2125       PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
   2126                      arg2, sizeof(struct vki_shmid64_ds) );
   2127       break;
   2128 #endif
   2129 
   2130    case VKI_IPC_SET:
   2131       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
   2132                     arg2, sizeof(struct vki_shmid_ds) );
   2133       break;
   2134 
   2135 #if defined(VKI_IPC_64)
   2136    case VKI_IPC_SET|VKI_IPC_64:
   2137       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
   2138                     arg2, sizeof(struct vki_shmid64_ds) );
   2139       break;
   2140 #endif
   2141    }
   2142 }
   2143 
   2144 void
   2145 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
   2146                                UWord res,
   2147                                UWord arg0, UWord arg1, UWord arg2 )
   2148 {
   2149    switch (arg1 /* cmd */) {
   2150 #if defined(VKI_IPC_INFO)
   2151    case VKI_IPC_INFO:
   2152       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
   2153       break;
   2154    case VKI_IPC_INFO|VKI_IPC_64:
   2155       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
   2156       break;
   2157 #endif
   2158 
   2159 #if defined(VKI_SHM_INFO)
   2160    case VKI_SHM_INFO:
   2161    case VKI_SHM_INFO|VKI_IPC_64:
   2162       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
   2163       break;
   2164 #endif
   2165 
   2166    case VKI_IPC_STAT:
   2167 #if defined(VKI_SHM_STAT)
   2168    case VKI_SHM_STAT:
   2169 #endif
   2170       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
   2171       break;
   2172 
   2173 #if defined(VKI_IPC_64)
   2174    case VKI_IPC_STAT|VKI_IPC_64:
   2175    case VKI_SHM_STAT|VKI_IPC_64:
   2176       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
   2177       break;
   2178 #endif
   2179 
   2180 
   2181    }
   2182 }
   2183 
   2184 /* ---------------------------------------------------------------------
   2185    Generic handler for mmap
   2186    ------------------------------------------------------------------ */
   2187 
   2188 /*
   2189  * Although mmap is specified by POSIX and the arguments are generally
   2190  * consistent across platforms, the precise details of the low-level
   2191  * argument passing conventions differ. For example:
   2192  *
   2193  * - On x86-linux there is mmap (aka old_mmap) which takes the
   2194  *   arguments in a memory block and the offset in bytes; and
   2195  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
   2196  *   way and the offset in pages.
   2197  *
   2198  * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
   2199  *   arguments in the normal way and the offset in bytes; and
   2200  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
   2201  *   way and the offset in pages.
   2202  *
   2203  * - On amd64-linux everything is simple and there is just the one
   2204  *   call, mmap (aka sys_mmap)  which takes the arguments in the
   2205  *   normal way and the offset in bytes.
   2206  *
   2207  * - On s390x-linux there is mmap (aka old_mmap) which takes the
   2208  *   arguments in a memory block and the offset in bytes. mmap2
   2209  *   is also available (but not exported via unistd.h) with
   2210  *   arguments in a memory block and the offset in pages.
   2211  *
   2212  * To cope with all this we provide a generic handler function here
   2213  * and then each platform implements one or more system call handlers
   2214  * which call this generic routine after extracting and normalising
   2215  * the arguments.
   2216  */
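
        /* Illustrative sketch (not compiled) of the division of labour
           described above: a hypothetical mmap2-style platform wrapper
           receives the file offset in page-size units, converts it to
           bytes, and then defers to the generic handler.  The wrapper name
           is made up; the real per-platform wrappers live in
           syswrap-<arch>-<os>.c and use the PRE() macro defined later in
           this file. */
        #if 0
        PRE(example_sys_mmap2)
        {
           SysRes r;
           /* mmap2 passes the offset in VKI_PAGE_SIZE units; the generic
              handler expects it in bytes. */
           r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
                                          (Off64T)ARG6 * VKI_PAGE_SIZE );
           SET_STATUS_from_SysRes(r);
        }
        #endif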
   2217 
   2218 SysRes
   2219 ML_(generic_PRE_sys_mmap) ( ThreadId tid,
   2220                             UWord arg1, UWord arg2, UWord arg3,
   2221                             UWord arg4, UWord arg5, Off64T arg6 )
   2222 {
   2223    Addr       advised;
   2224    SysRes     sres;
   2225    MapRequest mreq;
   2226    Bool       mreq_ok;
   2227 
   2228 #  if defined(VGO_darwin)
   2229    // Nb: we can't use this on Darwin, it has races:
   2230    // * needs to RETRY if advisory succeeds but map fails
   2231    //   (could have been some other thread in a nonblocking call)
   2232    // * needs to not use fixed-position mmap() on Darwin
   2233    //   (mmap will cheerfully smash whatever's already there, which might
   2234    //   be a new mapping from some other thread in a nonblocking call)
   2235    VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
   2236 #  endif
   2237 
   2238    if (arg2 == 0) {
   2239       /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
   2240          shall be established. */
   2241       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2242    }
   2243 
   2244    if (!VG_IS_PAGE_ALIGNED(arg1)) {
   2245       /* zap any misaligned addresses. */
   2246       /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
   2247          to fail.   Here, we catch them all. */
   2248       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2249    }
   2250 
   2251    if (!VG_IS_PAGE_ALIGNED(arg6)) {
   2252       /* zap any misaligned offsets. */
   2253       /* SuSV3 says: The off argument is constrained to be aligned and
   2254          sized according to the value returned by sysconf() when
   2255          passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
   2256       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2257    }
   2258 
   2259    /* Figure out what kind of allocation constraints there are
   2260       (fixed/hint/any), and ask aspacem what we should do. */
   2261    mreq.start = arg1;
   2262    mreq.len   = arg2;
   2263    if (arg4 & VKI_MAP_FIXED) {
   2264       mreq.rkind = MFixed;
   2265    } else
   2266 #if defined(VKI_MAP_ALIGN) /* Solaris specific */
   2267    if (arg4 & VKI_MAP_ALIGN) {
   2268       mreq.rkind = MAlign;
   2269       if (mreq.start == 0) {
   2270          mreq.start = VKI_PAGE_SIZE;
   2271       }
   2272       /* VKI_MAP_FIXED and VKI_MAP_ALIGN don't like each other. */
   2273       arg4 &= ~VKI_MAP_ALIGN;
   2274    } else
   2275 #endif
   2276    if (arg1 != 0) {
   2277       mreq.rkind = MHint;
   2278    } else {
   2279       mreq.rkind = MAny;
   2280    }
   2281 
   2282    /* Enquire ... */
   2283    advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   2284    if (!mreq_ok) {
   2285       /* Our request was bounced, so we'd better fail. */
   2286       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2287    }
   2288 
   2289 #  if defined(VKI_MAP_32BIT)
   2290    /* MAP_32BIT is royally unportable, so if the client asks for it, try our
   2291       best to make it work (but without complicating aspacemgr).
   2292       If the user requested MAP_32BIT, the mmap-ed space must lie within the
   2293       first 2GB of the address space.  So, return ENOMEM if the aspacemgr
   2294       advisory is above the first 2GB.  If MAP_FIXED is also requested,
   2295       MAP_32BIT has to be ignored.
   2296       Assumption about aspacemgr behaviour: aspacemgr scans the address space
   2297       from low addresses to find a free segment.  No special effort is made
   2298       to keep the first 2GB 'free' for MAP_32BIT, so this will often
   2299       fail once the program has already allocated significant memory. */
   2300    if ((arg4 & VKI_MAP_32BIT) && !(arg4 & VKI_MAP_FIXED)) {
   2301       if (advised + arg2 >= 0x80000000)
   2302          return VG_(mk_SysRes_Error)( VKI_ENOMEM );
   2303    }
   2304 #  endif
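
           /* Worked example of the check above: with a request of
              arg2 == 0x20000000 (512MB) and an advisory of 0x70000000,
              advised + arg2 == 0x90000000 >= 0x80000000, so the request is
              refused with ENOMEM rather than handing the client a mapping
              that extends beyond the first 2GB. */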
   2305 
   2306    /* Otherwise we're OK (so far).  Install aspacem's choice of
   2307       address, and let the mmap go through.  */
   2308    sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
   2309                                     arg4 | VKI_MAP_FIXED,
   2310                                     arg5, arg6);
   2311 
   2312 #  if defined(VKI_MAP_32BIT)
   2313    /* No recovery trial if the advisory was not accepted. */
   2314    if ((arg4 & VKI_MAP_32BIT) && !(arg4 & VKI_MAP_FIXED)
   2315        && sr_isError(sres)) {
   2316       return VG_(mk_SysRes_Error)( VKI_ENOMEM );
   2317    }
   2318 #  endif
   2319 
   2320    /* A refinement: it may be that the kernel refused aspacem's choice
   2321       of address.  If we were originally asked for a hinted mapping,
   2322       there is still a last chance: try again at any address.
   2323       Hence: */
   2324    if (mreq.rkind == MHint && sr_isError(sres)) {
   2325       mreq.start = 0;
   2326       mreq.len   = arg2;
   2327       mreq.rkind = MAny;
   2328       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   2329       if (!mreq_ok) {
   2330          /* Our request was bounced, so we'd better fail. */
   2331          return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2332       }
   2333       /* and try again with the kernel */
   2334       sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
   2335                                        arg4 | VKI_MAP_FIXED,
   2336                                        arg5, arg6);
   2337    }
   2338 
   2339    /* Yet another refinement: sometimes valgrind chooses an address
   2340       which is not acceptable to the kernel.  This happens at least
   2341       when mmap-ing huge pages, using the flag MAP_HUGETLB.
   2342       valgrind's aspacem does not know about huge pages, and modifying
   2343       it to handle them is not straightforward (e.g. it would need
   2344       to understand special file system mount options).
   2345       So, just redo the mmap without giving any constraint to
   2346       the kernel.  If that succeeds, check with aspacem that the
   2347       returned address is acceptable.
   2348       This gives a similar effect to the user having hinted that
   2349       address.
   2350       The aspacem state will be correctly updated afterwards.
   2351       We cannot, however, apply this last refinement when the user asked
   2352       for a fixed mapping, since the user asked for a specific address. */
   2353    if (sr_isError(sres) && !(arg4 & VKI_MAP_FIXED)) {
   2354       advised = 0;
   2355       /* try mmap with NULL address and without VKI_MAP_FIXED
   2356          to let the kernel decide. */
   2357       sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
   2358                                        arg4,
   2359                                        arg5, arg6);
   2360       if (!sr_isError(sres)) {
   2361          /* The kernel is supposed to know what it is doing, but let's
   2362             do a last sanity check anyway, as if the chosen address had
   2363             been initially hinted by the client.  The whole point of this
   2364             last try was to allow mmap of huge pages to succeed without
   2365             making aspacem understand them; on the other hand, the kernel
   2366             does not know about valgrind's reservations, so this mapping
   2367             could end up in free space or inside a reservation. */
   2368          mreq.start = (Addr)sr_Res(sres);
   2369          mreq.len   = arg2;
   2370          mreq.rkind = MHint;
   2371          advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   2372          vg_assert(mreq_ok && advised == mreq.start);
   2373       }
   2374    }
   2375 
   2376    if (!sr_isError(sres)) {
   2377       ULong di_handle;
   2378       /* Notify aspacem. */
   2379       notify_core_of_mmap(
   2380          (Addr)sr_Res(sres), /* addr kernel actually assigned */
   2381          arg2, /* length */
   2382          arg3, /* prot */
   2383          arg4, /* the original flags value */
   2384          arg5, /* fd */
   2385          arg6  /* offset */
   2386       );
   2387       /* Load symbols? */
   2388       di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
   2389                                        False/*allow_SkFileV*/, (Int)arg5 );
   2390       /* Notify the tool. */
   2391       notify_tool_of_mmap(
   2392          (Addr)sr_Res(sres), /* addr kernel actually assigned */
   2393          arg2, /* length */
   2394          arg3, /* prot */
   2395          di_handle /* so the tool can refer to the read debuginfo later,
   2396                       if it wants. */
   2397       );
   2398    }
   2399 
   2400    /* Stay sane */
   2401    if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
   2402       vg_assert(sr_Res(sres) == arg1);
   2403 
   2404    return sres;
   2405 }
   2406 
   2407 
   2408 /* ---------------------------------------------------------------------
   2409    The Main Entertainment ... syscall wrappers
   2410    ------------------------------------------------------------------ */
   2411 
   2412 /* Note: the PRE() and POST() wrappers are for the actual functions
   2413    implementing the system calls in the OS kernel.  These mostly have
   2414    names like sys_write();  a few have names like old_mmap().  See the
   2415    comment for ML_(syscall_table)[] for important info about the __NR_foo
   2416    constants and their relationship to the sys_foo() functions.
   2417 
   2418    Some notes about names used for syscalls and args:
   2419    - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
   2420      ambiguity.
   2421 
   2422    - For error messages, we generally use a somewhat generic name
   2423      for the syscall (eg. "write" rather than "sys_write").  This should be
   2424      good enough for the average user to understand what is happening,
   2425      without confusing them with names like "sys_write".
   2426 
   2427    - Also, for error messages the arg names are mostly taken from the man
   2428      pages (even though many of those man pages are really for glibc
   2429      functions of the same name), rather than from the OS kernel source,
   2430      for the same reason -- a user presented with a "bogus foo(bar)" arg
   2431      will most likely look at the "foo" man page to see which is the "bar"
   2432      arg.
   2433 
   2434    Note that we use our own vki_* types.  The one exception is in
   2435    PRE_REG_READn calls, where pointer types haven't been changed, because
   2436    they don't need to be -- eg. for "foo*" to be used, the type foo need not
   2437    be visible.
   2438 
   2439    XXX: some of these are arch-specific, and should be factored out.
   2440 */
   2441 
   2442 #define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
   2443 #define POST(name)     DEFN_POST_TEMPLATE(generic, name)
   2444 
   2445 PRE(sys_exit)
   2446 {
   2447    ThreadState* tst;
   2448    /* simple; just make this thread exit */
   2449    PRINT("exit( %ld )", SARG1);
   2450    PRE_REG_READ1(void, "exit", int, status);
   2451    tst = VG_(get_ThreadState)(tid);
   2452    /* Set the thread's status to be exiting, then claim that the
   2453       syscall succeeded. */
   2454    tst->exitreason = VgSrc_ExitThread;
   2455    tst->os_state.exitcode = ARG1;
   2456    SET_STATUS_Success(0);
   2457 }
   2458 
   2459 PRE(sys_ni_syscall)
   2460 {
   2461    PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
   2462       VG_SYSNUM_STRING(SYSNO));
   2463    PRE_REG_READ0(long, "ni_syscall");
   2464    SET_STATUS_Failure( VKI_ENOSYS );
   2465 }
   2466 
   2467 PRE(sys_iopl)
   2468 {
   2469    PRINT("sys_iopl ( %lu )", ARG1);
   2470    PRE_REG_READ1(long, "iopl", unsigned long, level);
   2471 }
   2472 
   2473 PRE(sys_fsync)
   2474 {
   2475    *flags |= SfMayBlock;
   2476    PRINT("sys_fsync ( %lu )", ARG1);
   2477    PRE_REG_READ1(long, "fsync", unsigned int, fd);
   2478 }
   2479 
   2480 PRE(sys_fdatasync)
   2481 {
   2482    *flags |= SfMayBlock;
   2483    PRINT("sys_fdatasync ( %lu )", ARG1);
   2484    PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
   2485 }
   2486 
   2487 PRE(sys_msync)
   2488 {
   2489    *flags |= SfMayBlock;
   2490    PRINT("sys_msync ( %#lx, %lu, %#lx )", ARG1, ARG2, ARG3);
   2491    PRE_REG_READ3(long, "msync",
   2492                  unsigned long, start, vki_size_t, length, int, flags);
   2493    PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
   2494 }
   2495 
   2496 // Nb: getpmsg() and putpmsg() are special additional syscalls used in early
   2497 // versions of LiS (Linux Streams).  They are not part of the kernel.
   2498 // Therefore, we have to provide this type ourselves, rather than getting it
   2499 // from the kernel sources.
   2500 struct vki_pmsg_strbuf {
   2501    int     maxlen;         /* no. of bytes in buffer */
   2502    int     len;            /* no. of bytes returned */
   2503    vki_caddr_t buf;        /* pointer to data */
   2504 };
   2505 PRE(sys_getpmsg)
   2506 {
   2507    /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
   2508    struct vki_pmsg_strbuf *ctrl;
   2509    struct vki_pmsg_strbuf *data;
   2510    *flags |= SfMayBlock;
   2511    PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", SARG1, ARG2, ARG3,
   2512          ARG4, ARG5);
   2513    PRE_REG_READ5(int, "getpmsg",
   2514                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
   2515                  int *, bandp, int *, flagsp);
   2516    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2517    data = (struct vki_pmsg_strbuf *)ARG3;
   2518    if (ctrl && ctrl->maxlen > 0)
   2519       PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
   2520    if (data && data->maxlen > 0)
   2521       PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
   2522    if (ARG4)
   2523       PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
   2524    if (ARG5)
   2525       PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
   2526 }
   2527 POST(sys_getpmsg)
   2528 {
   2529    struct vki_pmsg_strbuf *ctrl;
   2530    struct vki_pmsg_strbuf *data;
   2531    vg_assert(SUCCESS);
   2532    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2533    data = (struct vki_pmsg_strbuf *)ARG3;
   2534    if (RES == 0 && ctrl && ctrl->len > 0) {
   2535       POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
   2536    }
   2537    if (RES == 0 && data && data->len > 0) {
   2538       POST_MEM_WRITE( (Addr)data->buf, data->len);
   2539    }
   2540 }
   2541 
   2542 PRE(sys_putpmsg)
   2543 {
   2544    /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
   2545    struct vki_pmsg_strbuf *ctrl;
   2546    struct vki_pmsg_strbuf *data;
   2547    *flags |= SfMayBlock;
   2548    PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", SARG1, ARG2, ARG3,
   2549          SARG4, SARG5);
   2550    PRE_REG_READ5(int, "putpmsg",
   2551                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
   2552                  int, band, int, flags);
   2553    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2554    data = (struct vki_pmsg_strbuf *)ARG3;
   2555    if (ctrl && ctrl->len > 0)
   2556       PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
   2557    if (data && data->len > 0)
   2558       PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
   2559 }
   2560 
   2561 PRE(sys_getitimer)
   2562 {
   2563    struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2564    PRINT("sys_getitimer ( %ld, %#lx )", SARG1, ARG2);
   2565    PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
   2566 
   2567    PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
   2568    PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
   2569 }
   2570 
   2571 POST(sys_getitimer)
   2572 {
   2573    if (ARG2 != (Addr)NULL) {
   2574       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2575       POST_timeval_WRITE( &(value->it_interval) );
   2576       POST_timeval_WRITE( &(value->it_value) );
   2577    }
   2578 }
   2579 
   2580 PRE(sys_setitimer)
   2581 {
   2582    PRINT("sys_setitimer ( %ld, %#lx, %#lx )", SARG1, ARG2, ARG3);
   2583    PRE_REG_READ3(long, "setitimer",
   2584                  int, which,
   2585                  struct itimerval *, value, struct itimerval *, ovalue);
   2586    if (ARG2 != (Addr)NULL) {
   2587       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2588       PRE_timeval_READ( "setitimer(&value->it_interval)",
   2589                          &(value->it_interval));
   2590       PRE_timeval_READ( "setitimer(&value->it_value)",
   2591                          &(value->it_value));
   2592    }
   2593    if (ARG3 != (Addr)NULL) {
   2594       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
   2595       PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
   2596                          &(ovalue->it_interval));
   2597       PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
   2598                          &(ovalue->it_value));
   2599    }
   2600 }
   2601 
   2602 POST(sys_setitimer)
   2603 {
   2604    if (ARG3 != (Addr)NULL) {
   2605       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
   2606       POST_timeval_WRITE( &(ovalue->it_interval) );
   2607       POST_timeval_WRITE( &(ovalue->it_value) );
   2608    }
   2609 }
   2610 
   2611 PRE(sys_chroot)
   2612 {
   2613    PRINT("sys_chroot ( %#lx )", ARG1);
   2614    PRE_REG_READ1(long, "chroot", const char *, path);
   2615    PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
   2616 }
   2617 
   2618 PRE(sys_madvise)
   2619 {
   2620    *flags |= SfMayBlock;
   2621    PRINT("sys_madvise ( %#lx, %lu, %ld )", ARG1, ARG2, SARG3);
   2622    PRE_REG_READ3(long, "madvise",
   2623                  unsigned long, start, vki_size_t, length, int, advice);
   2624 }
   2625 
   2626 #if HAVE_MREMAP
   2627 PRE(sys_mremap)
   2628 {
   2629    // Nb: this is different to the glibc version described in the man pages,
   2630    // which lacks the fifth 'new_address' argument.
   2631    if (ARG4 & VKI_MREMAP_FIXED) {
   2632       PRINT("sys_mremap ( %#lx, %lu, %lu, %#lx, %#lx )",
   2633             ARG1, ARG2, ARG3, ARG4, ARG5);
   2634       PRE_REG_READ5(unsigned long, "mremap",
   2635                     unsigned long, old_addr, unsigned long, old_size,
   2636                     unsigned long, new_size, unsigned long, flags,
   2637                     unsigned long, new_addr);
   2638    } else {
   2639       PRINT("sys_mremap ( %#lx, %lu, %lu, 0x%lx )",
   2640             ARG1, ARG2, ARG3, ARG4);
   2641       PRE_REG_READ4(unsigned long, "mremap",
   2642                     unsigned long, old_addr, unsigned long, old_size,
   2643                     unsigned long, new_size, unsigned long, flags);
   2644    }
   2645    SET_STATUS_from_SysRes(
   2646       do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
   2647    );
   2648 }
   2649 #endif /* HAVE_MREMAP */
   2650 
   2651 PRE(sys_nice)
   2652 {
   2653    PRINT("sys_nice ( %ld )", SARG1);
   2654    PRE_REG_READ1(long, "nice", int, inc);
   2655 }
   2656 
   2657 PRE(sys_mlock)
   2658 {
   2659    *flags |= SfMayBlock;
   2660    PRINT("sys_mlock ( %#lx, %lu )", ARG1, ARG2);
   2661    PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
   2662 }
   2663 
   2664 PRE(sys_munlock)
   2665 {
   2666    *flags |= SfMayBlock;
   2667    PRINT("sys_munlock ( %#lx, %lu )", ARG1, ARG2);
   2668    PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
   2669 }
   2670 
   2671 PRE(sys_mlockall)
   2672 {
   2673    *flags |= SfMayBlock;
   2674    PRINT("sys_mlockall ( %lx )", ARG1);
   2675    PRE_REG_READ1(long, "mlockall", int, flags);
   2676 }
   2677 
   2678 PRE(sys_setpriority)
   2679 {
   2680    PRINT("sys_setpriority ( %ld, %ld, %ld )", SARG1, SARG2, SARG3);
   2681    PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
   2682 }
   2683 
   2684 PRE(sys_getpriority)
   2685 {
   2686    PRINT("sys_getpriority ( %ld, %ld )", SARG1, SARG2);
   2687    PRE_REG_READ2(long, "getpriority", int, which, int, who);
   2688 }
   2689 
   2690 PRE(sys_pwrite64)
   2691 {
   2692    *flags |= SfMayBlock;
   2693 #if VG_WORDSIZE == 4
   2694    PRINT("sys_pwrite64 ( %lu, %#lx, %lu, %lld )",
   2695          ARG1, ARG2, ARG3, (Long)MERGE64(ARG4,ARG5));
   2696    PRE_REG_READ5(ssize_t, "pwrite64",
   2697                  unsigned int, fd, const char *, buf, vki_size_t, count,
   2698                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
   2699 #elif VG_WORDSIZE == 8
   2700    PRINT("sys_pwrite64 ( %lu, %#lx, %lu, %ld )",
   2701          ARG1, ARG2, ARG3, SARG4);
   2702    PRE_REG_READ4(ssize_t, "pwrite64",
   2703                  unsigned int, fd, const char *, buf, vki_size_t, count,
   2704                  Word, offset);
   2705 #else
   2706 #  error Unexpected word size
   2707 #endif
   2708    PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
   2709 }
   2710 
   2711 PRE(sys_sync)
   2712 {
   2713    *flags |= SfMayBlock;
   2714    PRINT("sys_sync ( )");
   2715    PRE_REG_READ0(long, "sync");
   2716 }
   2717 
   2718 PRE(sys_fstatfs)
   2719 {
   2720    FUSE_COMPATIBLE_MAY_BLOCK();
   2721    PRINT("sys_fstatfs ( %lu, %#lx )", ARG1, ARG2);
   2722    PRE_REG_READ2(long, "fstatfs",
   2723                  unsigned int, fd, struct statfs *, buf);
   2724    PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
   2725 }
   2726 
   2727 POST(sys_fstatfs)
   2728 {
   2729    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
   2730 }
   2731 
   2732 PRE(sys_fstatfs64)
   2733 {
   2734    FUSE_COMPATIBLE_MAY_BLOCK();
   2735    PRINT("sys_fstatfs64 ( %lu, %lu, %#lx )", ARG1, ARG2, ARG3);
   2736    PRE_REG_READ3(long, "fstatfs64",
   2737                  unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
   2738    PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
   2739 }
   2740 POST(sys_fstatfs64)
   2741 {
   2742    POST_MEM_WRITE( ARG3, ARG2 );
   2743 }
   2744 
   2745 PRE(sys_getsid)
   2746 {
   2747    PRINT("sys_getsid ( %ld )", SARG1);
   2748    PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
   2749 }
   2750 
   2751 PRE(sys_pread64)
   2752 {
   2753    *flags |= SfMayBlock;
   2754 #if VG_WORDSIZE == 4
   2755    PRINT("sys_pread64 ( %lu, %#lx, %lu, %lld )",
   2756          ARG1, ARG2, ARG3, (Long)MERGE64(ARG4,ARG5));
   2757    PRE_REG_READ5(ssize_t, "pread64",
   2758                  unsigned int, fd, char *, buf, vki_size_t, count,
   2759                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
   2760 #elif VG_WORDSIZE == 8
   2761    PRINT("sys_pread64 ( %lu, %#lx, %lu, %ld )",
   2762          ARG1, ARG2, ARG3, SARG4);
   2763    PRE_REG_READ4(ssize_t, "pread64",
   2764                  unsigned int, fd, char *, buf, vki_size_t, count,
   2765                  Word, offset);
   2766 #else
   2767 #  error Unexpected word size
   2768 #endif
   2769    PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
   2770 }
   2771 POST(sys_pread64)
   2772 {
   2773    vg_assert(SUCCESS);
   2774    if (RES > 0) {
   2775       POST_MEM_WRITE( ARG2, RES );
   2776    }
   2777 }
   2778 
   2779 PRE(sys_mknod)
   2780 {
   2781    FUSE_COMPATIBLE_MAY_BLOCK();
   2782    PRINT("sys_mknod ( %#lx(%s), %#lx, %#lx )", ARG1, (HChar*)ARG1, ARG2, ARG3 );
   2783    PRE_REG_READ3(long, "mknod",
   2784                  const char *, pathname, int, mode, unsigned, dev);
   2785    PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
   2786 }
   2787 
   2788 PRE(sys_flock)
   2789 {
   2790    *flags |= SfMayBlock;
   2791    PRINT("sys_flock ( %lu, %lu )", ARG1, ARG2 );
   2792    PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
   2793 }
   2794 
   2795 // Pre_read a char** argument.
   2796 void ML_(pre_argv_envp)(Addr a, ThreadId tid, const HChar *s1, const HChar *s2)
   2797 {
   2798    while (True) {
   2799       Addr a_deref;
   2800       Addr* a_p = (Addr*)a;
   2801       PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
   2802       a_deref = *a_p;
   2803       if (0 == a_deref)
   2804          break;
   2805       PRE_MEM_RASCIIZ( s2, a_deref );
   2806       a += sizeof(char*);
   2807    }
   2808 }
   2809 
   2810 static Bool i_am_the_only_thread ( void )
   2811 {
   2812    Int c = VG_(count_living_threads)();
   2813    vg_assert(c >= 1); /* stay sane */
   2814    return c == 1;
   2815 }
   2816 
   2817 /* Wait until all other threads disappear. */
   2818 void VG_(reap_threads)(ThreadId self)
   2819 {
   2820    while (!i_am_the_only_thread()) {
   2821       /* Let other thread(s) run */
   2822       VG_(vg_yield)();
   2823       VG_(poll_signals)(self);
   2824    }
   2825    vg_assert(i_am_the_only_thread());
   2826 }
   2827 
   2828 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
   2829 // but it seems to work nonetheless...
   2830 PRE(sys_execve)
   2831 {
   2832    HChar*       path = NULL;       /* path to executable */
   2833    HChar**      envp = NULL;
   2834    HChar**      argv = NULL;
   2835    HChar**      arg2copy;
   2836    HChar*       launcher_basename = NULL;
   2837    ThreadState* tst;
   2838    Int          i, j, tot_args;
   2839    SysRes       res;
   2840    Bool         setuid_allowed, trace_this_child;
   2841 
   2842    PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (HChar*)ARG1, ARG2, ARG3);
   2843    PRE_REG_READ3(vki_off_t, "execve",
   2844                  char *, filename, char **, argv, char **, envp);
   2845    PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
   2846    if (ARG2 != 0) {
   2847       /* At least the terminating NULL must be addressable. */
   2848       if (!ML_(safe_to_deref)((HChar **) ARG2, sizeof(HChar *))) {
   2849          SET_STATUS_Failure(VKI_EFAULT);
   2850          return;
   2851       }
   2852       ML_(pre_argv_envp)( ARG2, tid, "execve(argv)", "execve(argv[i])" );
   2853    }
   2854    if (ARG3 != 0) {
   2855       /* At least the terminating NULL must be addressable. */
   2856       if (!ML_(safe_to_deref)((HChar **) ARG3, sizeof(HChar *))) {
   2857          SET_STATUS_Failure(VKI_EFAULT);
   2858          return;
   2859       }
   2860       ML_(pre_argv_envp)( ARG3, tid, "execve(envp)", "execve(envp[i])" );
   2861    }
   2862 
   2863    vg_assert(VG_(is_valid_tid)(tid));
   2864    tst = VG_(get_ThreadState)(tid);
   2865 
   2866    /* Erk.  If the exec fails, then the following will have made a
   2867       mess of things which makes it hard for us to continue.  The
   2868       right thing to do is piece everything together again in
   2869       POST(execve), but that's close to impossible.  Instead, we make
   2870       an effort to check that the execve will work before actually
   2871       doing it. */
   2872 
   2873    /* Check that the name at least begins in client-accessible storage. */
   2874    if (ARG1 == 0 /* obviously bogus */
   2875        || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
   2876       SET_STATUS_Failure( VKI_EFAULT );
   2877       return;
   2878    }
   2879 
   2880    // debug-only printing
   2881    if (0) {
   2882       VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
   2883       if (ARG2) {
   2884          VG_(printf)("ARG2 = ");
   2885          Int q;
   2886          HChar** vec = (HChar**)ARG2;
   2887          for (q = 0; vec[q]; q++)
   2888             VG_(printf)("%p(%s) ", vec[q], vec[q]);
   2889          VG_(printf)("\n");
   2890       } else {
   2891          VG_(printf)("ARG2 = null\n");
   2892       }
   2893    }
   2894 
   2895    // Decide whether or not we want to follow along
   2896    { // Make 'child_argv' be a pointer to the child's arg vector
   2897      // (skipping the exe name)
   2898      const HChar** child_argv = (const HChar**)ARG2;
   2899      if (child_argv && child_argv[0] == NULL)
   2900         child_argv = NULL;
   2901      trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
   2902    }
   2903 
   2904    // Do the important checks:  it is a file, is executable, permissions are
   2905    // ok, etc.  We allow setuid executables to run only in the case when
   2906    // we are not simulating them, that is, when they are to be run natively.
   2907    setuid_allowed = trace_this_child  ? False  : True;
   2908    res = VG_(pre_exec_check)((const HChar *)ARG1, NULL, setuid_allowed);
   2909    if (sr_isError(res)) {
   2910       SET_STATUS_Failure( sr_Err(res) );
   2911       return;
   2912    }
   2913 
   2914    /* If we're tracing the child, and the launcher name looks bogus
   2915       (possibly because launcher.c couldn't figure it out, see
   2916       comments therein) then we have no option but to fail. */
   2917    if (trace_this_child
   2918        && (VG_(name_of_launcher) == NULL
   2919            || VG_(name_of_launcher)[0] != '/')) {
   2920       SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
   2921       return;
   2922    }
   2923 
   2924    /* After this point, we can't recover if the execve fails. */
   2925    VG_(debugLog)(1, "syswrap", "Exec of %s\n", (HChar*)ARG1);
   2926 
   2927 
   2928    // Terminate gdbserver if it is active.
   2929    if (VG_(clo_vgdb)  != Vg_VgdbNo) {
   2930       // If the child will not be traced, we need to terminate gdbserver
   2931       // to clean up the gdbserver resources (e.g. the FIFO files).
   2932       // If the child will be traced, we also terminate gdbserver: the new
   2933       // Valgrind will start a fresh gdbserver after exec.
   2934       VG_(gdbserver) (0);
   2935    }
   2936 
   2937    /* Resistance is futile.  Nuke all other threads.  POSIX mandates
   2938       this. (Really, nuke them all, since the new process will make
   2939       its own new thread.) */
   2940    VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
   2941    VG_(reap_threads)(tid);
   2942 
   2943    // Set up the child's exe path.
   2944    //
   2945    if (trace_this_child) {
   2946 
   2947       // We want to exec the launcher.  Get its pre-remembered path.
   2948       path = VG_(name_of_launcher);
   2949       // VG_(name_of_launcher) should have been acquired by m_main at
   2950       // startup.
   2951       vg_assert(path);
   2952 
   2953       launcher_basename = VG_(strrchr)(path, '/');
   2954       if (launcher_basename == NULL || launcher_basename[1] == 0) {
   2955          launcher_basename = path;  // hmm, very dubious
   2956       } else {
   2957          launcher_basename++;
   2958       }
   2959 
   2960    } else {
   2961       path = (HChar*)ARG1;
   2962    }
   2963 
   2964    // Set up the child's environment.
   2965    //
   2966    // Remove the valgrind-specific stuff from the environment so the
   2967    // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
   2968    // This is done unconditionally, since if we are tracing the child,
   2969    // the child valgrind will set up the appropriate client environment.
   2970    // Nb: we make a copy of the environment before trying to mangle it
   2971    // as it might be in read-only memory (this was bug #101881).
   2972    //
   2973    // Then, if tracing the child, set VALGRIND_LIB for it.
   2974    //
   2975    if (ARG3 == 0) {
   2976       envp = NULL;
   2977    } else {
   2978       envp = VG_(env_clone)( (HChar**)ARG3 );
   2979       if (envp == NULL) goto hosed;
   2980       VG_(env_remove_valgrind_env_stuff)( envp, True /*ro_strings*/, NULL );
   2981    }
   2982 
   2983    if (trace_this_child) {
   2984       // Set VALGRIND_LIB in ARG3 (the environment)
   2985       VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
   2986    }
   2987 
   2988    // Set up the child's args.  If not tracing it, they are
   2989    // simply ARG2.  Otherwise, they are
   2990    //
   2991    // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
   2992    //
   2993    // except that the first VG_(args_for_valgrind_noexecpass) args
   2994    // are omitted.
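           //
           // Illustrative example (the exact contents depend on how
           // Valgrind was invoked): if Valgrind was started as
           //    valgrind --tool=memcheck ./app
           // and the client calls execve("/bin/ls", {"ls","-l",NULL}, envp),
           // the traced child's argv becomes roughly
           //    { "valgrind", "--tool=memcheck", "/bin/ls", "-l", NULL }
           // with the first VG_(args_for_valgrind_noexecpass) of Valgrind's
           // args omitted, as noted above.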
   2995    //
   2996    if (!trace_this_child) {
   2997       argv = (HChar**)ARG2;
   2998    } else {
   2999       vg_assert( VG_(args_for_valgrind) );
   3000       vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
   3001       vg_assert( VG_(args_for_valgrind_noexecpass)
   3002                    <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
   3003       /* how many args in total will there be? */
   3004       // launcher basename
   3005       tot_args = 1;
   3006       // V's args
   3007       tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
   3008       tot_args -= VG_(args_for_valgrind_noexecpass);
   3009       // name of client exe
   3010       tot_args++;
   3011       // args for client exe, skipping [0]
   3012       arg2copy = (HChar**)ARG2;
   3013       if (arg2copy && arg2copy[0]) {
   3014          for (i = 1; arg2copy[i]; i++)
   3015             tot_args++;
   3016       }
   3017       // allocate
   3018       argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
   3019                           (tot_args+1) * sizeof(HChar*) );
   3020       // copy
   3021       j = 0;
   3022       argv[j++] = launcher_basename;
   3023       for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
   3024          if (i < VG_(args_for_valgrind_noexecpass))
   3025             continue;
   3026          argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
   3027       }
   3028       argv[j++] = (HChar*)ARG1;
   3029       if (arg2copy && arg2copy[0])
   3030          for (i = 1; arg2copy[i]; i++)
   3031             argv[j++] = arg2copy[i];
   3032       argv[j++] = NULL;
   3033       // check
   3034       vg_assert(j == tot_args+1);
   3035    }
   3036 
   3037    /*
   3038       Set the signal state up for exec.
   3039 
   3040       We need to set the real signal state to make sure the exec'd
   3041       process gets SIG_IGN properly.
   3042 
   3043       Also set our real sigmask to match the client's sigmask so that
   3044       the exec'd child will get the right mask.  First we need to
   3045       clear out any pending signals so that they don't get delivered,
   3046       which would confuse things.
   3047 
   3048       XXX This is a bug - the signals should remain pending, and be
   3049       delivered to the new process after exec.  There's also a
   3050       race-condition, since if someone delivers us a signal between
   3051       the sigprocmask and the execve, we'll still get the signal. Oh
   3052       well.
   3053    */
   3054    {
   3055       vki_sigset_t allsigs;
   3056       vki_siginfo_t info;
   3057 
   3058       /* What this loop does: it queries SCSS (the signal state that
   3059          the client _thinks_ the kernel is in) by calling
   3060          VG_(do_sys_sigaction), and modifies the real kernel signal
   3061          state accordingly. */
   3062       for (i = 1; i < VG_(max_signal); i++) {
   3063          vki_sigaction_fromK_t sa_f;
   3064          vki_sigaction_toK_t   sa_t;
   3065          VG_(do_sys_sigaction)(i, NULL, &sa_f);
   3066          VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
   3067          if (sa_t.ksa_handler == VKI_SIG_IGN)
   3068             VG_(sigaction)(i, &sa_t, NULL);
   3069          else {
   3070             sa_t.ksa_handler = VKI_SIG_DFL;
   3071             VG_(sigaction)(i, &sa_t, NULL);
   3072          }
   3073       }
   3074 
   3075       VG_(sigfillset)(&allsigs);
   3076       while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
   3077          ;
   3078 
   3079       VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
   3080    }
   3081 
   3082    if (0) {
   3083       HChar **cpp;
   3084       VG_(printf)("exec: %s\n", path);
   3085       for (cpp = argv; cpp && *cpp; cpp++)
   3086          VG_(printf)("argv: %s\n", *cpp);
   3087       if (0)
   3088          for (cpp = envp; cpp && *cpp; cpp++)
   3089             VG_(printf)("env: %s\n", *cpp);
   3090    }
   3091 
   3092    SET_STATUS_from_SysRes(
   3093       VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
   3094    );
   3095 
   3096    /* If we got here, then the execve failed.  We've already made way
   3097       too much of a mess to continue, so we have to abort. */
   3098   hosed:
   3099    vg_assert(FAILURE);
   3100    VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %lu\n",
   3101                 ARG1, (HChar*)ARG1, ARG2, ARG3, ERR);
   3102    VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
   3103                             "execve() failing, so I'm dying.\n");
   3104    VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
   3105                             "or work out how to recover.\n");
   3106    VG_(exit)(101);
   3107 }
   3108 
   3109 PRE(sys_access)
   3110 {
   3111    PRINT("sys_access ( %#lx(%s), %ld )", ARG1, (HChar*)ARG1, SARG2);
   3112    PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
   3113    PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
   3114 }
   3115 
   3116 PRE(sys_alarm)
   3117 {
   3118    PRINT("sys_alarm ( %lu )", ARG1);
   3119    PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
   3120 }
   3121 
   3122 PRE(sys_brk)
   3123 {
   3124    Addr brk_limit = VG_(brk_limit);
   3125    Addr brk_new;
   3126 
   3127    /* libc   says: int   brk(void *end_data_segment);
   3128       kernel says: void* brk(void* end_data_segment);  (more or less)
   3129 
   3130       libc returns 0 on success, and -1 (and sets errno) on failure.
   3131       Nb: if you ask to shrink the dataseg end below what it
   3132       currently is, that always succeeds, even if the dataseg end
   3133       doesn't actually change (e.g. brk(0)).  Unless it seg faults.
   3134 
   3135       Kernel returns the new dataseg end.  If the brk() failed, this
   3136       will be unchanged from the old one.  That's why calling (kernel)
   3137       brk(0) gives the current dataseg end (libc brk() just returns
   3138       zero in that case).
   3139 
   3140       Both will seg fault if you shrink it back into a text segment.
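
              Illustrative sketch (not Valgrind code; new_end is a
              hypothetical variable, syscall(2) the raw libc wrapper) of how
              a libc-style brk() can sit on top of the kernel call, given
              the semantics above:

                 void *res = (void*)syscall(SYS_brk, new_end); // try to move
                 return (res == new_end) ? 0 : -1;             // libc result

              and syscall(SYS_brk, 0) is the "query the current end" trick
              mentioned above.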
   3141    */
   3142    PRINT("sys_brk ( %#lx )", ARG1);
   3143    PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
   3144 
   3145    brk_new = do_brk(ARG1, tid);
   3146    SET_STATUS_Success( brk_new );
   3147 
   3148    if (brk_new == ARG1) {
   3149       /* brk() succeeded */
   3150       if (brk_new < brk_limit) {
   3151          /* successfully shrunk the data segment. */
   3152          VG_TRACK( die_mem_brk, (Addr)ARG1,
   3153 		   brk_limit-ARG1 );
   3154       } else
   3155       if (brk_new > brk_limit) {
   3156          /* successfully grew the data segment */
   3157          VG_TRACK( new_mem_brk, brk_limit,
   3158                    ARG1-brk_limit, tid );
   3159       }
   3160    } else {
   3161       /* brk() failed */
   3162       vg_assert(brk_limit == brk_new);
   3163    }
   3164 }
   3165 
   3166 PRE(sys_chdir)
   3167 {
   3168    FUSE_COMPATIBLE_MAY_BLOCK();
   3169    PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   3170    PRE_REG_READ1(long, "chdir", const char *, path);
   3171    PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
   3172 }
   3173 
   3174 PRE(sys_chmod)
   3175 {
   3176    FUSE_COMPATIBLE_MAY_BLOCK();
   3177    PRINT("sys_chmod ( %#lx(%s), %lu )", ARG1, (HChar*)ARG1, ARG2);
   3178    PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
   3179    PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
   3180 }
   3181 
   3182 PRE(sys_chown)
   3183 {
   3184    FUSE_COMPATIBLE_MAY_BLOCK();
   3185    PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   3186    PRE_REG_READ3(long, "chown",
   3187                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
   3188    PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
   3189 }
   3190 
   3191 PRE(sys_lchown)
   3192 {
   3193    FUSE_COMPATIBLE_MAY_BLOCK();
   3194    PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   3195    PRE_REG_READ3(long, "lchown",
   3196                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
   3197    PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
   3198 }
   3199 
   3200 PRE(sys_close)
   3201 {
   3202    FUSE_COMPATIBLE_MAY_BLOCK();
   3203    PRINT("sys_close ( %lu )", ARG1);
   3204    PRE_REG_READ1(long, "close", unsigned int, fd);
   3205 
   3206    /* Detect and negate attempts by the client to close Valgrind's log fd */
   3207    if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
   3208         /* If doing -d style logging (which is to fd=2), don't
   3209            allow that to be closed either. */
   3210         || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
   3211       SET_STATUS_Failure( VKI_EBADF );
   3212 }
   3213 
   3214 POST(sys_close)
   3215 {
   3216    if (VG_(clo_track_fds)) ML_(record_fd_close)(ARG1);
   3217 }
   3218 
   3219 PRE(sys_dup)
   3220 {
   3221    PRINT("sys_dup ( %lu )", ARG1);
   3222    PRE_REG_READ1(long, "dup", unsigned int, oldfd);
   3223 }
   3224 
   3225 POST(sys_dup)
   3226 {
   3227    vg_assert(SUCCESS);
   3228    if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
   3229       VG_(close)(RES);
   3230       SET_STATUS_Failure( VKI_EMFILE );
   3231    } else {
   3232       if (VG_(clo_track_fds))
   3233          ML_(record_fd_open_named)(tid, RES);
   3234    }
   3235 }
   3236 
   3237 PRE(sys_dup2)
   3238 {
   3239    PRINT("sys_dup2 ( %lu, %lu )", ARG1, ARG2);
   3240    PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
   3241    if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
   3242       SET_STATUS_Failure( VKI_EBADF );
   3243 }
   3244 
   3245 POST(sys_dup2)
   3246 {
   3247    vg_assert(SUCCESS);
   3248    if (VG_(clo_track_fds))
   3249       ML_(record_fd_open_named)(tid, RES);
   3250 }
   3251 
   3252 PRE(sys_fchdir)
   3253 {
   3254    FUSE_COMPATIBLE_MAY_BLOCK();
   3255    PRINT("sys_fchdir ( %lu )", ARG1);
   3256    PRE_REG_READ1(long, "fchdir", unsigned int, fd);
   3257 }
   3258 
   3259 PRE(sys_fchown)
   3260 {
   3261    FUSE_COMPATIBLE_MAY_BLOCK();
   3262    PRINT("sys_fchown ( %lu, %lu, %lu )", ARG1, ARG2, ARG3);
   3263    PRE_REG_READ3(long, "fchown",
   3264                  unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
   3265 }
   3266 
   3267 PRE(sys_fchmod)
   3268 {
   3269    FUSE_COMPATIBLE_MAY_BLOCK();
   3270    PRINT("sys_fchmod ( %lu, %lu )", ARG1, ARG2);
   3271    PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
   3272 }
   3273 
   3274 PRE(sys_newfstat)
   3275 {
   3276    FUSE_COMPATIBLE_MAY_BLOCK();
   3277    PRINT("sys_newfstat ( %lu, %#lx )", ARG1, ARG2);
   3278    PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
   3279    PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
   3280 }
   3281 
   3282 POST(sys_newfstat)
   3283 {
   3284    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   3285 }
   3286 
   3287 #if !defined(VGO_solaris) && !defined(VGP_arm64_linux)
   3288 static vki_sigset_t fork_saved_mask;
   3289 
   3290 // In Linux, the sys_fork() function varies across architectures, but we
   3291 // ignore the various args it gets, and so it looks arch-neutral.  Hmm.
   3292 PRE(sys_fork)
   3293 {
   3294    Bool is_child;
   3295    Int child_pid;
   3296    vki_sigset_t mask;
   3297 
   3298    PRINT("sys_fork ( )");
   3299    PRE_REG_READ0(long, "fork");
   3300 
   3301    /* Block all signals during fork, so that we can fix things up in
   3302       the child without being interrupted. */
   3303    VG_(sigfillset)(&mask);
   3304    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
   3305 
   3306    VG_(do_atfork_pre)(tid);
   3307 
   3308    SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
   3309 
   3310    if (!SUCCESS) return;
   3311 
   3312 #if defined(VGO_linux)
   3313    // RES is 0 for child, non-0 (the child's PID) for parent.
   3314    is_child = ( RES == 0 ? True : False );
   3315    child_pid = ( is_child ? -1 : RES );
   3316 #elif defined(VGO_darwin)
   3317    // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
   3318    is_child = RESHI;
   3319    child_pid = RES;
   3320 #else
   3321 #  error Unknown OS
   3322 #endif
   3323 
   3324    if (is_child) {
   3325       VG_(do_atfork_child)(tid);
   3326 
   3327       /* restore signal mask */
   3328       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   3329    } else {
   3330       VG_(do_atfork_parent)(tid);
   3331 
   3332       PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);
   3333 
   3334       /* restore signal mask */
   3335       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   3336    }
   3337 }
   3338 #endif // !defined(VGO_solaris) && !defined(VGP_arm64_linux)
   3339 
   3340 PRE(sys_ftruncate)
   3341 {
   3342    *flags |= SfMayBlock;
   3343    PRINT("sys_ftruncate ( %lu, %lu )", ARG1, ARG2);
   3344    PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
   3345 }
   3346 
   3347 PRE(sys_truncate)
   3348 {
   3349    *flags |= SfMayBlock;
   3350    PRINT("sys_truncate ( %#lx(%s), %lu )", ARG1, (HChar*)ARG1, ARG2);
   3351    PRE_REG_READ2(long, "truncate",
   3352                  const char *, path, unsigned long, length);
   3353    PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
   3354 }
   3355 
   3356 PRE(sys_ftruncate64)
   3357 {
   3358    *flags |= SfMayBlock;
   3359 #if VG_WORDSIZE == 4
   3360    PRINT("sys_ftruncate64 ( %lu, %llu )", ARG1, MERGE64(ARG2,ARG3));
   3361    PRE_REG_READ3(long, "ftruncate64",
   3362                  unsigned int, fd,
   3363                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
   3364 #else
   3365    PRINT("sys_ftruncate64 ( %lu, %lu )", ARG1, ARG2);
   3366    PRE_REG_READ2(long, "ftruncate64",
   3367                  unsigned int,fd, UWord,length);
   3368 #endif
   3369 }
   3370 
   3371 PRE(sys_truncate64)
   3372 {
   3373    *flags |= SfMayBlock;
   3374 #if VG_WORDSIZE == 4
   3375    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
   3376    PRE_REG_READ3(long, "truncate64",
   3377                  const char *, path,
   3378                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
   3379 #else
   3380    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
   3381    PRE_REG_READ2(long, "truncate64",
   3382                  const char *,path, UWord,length);
   3383 #endif
   3384    PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
   3385 }
   3386 
   3387 PRE(sys_getdents)
   3388 {
   3389    *flags |= SfMayBlock;
   3390    PRINT("sys_getdents ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
   3391    PRE_REG_READ3(long, "getdents",
   3392                  unsigned int, fd, struct vki_dirent *, dirp,
   3393                  unsigned int, count);
   3394    PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
   3395 }
   3396 
   3397 POST(sys_getdents)
   3398 {
   3399    vg_assert(SUCCESS);
   3400    if (RES > 0)
   3401       POST_MEM_WRITE( ARG2, RES );
   3402 }
   3403 
   3404 PRE(sys_getdents64)
   3405 {
   3406    *flags |= SfMayBlock;
   3407    PRINT("sys_getdents64 ( %lu, %#lx, %lu )",ARG1, ARG2, ARG3);
   3408    PRE_REG_READ3(long, "getdents64",
   3409                  unsigned int, fd, struct vki_dirent64 *, dirp,
   3410                  unsigned int, count);
   3411    PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
   3412 }
   3413 
   3414 POST(sys_getdents64)
   3415 {
   3416    vg_assert(SUCCESS);
   3417    if (RES > 0)
   3418       POST_MEM_WRITE( ARG2, RES );
   3419 }
   3420 
   3421 PRE(sys_getgroups)
   3422 {
   3423    PRINT("sys_getgroups ( %ld, %#lx )", SARG1, ARG2);
   3424    PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
   3425    if (ARG1 > 0)
   3426       PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
   3427 }
   3428 
   3429 POST(sys_getgroups)
   3430 {
   3431    vg_assert(SUCCESS);
   3432    if (ARG1 > 0 && RES > 0)
   3433       POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
   3434 }
   3435 
   3436 PRE(sys_getcwd)
   3437 {
   3438    // Comment from linux/fs/dcache.c:
   3439    //   NOTE! The user-level library version returns a character pointer.
   3440    //   The kernel system call just returns the length of the buffer filled
   3441    //   (which includes the ending '\0' character), or a negative error
   3442    //   value.
   3443    // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
   3444    PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
   3445    PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
   3446    PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
   3447 }
   3448 
   3449 POST(sys_getcwd)
   3450 {
   3451    vg_assert(SUCCESS);
   3452    if (RES != (Addr)NULL)
   3453       POST_MEM_WRITE( ARG1, RES );
   3454 }
   3455 
   3456 PRE(sys_geteuid)
   3457 {
   3458    PRINT("sys_geteuid ( )");
   3459    PRE_REG_READ0(long, "geteuid");
   3460 }
   3461 
   3462 PRE(sys_getegid)
   3463 {
   3464    PRINT("sys_getegid ( )");
   3465    PRE_REG_READ0(long, "getegid");
   3466 }
   3467 
   3468 PRE(sys_getgid)
   3469 {
   3470    PRINT("sys_getgid ( )");
   3471    PRE_REG_READ0(long, "getgid");
   3472 }
   3473 
   3474 PRE(sys_getpid)
   3475 {
   3476    PRINT("sys_getpid ()");
   3477    PRE_REG_READ0(long, "getpid");
   3478 }
   3479 
   3480 PRE(sys_getpgid)
   3481 {
   3482    PRINT("sys_getpgid ( %ld )", SARG1);
   3483    PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
   3484 }
   3485 
   3486 PRE(sys_getpgrp)
   3487 {
   3488    PRINT("sys_getpgrp ()");
   3489    PRE_REG_READ0(long, "getpgrp");
   3490 }
   3491 
   3492 PRE(sys_getppid)
   3493 {
   3494    PRINT("sys_getppid ()");
   3495    PRE_REG_READ0(long, "getppid");
   3496 }
   3497 
   3498 static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
   3499 {
   3500    POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
   3501 
   3502 #ifdef _RLIMIT_POSIX_FLAG
   3503    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
   3504    // Unset it here to make the switch case below work correctly.
   3505    a1 &= ~_RLIMIT_POSIX_FLAG;
   3506 #endif
   3507 
   3508    switch (a1) {
   3509    case VKI_RLIMIT_NOFILE:
   3510       ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
   3511       ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
   3512       break;
   3513 
   3514    case VKI_RLIMIT_DATA:
   3515       *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
   3516       break;
   3517 
   3518    case VKI_RLIMIT_STACK:
   3519       *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);
   3520       break;
   3521    }
   3522 }
   3523 
   3524 PRE(sys_old_getrlimit)
   3525 {
   3526    PRINT("sys_old_getrlimit ( %lu, %#lx )", ARG1, ARG2);
   3527    PRE_REG_READ2(long, "old_getrlimit",
   3528                  unsigned int, resource, struct rlimit *, rlim);
   3529    PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   3530 }
   3531 
   3532 POST(sys_old_getrlimit)
   3533 {
   3534    common_post_getrlimit(tid, ARG1, ARG2);
   3535 }
   3536 
   3537 PRE(sys_getrlimit)
   3538 {
   3539    PRINT("sys_getrlimit ( %lu, %#lx )", ARG1, ARG2);
   3540    PRE_REG_READ2(long, "getrlimit",
   3541                  unsigned int, resource, struct rlimit *, rlim);
   3542    PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   3543 }
   3544 
   3545 POST(sys_getrlimit)
   3546 {
   3547    common_post_getrlimit(tid, ARG1, ARG2);
   3548 }
   3549 
   3550 PRE(sys_getrusage)
   3551 {
   3552    PRINT("sys_getrusage ( %ld, %#lx )", SARG1, ARG2);
   3553    PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
   3554    PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
   3555 }
   3556 
   3557 POST(sys_getrusage)
   3558 {
   3559    vg_assert(SUCCESS);
   3560    if (RES == 0)
   3561       POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
   3562 }
   3563 
   3564 PRE(sys_gettimeofday)
   3565 {
   3566    PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
   3567    PRE_REG_READ2(long, "gettimeofday",
   3568                  struct timeval *, tv, struct timezone *, tz);
   3569    // GrP fixme does darwin write to *tz anymore?
   3570    if (ARG1 != 0)
   3571       PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
   3572    if (ARG2 != 0)
   3573       PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
   3574 }
   3575 
   3576 POST(sys_gettimeofday)
   3577 {
   3578    vg_assert(SUCCESS);
   3579    if (RES == 0) {
   3580       if (ARG1 != 0)
   3581          POST_timeval_WRITE( ARG1 );
   3582       if (ARG2 != 0)
   3583 	 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
   3584    }
   3585 }
   3586 
   3587 PRE(sys_settimeofday)
   3588 {
   3589    PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
   3590    PRE_REG_READ2(long, "settimeofday",
   3591                  struct timeval *, tv, struct timezone *, tz);
   3592    if (ARG1 != 0)
   3593       PRE_timeval_READ( "settimeofday(tv)", ARG1 );
   3594    if (ARG2 != 0) {
   3595       PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
   3596       /* maybe should warn if tz->tz_dsttime is non-zero? */
   3597    }
   3598 }
   3599 
   3600 PRE(sys_getuid)
   3601 {
   3602    PRINT("sys_getuid ( )");
   3603    PRE_REG_READ0(long, "getuid");
   3604 }
   3605 
   3606 void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
   3607 {
   3608    /* We don't have any specific information on it, so
   3609       try to do something reasonable based on direction and
   3610       size bits.  The encoding scheme is described in
   3611       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
   3612 
   3613       According to Simon Hausmann, _IOC_READ means the kernel
   3614       writes a value back to the ioctl argument passed in from user
   3615       space, and the other way around with _IOC_WRITE. */
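
           /* For reference, an illustrative summary of the encoding in
              Linux's <asm-generic/ioctl.h> (a few arches, e.g. mips,
              powerpc and sparc, split the fields differently): a request
              number packs
                 bits  0..7   command number
                 bits  8..15  driver 'type' / magic
                 bits 16..29  argument size in bytes
                 bits 30..31  direction (none / write / read)
              which is what _VKI_IOC_DIR() and _VKI_IOC_SIZE() extract
              below. */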
   3616 
   3617 #if defined(VGO_solaris)
   3618    /* The majority of Solaris ioctl requests do not honour direction hints. */
   3619    UInt dir  = _VKI_IOC_NONE;
   3620 #else
   3621    UInt dir  = _VKI_IOC_DIR(request);
   3622 #endif
   3623    UInt size = _VKI_IOC_SIZE(request);
   3624 
   3625    if (SimHintiS(SimHint_lax_ioctls, VG_(clo_sim_hints))) {
   3626       /*
   3627        * Be very lax about ioctl handling; the only
   3628        * assumption is that the size is correct. Doesn't
   3629        * require the full buffer to be initialized when
   3630        * writing.  Without this, using some device
   3631        * drivers with a large number of strange ioctl
   3632        * commands becomes very tiresome.
   3633        */
   3634    } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
   3635       static UWord unknown_ioctl[10];
   3636       static Int moans = sizeof(unknown_ioctl) / sizeof(unknown_ioctl[0]);
   3637 
   3638       if (moans > 0 && !VG_(clo_xml)) {
   3639          /* Check that we have not already moaned about this request. */
   3640          UInt i;
   3641          for (i = 0; i < sizeof(unknown_ioctl)/sizeof(unknown_ioctl[0]); i++) {
   3642             if (unknown_ioctl[i] == request)
   3643                break;
   3644             if (unknown_ioctl[i] == 0) {
   3645                unknown_ioctl[i] = request;
   3646                moans--;
   3647                VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
   3648                          " with no size/direction hints.\n", request);
   3649                VG_(umsg)("   This could cause spurious value errors to appear.\n");
   3650                VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
   3651                          "guidance on writing a proper wrapper.\n" );
   3652                //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   3653                return;
   3654             }
   3655          }
   3656       }
   3657    } else {
   3658       //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
   3659       //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   3660       if ((dir & _VKI_IOC_WRITE) && size > 0)
   3661          PRE_MEM_READ( "ioctl(generic)", arg, size);
   3662       if ((dir & _VKI_IOC_READ) && size > 0)
   3663          PRE_MEM_WRITE( "ioctl(generic)", arg, size);
   3664    }
   3665 }
   3666 
   3667 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
   3668 {
   3669    /* We don't have any specific information on it, so
   3670       try to do something reasonable based on direction and
   3671       size bits.  The encoding scheme is described in
   3672       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
   3673 
   3674       According to Simon Hausmann, _IOC_READ means the kernel
   3675       writes a value back to the ioctl argument passed in from user
   3676       space, and the other way around with _IOC_WRITE. */
   3677 
   3678    UInt dir  = _VKI_IOC_DIR(request);
   3679    UInt size = _VKI_IOC_SIZE(request);
   3680    if (size > 0 && (dir & _VKI_IOC_READ)
   3681        && res == 0
   3682        && arg != (Addr)NULL) {
   3683       POST_MEM_WRITE(arg, size);
   3684    }
   3685 }
   3686 
   3687 /*
   3688    If we're sending a SIGKILL to one of our own threads, then simulate
   3689    it rather than really sending the signal, so that the target thread
   3690    gets a chance to clean up.  Returns True if we did the killing (or
   3691    no killing is necessary), and False if the caller should use the
   3692    normal kill syscall.
   3693 
   3694    "pid" is any pid argument which can be passed to kill; group kills
   3695    (< -1, 0), and owner kills (-1) are ignored, on the grounds that
   3696    they'll most likely hit all the threads and we won't need to worry
   3697    about cleanup.  In truth, we can't fully emulate these multicast
   3698    kills.
   3699 
   3700    "tgid" is a thread group id.  If it is not -1, then the target
   3701    thread must be in that thread group.
   3702  */
   3703 Bool ML_(do_sigkill)(Int pid, Int tgid)
   3704 {
   3705    ThreadState *tst;
   3706    ThreadId tid;
   3707 
   3708    if (pid <= 0)
   3709       return False;
   3710 
   3711    tid = VG_(lwpid_to_vgtid)(pid);
   3712    if (tid == VG_INVALID_THREADID)
   3713       return False;		/* none of our threads */
   3714 
   3715    tst = VG_(get_ThreadState)(tid);
   3716    if (tst == NULL || tst->status == VgTs_Empty)
   3717       return False;		/* hm, shouldn't happen */
   3718 
   3719    if (tgid != -1 && tst->os_state.threadgroup != tgid)
   3720       return False;		/* not the right thread group */
   3721 
   3722    /* Check to see that the target isn't already exiting. */
   3723    if (!VG_(is_exiting)(tid)) {
   3724       if (VG_(clo_trace_signals))
   3725 	 VG_(message)(Vg_DebugMsg,
   3726                       "Thread %u being killed with SIGKILL\n",
   3727                       tst->tid);
   3728 
   3729       tst->exitreason = VgSrc_FatalSig;
   3730       tst->os_state.fatalsig = VKI_SIGKILL;
   3731 
   3732       if (!VG_(is_running_thread)(tid))
   3733 	 VG_(get_thread_out_of_syscall)(tid);
   3734    }
   3735 
   3736    return True;
   3737 }
   3738 
   3739 PRE(sys_kill)
   3740 {
   3741    PRINT("sys_kill ( %ld, %ld )", SARG1, SARG2);
   3742    PRE_REG_READ2(long, "kill", int, pid, int, signal);
   3743    if (!ML_(client_signal_OK)(ARG2)) {
   3744       SET_STATUS_Failure( VKI_EINVAL );
   3745       return;
   3746    }
   3747 
   3748    /* If we're sending SIGKILL, check to see if the target is one of
   3749       our threads and handle it specially. */
   3750    if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
   3751       SET_STATUS_Success(0);
   3752    else
   3753       /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
   3754          affecting how posix-compliant the call is.  I guess it is
   3755          harmless to pass the 3rd arg on other platforms; hence pass
   3756          it on all. */
   3757       SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
   3758 
   3759    if (VG_(clo_trace_signals))
   3760       VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
   3761 		   SARG2, SARG1);
   3762 
   3763    /* This kill might have given us a pending signal.  Ask for a check once
   3764       the syscall is done. */
   3765    *flags |= SfPollAfter;
   3766 }
   3767 
   3768 PRE(sys_link)
   3769 {
   3770    *flags |= SfMayBlock;
   3771    PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   3772    PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
   3773    PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
   3774    PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
   3775 }
   3776 
   3777 PRE(sys_newlstat)
   3778 {
   3779    PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   3780    PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
   3781    PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
   3782    PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
   3783 }
   3784 
   3785 POST(sys_newlstat)
   3786 {
   3787    vg_assert(SUCCESS);
   3788    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   3789 }
   3790 
   3791 PRE(sys_mkdir)
   3792 {
   3793    *flags |= SfMayBlock;
   3794    PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1, (HChar*)ARG1, SARG2);
   3795    PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
   3796    PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
   3797 }
   3798 
   3799 PRE(sys_mprotect)
   3800 {
   3801    PRINT("sys_mprotect ( %#lx, %lu, %lu )", ARG1, ARG2, ARG3);
   3802    PRE_REG_READ3(long, "mprotect",
   3803                  unsigned long, addr, vki_size_t, len, unsigned long, prot);
   3804 
   3805    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
   3806       SET_STATUS_Failure( VKI_ENOMEM );
   3807    }
   3808 #if defined(VKI_PROT_GROWSDOWN)
   3809    else
   3810    if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
   3811       /* Deal with mprotects on growable stack areas.
   3812 
   3813          The critical files to understand all this are mm/mprotect.c
   3814          in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
   3815          glibc.
   3816 
   3817          The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
   3818          round the start/end address of mprotect to the start/end of
   3819          the underlying vma and glibc uses that as an easy way to
   3820          change the protection of the stack by calling mprotect on the
   3821          last page of the stack with PROT_GROWSDOWN set.
   3822 
   3823          The sanity check provided by the kernel is that the vma must
   3824          have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
   3825       UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
   3826       NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
   3827       NSegment const *rseg;
   3828 
   3829       vg_assert(aseg);
   3830 
   3831       if (grows == VKI_PROT_GROWSDOWN) {
   3832          rseg = VG_(am_next_nsegment)( aseg, False/*backwards*/ );
   3833          if (rseg
   3834              && rseg->kind == SkResvn
   3835              && rseg->smode == SmUpper
   3836              && rseg->end+1 == aseg->start) {
   3837             Addr end = ARG1 + ARG2;
   3838             ARG1 = aseg->start;
   3839             ARG2 = end - aseg->start;
   3840             ARG3 &= ~VKI_PROT_GROWSDOWN;
   3841          } else {
   3842             SET_STATUS_Failure( VKI_EINVAL );
   3843          }
   3844       } else if (grows == VKI_PROT_GROWSUP) {
   3845          rseg = VG_(am_next_nsegment)( aseg, True/*forwards*/ );
   3846          if (rseg
   3847              && rseg->kind == SkResvn
   3848              && rseg->smode == SmLower
   3849              && aseg->end+1 == rseg->start) {
   3850             ARG2 = aseg->end - ARG1 + 1;
   3851             ARG3 &= ~VKI_PROT_GROWSUP;
   3852          } else {
   3853             SET_STATUS_Failure( VKI_EINVAL );
   3854          }
   3855       } else {
   3856          /* both GROWSUP and GROWSDOWN */
   3857          SET_STATUS_Failure( VKI_EINVAL );
   3858       }
   3859    }
   3860 #endif   // defined(VKI_PROT_GROWSDOWN)
   3861 }
   3862 
   3863 POST(sys_mprotect)
   3864 {
   3865    Addr a    = ARG1;
   3866    SizeT len = ARG2;
   3867    Int  prot = ARG3;
   3868 
   3869    ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
   3870 }
   3871 
   3872 PRE(sys_munmap)
   3873 {
   3874    if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);
   3875    PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
   3876    PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
   3877 
   3878    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
   3879       SET_STATUS_Failure( VKI_EINVAL );
   3880 }
   3881 
   3882 POST(sys_munmap)
   3883 {
   3884    Addr  a   = ARG1;
   3885    SizeT len = ARG2;
   3886 
   3887    ML_(notify_core_and_tool_of_munmap)( a, len );
   3888 }
   3889 
   3890 PRE(sys_mincore)
   3891 {
   3892    PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
   3893    PRE_REG_READ3(long, "mincore",
   3894                  unsigned long, start, vki_size_t, length,
   3895                  unsigned char *, vec);
   3896    PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
   3897 }
   3898 POST(sys_mincore)
   3899 {
   3900    POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
   3901 }
   3902 
   3903 PRE(sys_nanosleep)
   3904 {
   3905    *flags |= SfMayBlock|SfPostOnFail;
   3906    PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
   3907    PRE_REG_READ2(long, "nanosleep",
   3908                  struct timespec *, req, struct timespec *, rem);
   3909    PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
   3910    if (ARG2 != 0)
   3911       PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
   3912 }
   3913 
   3914 POST(sys_nanosleep)
   3915 {
   3916    vg_assert(SUCCESS || FAILURE);
   3917    if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
   3918       POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
   3919 }
   3920 
   3921 #if defined(VGO_linux) || defined(VGO_solaris)
   3922 /* Handles the case where the open is of /proc/self/auxv or
   3923    /proc/<pid>/auxv, and just gives out a copy of the fd for the
   3924    fake file we cooked up at startup (in m_main).  Also, seeks the
   3925    cloned fd back to the start.
   3926    Returns True if auxv open was handled (status is set). */
   3927 Bool ML_(handle_auxv_open)(SyscallStatus *status, const HChar *filename,
   3928                            int flags)
   3929 {
   3930    HChar  name[30];   // large enough
   3931 
   3932    if (!ML_(safe_to_deref)((const void *) filename, 1))
   3933       return False;
   3934 
   3935    /* Opening /proc/<pid>/auxv or /proc/self/auxv? */
   3936    VG_(sprintf)(name, "/proc/%d/auxv", VG_(getpid)());
   3937    if (!VG_STREQ(filename, name) && !VG_STREQ(filename, "/proc/self/auxv"))
   3938       return False;
   3939 
   3940    /* Allow the file to be opened only for reading. */
   3941    if (flags & (VKI_O_WRONLY | VKI_O_RDWR)) {
   3942       SET_STATUS_Failure(VKI_EACCES);
   3943       return True;
   3944    }
   3945 
   3946 #  if defined(VGO_solaris)
   3947    VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_auxv_fd));
   3948    SysRes sres = VG_(open)(name, flags, 0);
   3949    SET_STATUS_from_SysRes(sres);
   3950 #  else
   3951    SysRes sres = VG_(dup)(VG_(cl_auxv_fd));
   3952    SET_STATUS_from_SysRes(sres);
   3953    if (!sr_isError(sres)) {
   3954       OffT off = VG_(lseek)(sr_Res(sres), 0, VKI_SEEK_SET);
   3955       if (off < 0)
   3956          SET_STATUS_Failure(VKI_EMFILE);
   3957    }
   3958 #  endif
   3959 
   3960    return True;
   3961 }
   3962 #endif // defined(VGO_linux) || defined(VGO_solaris)
   3963 
   3964 PRE(sys_open)
   3965 {
   3966    if (ARG2 & VKI_O_CREAT) {
   3967       // 3-arg version
   3968       PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1, (HChar*)ARG1, SARG2, SARG3);
   3969       PRE_REG_READ3(long, "open",
   3970                     const char *, filename, int, flags, int, mode);
   3971    } else {
   3972       // 2-arg version
   3973       PRINT("sys_open ( %#lx(%s), %ld )",ARG1, (HChar*)ARG1, SARG2);
   3974       PRE_REG_READ2(long, "open",
   3975                     const char *, filename, int, flags);
   3976    }
   3977    PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
   3978 
   3979 #if defined(VGO_linux)
   3980    /* Handle the case where the open is of /proc/self/cmdline or
   3981       /proc/<pid>/cmdline, and just give it a copy of the fd for the
   3982       fake file we cooked up at startup (in m_main).  Also, seek the
   3983       cloned fd back to the start. */
   3984    {
   3985       HChar  name[30];   // large enough
   3986       HChar* arg1s = (HChar*) ARG1;
   3987       SysRes sres;
   3988 
   3989       VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
   3990       if (ML_(safe_to_deref)( arg1s, 1 )
   3991           && (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))) {
   3992          sres = VG_(dup)( VG_(cl_cmdline_fd) );
   3993          SET_STATUS_from_SysRes( sres );
   3994          if (!sr_isError(sres)) {
   3995             OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
   3996             if (off < 0)
   3997                SET_STATUS_Failure( VKI_EMFILE );
   3998          }
   3999          return;
   4000       }
   4001    }
   4002 
   4003    /* Handle also the case of /proc/self/auxv or /proc/<pid>/auxv. */
   4004    if (ML_(handle_auxv_open)(status, (const HChar *)ARG1, ARG2))
   4005       return;
   4006 #endif // defined(VGO_linux)
   4007 
   4008    /* Otherwise handle normally */
   4009    *flags |= SfMayBlock;
   4010 }
   4011 
   4012 POST(sys_open)
   4013 {
   4014    vg_assert(SUCCESS);
   4015    if (!ML_(fd_allowed)(RES, "open", tid, True)) {
   4016       VG_(close)(RES);
   4017       SET_STATUS_Failure( VKI_EMFILE );
   4018    } else {
   4019       if (VG_(clo_track_fds))
   4020          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
   4021    }
   4022 }
   4023 
   4024 PRE(sys_read)
   4025 {
   4026    *flags |= SfMayBlock;
   4027    PRINT("sys_read ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
   4028    PRE_REG_READ3(ssize_t, "read",
   4029                  unsigned int, fd, char *, buf, vki_size_t, count);
   4030 
   4031    if (!ML_(fd_allowed)(ARG1, "read", tid, False))
   4032       SET_STATUS_Failure( VKI_EBADF );
   4033    else
   4034       PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
   4035 }
   4036 
   4037 POST(sys_read)
   4038 {
   4039    vg_assert(SUCCESS);
   4040    POST_MEM_WRITE( ARG2, RES );
   4041 }
   4042 
   4043 PRE(sys_write)
   4044 {
   4045    Bool ok;
   4046    *flags |= SfMayBlock;
   4047    PRINT("sys_write ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
   4048    PRE_REG_READ3(ssize_t, "write",
   4049                  unsigned int, fd, const char *, buf, vki_size_t, count);
   4050    /* check to see if it is allowed.  If not, try for an exemption from
   4051       --sim-hints=enable-outer (used for self hosting). */
   4052    ok = ML_(fd_allowed)(ARG1, "write", tid, False);
   4053    if (!ok && ARG1 == 2/*stderr*/
   4054            && SimHintiS(SimHint_enable_outer, VG_(clo_sim_hints)))
   4055       ok = True;
   4056 #if defined(VGO_solaris)
   4057    if (!ok && VG_(vfork_fildes_addr) != NULL
   4058        && *VG_(vfork_fildes_addr) >= 0 && *VG_(vfork_fildes_addr) == ARG1)
   4059       ok = True;
   4060 #endif
   4061    if (!ok)
   4062       SET_STATUS_Failure( VKI_EBADF );
   4063    else
   4064       PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
   4065 }
   4066 
   4067 PRE(sys_creat)
   4068 {
   4069    *flags |= SfMayBlock;
   4070    PRINT("sys_creat ( %#lx(%s), %ld )", ARG1, (HChar*)ARG1, SARG2);
   4071    PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
   4072    PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
   4073 }
   4074 
   4075 POST(sys_creat)
   4076 {
   4077    vg_assert(SUCCESS);
   4078    if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
   4079       VG_(close)(RES);
   4080       SET_STATUS_Failure( VKI_EMFILE );
   4081    } else {
   4082       if (VG_(clo_track_fds))
   4083          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
   4084    }
   4085 }
   4086 
   4087 PRE(sys_poll)
   4088 {
   4089    /* struct pollfd {
   4090         int fd;           -- file descriptor
   4091         short events;     -- requested events
   4092         short revents;    -- returned events
   4093       };
   4094       int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
   4095    */
   4096    UInt i;
   4097    struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
   4098    *flags |= SfMayBlock;
   4099    PRINT("sys_poll ( %#lx, %lu, %ld )\n", ARG1, ARG2, SARG3);
   4100    PRE_REG_READ3(long, "poll",
   4101                  struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
   4102 
   4103    for (i = 0; i < ARG2; i++) {
   4104       PRE_MEM_READ( "poll(ufds.fd)",
   4105                     (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
   4106       PRE_MEM_READ( "poll(ufds.events)",
   4107                     (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
   4108       PRE_MEM_WRITE( "poll(ufds.revents)",
   4109                      (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
   4110    }
   4111 }
   4112 
   4113 POST(sys_poll)
   4114 {
   4115    if (RES >= 0) {
   4116       UInt i;
   4117       struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
   4118       for (i = 0; i < ARG2; i++)
   4119 	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
   4120    }
   4121 }
   4122 
   4123 PRE(sys_readlink)
   4124 {
   4125    FUSE_COMPATIBLE_MAY_BLOCK();
   4126    Word saved = SYSNO;
   4127 
   4128    PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
   4129    PRE_REG_READ3(long, "readlink",
   4130                  const char *, path, char *, buf, int, bufsiz);
   4131    PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
   4132    PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
   4133 
   4134 
   4135    {
   4136 #if defined(VGO_linux) || defined(VGO_solaris)
   4137 #if defined(VGO_linux)
   4138 #define PID_EXEPATH  "/proc/%d/exe"
   4139 #define SELF_EXEPATH "/proc/self/exe"
   4140 #define SELF_EXEFD   "/proc/self/fd/%d"
   4141 #elif defined(VGO_solaris)
   4142 #define PID_EXEPATH  "/proc/%d/path/a.out"
   4143 #define SELF_EXEPATH "/proc/self/path/a.out"
   4144 #define SELF_EXEFD   "/proc/self/path/%d"
   4145 #endif
   4146       /*
   4147        * Handle the case where readlink is looking at /proc/self/exe or
   4148        * /proc/<pid>/exe, or equivalent on Solaris.
   4149        */
   4150       HChar  name[30];   // large enough
   4151       HChar* arg1s = (HChar*) ARG1;
   4152       VG_(sprintf)(name, PID_EXEPATH, VG_(getpid)());
   4153       if (ML_(safe_to_deref)(arg1s, 1)
   4154           && (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, SELF_EXEPATH))) {
   4155          VG_(sprintf)(name, SELF_EXEFD, VG_(cl_exec_fd));
   4156          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
   4157                                                          ARG2, ARG3));
   4158       } else
   4159 #endif
   4160       {
   4161          /* Normal case */
   4162          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
   4163       }
   4164    }
   4165 
   4166    if (SUCCESS && RES > 0)
   4167       POST_MEM_WRITE( ARG2, RES );
   4168 }
   4169 
   4170 PRE(sys_readv)
   4171 {
   4172    Int i;
   4173    struct vki_iovec * vec;
   4174    *flags |= SfMayBlock;
   4175    PRINT("sys_readv ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
   4176    PRE_REG_READ3(ssize_t, "readv",
   4177                  unsigned long, fd, const struct iovec *, vector,
   4178                  unsigned long, count);
   4179    if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
   4180       SET_STATUS_Failure( VKI_EBADF );
   4181    } else {
   4182       if ((Int)ARG3 >= 0)
   4183          PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
   4184 
   4185       if (ARG2 != 0) {
   4186          /* ToDo: don't do any of the following if the vector is invalid */
   4187          vec = (struct vki_iovec *)ARG2;
   4188          for (i = 0; i < (Int)ARG3; i++)
   4189             PRE_MEM_WRITE( "readv(vector[...])",
   4190                            (Addr)vec[i].iov_base, vec[i].iov_len );
   4191       }
   4192    }
   4193 }
   4194 
   4195 POST(sys_readv)
   4196 {
   4197    vg_assert(SUCCESS);
   4198    if (RES > 0) {
   4199       Int i;
   4200       struct vki_iovec * vec = (struct vki_iovec *)ARG2;
   4201       Int remains = RES;
   4202 
   4203       /* RES holds the number of bytes read. */
   4204       for (i = 0; i < (Int)ARG3; i++) {
   4205 	 Int nReadThisBuf = vec[i].iov_len;
   4206 	 if (nReadThisBuf > remains) nReadThisBuf = remains;
   4207 	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
   4208 	 remains -= nReadThisBuf;
   4209 	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
   4210       }
   4211    }
   4212 }
   4213 
   4214 PRE(sys_rename)
   4215 {
   4216    FUSE_COMPATIBLE_MAY_BLOCK();
   4217    PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   4218    PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
   4219    PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
   4220    PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
   4221 }
   4222 
   4223 PRE(sys_rmdir)
   4224 {
   4225    *flags |= SfMayBlock;
   4226    PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   4227    PRE_REG_READ1(long, "rmdir", const char *, pathname);
   4228    PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
   4229 }
   4230 
   4231 PRE(sys_select)
   4232 {
   4233    *flags |= SfMayBlock;
   4234    PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", SARG1, ARG2, ARG3,
   4235          ARG4, ARG5);
   4236    PRE_REG_READ5(long, "select",
   4237                  int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
   4238                  vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
   4239    // XXX: this possibly understates how much memory is read.
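           // (An fd_set covering n descriptors occupies at least (n+7)/8
           // bytes, while ARG1/8 rounds down; hence the possible
           // understatement.)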
   4240    if (ARG2 != 0)
   4241       PRE_MEM_READ( "select(readfds)",
   4242 		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
   4243    if (ARG3 != 0)
   4244       PRE_MEM_READ( "select(writefds)",
   4245 		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
   4246    if (ARG4 != 0)
   4247       PRE_MEM_READ( "select(exceptfds)",
   4248 		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
   4249    if (ARG5 != 0)
   4250       PRE_timeval_READ( "select(timeout)", ARG5 );
   4251 }
   4252 
   4253 PRE(sys_setgid)
   4254 {
   4255    PRINT("sys_setgid ( %lu )", ARG1);
   4256    PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
   4257 }
   4258 
   4259 PRE(sys_setsid)
   4260 {
   4261    PRINT("sys_setsid ( )");
   4262    PRE_REG_READ0(long, "setsid");
   4263 }
   4264 
   4265 PRE(sys_setgroups)
   4266 {
   4267    PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
   4268    PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
   4269    if (ARG1 > 0)
   4270       PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
   4271 }
   4272 
   4273 PRE(sys_setpgid)
   4274 {
   4275    PRINT("setpgid ( %ld, %ld )", SARG1, SARG2);
   4276    PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
   4277 }
   4278 
   4279 PRE(sys_setregid)
   4280 {
   4281    PRINT("sys_setregid ( %lu, %lu )", ARG1, ARG2);
   4282    PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
   4283 }
   4284 
   4285 PRE(sys_setreuid)
   4286 {
   4287    PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
   4288    PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
   4289 }
   4290 
   4291 PRE(sys_setrlimit)
   4292 {
   4293    UWord arg1 = ARG1;
   4294    PRINT("sys_setrlimit ( %lu, %#lx )", ARG1, ARG2);
   4295    PRE_REG_READ2(long, "setrlimit",
   4296                  unsigned int, resource, struct rlimit *, rlim);
   4297    PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   4298 
   4299 #ifdef _RLIMIT_POSIX_FLAG
   4300    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
   4301    // Unset it here to make the if statements below work correctly.
   4302    arg1 &= ~_RLIMIT_POSIX_FLAG;
   4303 #endif
   4304 
   4305    if (!VG_(am_is_valid_for_client)(ARG2, sizeof(struct vki_rlimit),
   4306                                     VKI_PROT_READ)) {
   4307       SET_STATUS_Failure( VKI_EFAULT );
   4308    }
   4309    else if (((struct vki_rlimit *)ARG2)->rlim_cur
   4310             > ((struct vki_rlimit *)ARG2)->rlim_max) {
   4311       SET_STATUS_Failure( VKI_EINVAL );
   4312    }
   4313    else if (arg1 == VKI_RLIMIT_NOFILE) {
   4314       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
   4315           ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
   4316          SET_STATUS_Failure( VKI_EPERM );
   4317       }
   4318       else {
   4319          VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
   4320          SET_STATUS_Success( 0 );
   4321       }
   4322    }
   4323    else if (arg1 == VKI_RLIMIT_DATA) {
   4324       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
   4325           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
   4326          SET_STATUS_Failure( VKI_EPERM );
   4327       }
   4328       else {
   4329          VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
   4330          SET_STATUS_Success( 0 );
   4331       }
   4332    }
   4333    else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
   4334       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
   4335           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
   4336          SET_STATUS_Failure( VKI_EPERM );
   4337       }
   4338       else {
   4339          /* Change the value of client_stack_szB to the rlim_cur value,
   4340             but only if it does not exceed the size of the stack that
   4341             was allocated for the client.
   4342             TODO: All platforms should set VG_(clstk_max_size) as part of their
   4343                   setup_client_stack(). */
   4344          if ((VG_(clstk_max_size) == 0)
   4345              || (((struct vki_rlimit *) ARG2)->rlim_cur <= VG_(clstk_max_size)))
   4346             VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
   4347 
   4348          VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
   4349          SET_STATUS_Success( 0 );
   4350       }
   4351    }
   4352 }
   4353 
   4354 PRE(sys_setuid)
   4355 {
   4356    PRINT("sys_setuid ( %lu )", ARG1);
   4357    PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
   4358 }
   4359 
   4360 PRE(sys_newstat)
   4361 {
   4362    FUSE_COMPATIBLE_MAY_BLOCK();
   4363    PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   4364    PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
   4365    PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
   4366    PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
   4367 }
   4368 
   4369 POST(sys_newstat)
   4370 {
   4371    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   4372 }
   4373 
   4374 PRE(sys_statfs)
   4375 {
   4376    FUSE_COMPATIBLE_MAY_BLOCK();
   4377    PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   4378    PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
   4379    PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
   4380    PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
   4381 }
   4382 POST(sys_statfs)
   4383 {
   4384    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
   4385 }
   4386 
   4387 PRE(sys_statfs64)
   4388 {
   4389    PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
   4390    PRE_REG_READ3(long, "statfs64",
   4391                  const char *, path, vki_size_t, size, struct statfs64 *, buf);
   4392    PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
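           /* Unlike plain statfs above, the client passes the size of its
              buffer explicitly (ARG2), so check, and in the POST mark,
              exactly that many bytes rather than sizeof(struct vki_statfs64). */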
   4393    PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
   4394 }
   4395 POST(sys_statfs64)
   4396 {
   4397    POST_MEM_WRITE( ARG3, ARG2 );
   4398 }
   4399 
   4400 PRE(sys_symlink)
   4401 {
   4402    *flags |= SfMayBlock;
   4403    PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   4404    PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
   4405    PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
   4406    PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
   4407 }
   4408 
   4409 PRE(sys_time)
   4410 {
   4411    /* time_t time(time_t *t); */
   4412    PRINT("sys_time ( %#lx )",ARG1);
   4413    PRE_REG_READ1(long, "time", int *, t);
   4414    if (ARG1 != 0) {
   4415       PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
   4416    }
   4417 }
   4418 
   4419 POST(sys_time)
   4420 {
   4421    if (ARG1 != 0) {
   4422       POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
   4423    }
   4424 }
   4425 
   4426 PRE(sys_times)
   4427 {
   4428    PRINT("sys_times ( %#lx )", ARG1);
   4429    PRE_REG_READ1(long, "times", struct tms *, buf);
   4430    if (ARG1 != 0) {
   4431       PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
   4432    }
   4433 }
   4434 
   4435 POST(sys_times)
   4436 {
   4437    if (ARG1 != 0) {
   4438       POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
   4439    }
   4440 }
   4441 
   4442 PRE(sys_umask)
   4443 {
   4444    PRINT("sys_umask ( %ld )", SARG1);
   4445    PRE_REG_READ1(long, "umask", int, mask);
   4446 }
   4447 
   4448 PRE(sys_unlink)
   4449 {
   4450    *flags |= SfMayBlock;
   4451    PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
   4452    PRE_REG_READ1(long, "unlink", const char *, pathname);
   4453    PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
   4454 }
   4455 
   4456 PRE(sys_newuname)
   4457 {
   4458    PRINT("sys_newuname ( %#lx )", ARG1);
   4459    PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
   4460    PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
   4461 }
   4462 
   4463 POST(sys_newuname)
   4464 {
   4465    if (ARG1 != 0) {
   4466       POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
   4467    }
   4468 }
   4469 
   4470 PRE(sys_waitpid)
   4471 {
   4472    *flags |= SfMayBlock;
   4473    PRINT("sys_waitpid ( %ld, %#lx, %ld )", SARG1, ARG2, SARG3);
   4474    PRE_REG_READ3(long, "waitpid",
   4475                  vki_pid_t, pid, unsigned int *, status, int, options);
   4476 
   4477    if (ARG2 != (Addr)NULL)
   4478       PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
   4479 }
   4480 
   4481 POST(sys_waitpid)
   4482 {
   4483    if (ARG2 != (Addr)NULL)
   4484       POST_MEM_WRITE( ARG2, sizeof(int) );
   4485 }
   4486 
   4487 PRE(sys_wait4)
   4488 {
   4489    *flags |= SfMayBlock;
   4490    PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", SARG1, ARG2, SARG3, ARG4);
   4491 
   4492    PRE_REG_READ4(long, "wait4",
   4493                  vki_pid_t, pid, unsigned int *, status, int, options,
   4494                  struct rusage *, rusage);
   4495    if (ARG2 != (Addr)NULL)
   4496       PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
   4497    if (ARG4 != (Addr)NULL)
   4498       PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
   4499 }
   4500 
   4501 POST(sys_wait4)
   4502 {
   4503    if (ARG2 != (Addr)NULL)
   4504       POST_MEM_WRITE( ARG2, sizeof(int) );
   4505    if (ARG4 != (Addr)NULL)
   4506       POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
   4507 }
   4508 
   4509 PRE(sys_writev)
   4510 {
   4511    Int i;
   4512    struct vki_iovec * vec;
   4513    *flags |= SfMayBlock;
   4514    PRINT("sys_writev ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
   4515    PRE_REG_READ3(ssize_t, "writev",
   4516                  unsigned long, fd, const struct iovec *, vector,
   4517                  unsigned long, count);
   4518    if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
   4519       SET_STATUS_Failure( VKI_EBADF );
   4520    } else {
   4521       if ((Int)ARG3 >= 0)
   4522          PRE_MEM_READ( "writev(vector)",
   4523                        ARG2, ARG3 * sizeof(struct vki_iovec) );
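              /* The check above covers the iovec array itself; the loop below
                 additionally checks each buffer that writev will read from
                 (iov_len bytes starting at iov_base). */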
   4524       if (ARG2 != 0) {
   4525          /* ToDo: don't do any of the following if the vector is invalid */
   4526          vec = (struct vki_iovec *)ARG2;
   4527          for (i = 0; i < (Int)ARG3; i++)
   4528             PRE_MEM_READ( "writev(vector[...])",
   4529                            (Addr)vec[i].iov_base, vec[i].iov_len );
   4530       }
   4531    }
   4532 }
   4533 
   4534 PRE(sys_utimes)
   4535 {
   4536    FUSE_COMPATIBLE_MAY_BLOCK();
   4537    PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   4538    PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
   4539    PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
   4540    if (ARG2 != 0) {
   4541       PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
   4542       PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
   4543    }
   4544 }
   4545 
   4546 PRE(sys_acct)
   4547 {
   4548    PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
   4549    PRE_REG_READ1(long, "acct", const char *, filename);
   4550    PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
   4551 }
   4552 
   4553 PRE(sys_pause)
   4554 {
   4555    *flags |= SfMayBlock;
   4556    PRINT("sys_pause ( )");
   4557    PRE_REG_READ0(long, "pause");
   4558 }
   4559 
   4560 PRE(sys_sigaltstack)
   4561 {
   4562    PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
   4563    PRE_REG_READ2(int, "sigaltstack",
   4564                  const vki_stack_t *, ss, vki_stack_t *, oss);
   4565    if (ARG1 != 0) {
   4566       const vki_stack_t *ss = (vki_stack_t *)ARG1;
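              /* Check the three fields individually rather than the whole
                 struct, presumably so that any padding in vki_stack_t is not
                 required to be initialised. */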
   4567       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
   4568       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
   4569       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
   4570    }
   4571    if (ARG2 != 0) {
   4572       PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
   4573    }
   4574 
   4575    /* Be safe: VG_(do_sys_sigaltstack) dereferences these pointers
              itself, so reject unreadable ones with EFAULT here rather than
              letting Valgrind fault on them. */
   4576    if (ARG1 && !ML_(safe_to_deref)((void*)ARG1, sizeof(vki_stack_t))) {
   4577       SET_STATUS_Failure(VKI_EFAULT);
   4578       return;
   4579    }
   4580    if (ARG2 && !ML_(safe_to_deref)((void*)ARG2, sizeof(vki_stack_t))) {
   4581       SET_STATUS_Failure(VKI_EFAULT);
   4582       return;
   4583    }
   4584 
   4585    SET_STATUS_from_SysRes(
   4586       VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
   4587                               (vki_stack_t*)ARG2)
   4588    );
   4589 }
   4590 POST(sys_sigaltstack)
   4591 {
   4592    vg_assert(SUCCESS);
   4593    if (RES == 0 && ARG2 != 0)
   4594       POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
   4595 }
   4596 
   4597 PRE(sys_sethostname)
   4598 {
   4599    PRINT("sys_sethostname ( %#lx, %ld )", ARG1, SARG2);
   4600    PRE_REG_READ2(long, "sethostname", char *, name, int, len);
   4601    PRE_MEM_READ( "sethostname(name)", ARG1, ARG2 );
   4602 }
   4603 
   4604 #undef PRE
   4605 #undef POST
   4606 
   4607 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
   4608 
   4609 /*--------------------------------------------------------------------*/
   4610 /*--- end                                                          ---*/
   4611 /*--------------------------------------------------------------------*/
   4612