      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Wrappers for generic Unix system calls                       ---*/
      4 /*---                                            syswrap-generic.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2000-2012 Julian Seward
      12       jseward@acm.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #if defined(VGO_linux) || defined(VGO_darwin)
     33 
     34 #include "pub_core_basics.h"
     35 #include "pub_core_vki.h"
     36 #include "pub_core_vkiscnums.h"
     37 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
     38 #include "pub_core_threadstate.h"
     39 #include "pub_core_debuginfo.h"     // VG_(di_notify_*)
     40 #include "pub_core_aspacemgr.h"
     41 #include "pub_core_transtab.h"      // VG_(discard_translations)
     42 #include "pub_core_xarray.h"
     43 #include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
     44 #include "pub_core_debuglog.h"
     45 #include "pub_core_errormgr.h"
     46 #include "pub_tool_gdbserver.h"     // VG_(gdbserver)
     47 #include "pub_core_libcbase.h"
     48 #include "pub_core_libcassert.h"
     49 #include "pub_core_libcfile.h"
     50 #include "pub_core_libcprint.h"
     51 #include "pub_core_libcproc.h"
     52 #include "pub_core_libcsignal.h"
     53 #include "pub_core_machine.h"       // VG_(get_SP)
     54 #include "pub_core_mallocfree.h"
     55 #include "pub_core_options.h"
     56 #include "pub_core_scheduler.h"
     57 #include "pub_core_signals.h"
     58 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     59 #include "pub_core_syscall.h"
     60 #include "pub_core_syswrap.h"
     61 #include "pub_core_tooliface.h"
     62 #include "pub_core_ume.h"
     63 
     64 #include "priv_types_n_macros.h"
     65 #include "priv_syswrap-generic.h"
     66 
     67 #include "config.h"
     68 
     69 
      70 /* Returns True iff the address range is something the client can
      71    plausibly mess with: all of it either already belongs to the
      72    client or is free or a reservation. */
     73 
     74 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
     75                                    const Char *syscallname)
     76 {
     77    Bool ret;
     78 
     79    if (size == 0)
     80       return True;
     81 
     82    ret = VG_(am_is_valid_for_client_or_free_or_resvn)
     83             (start,size,VKI_PROT_NONE);
     84 
     85    if (0)
     86       VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
     87 		  syscallname, start, start+size-1, (Int)ret);
     88 
     89    if (!ret && syscallname != NULL) {
     90       VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
     91                                "to modify addresses %#lx-%#lx\n",
     92                                syscallname, start, start+size-1);
     93       if (VG_(clo_verbosity) > 1) {
     94          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
     95       }
     96    }
     97 
     98    return ret;
     99 }
    100 
    101 
    102 Bool ML_(client_signal_OK)(Int sigNo)
    103 {
    104    /* signal 0 is OK for kill */
    105    Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
    106 
    107    //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
    108 
    109    return ret;
    110 }
    111 
    112 
    113 /* Handy small function to help stop wrappers from segfaulting when
    114    presented with bogus client addresses.  Is not used for generating
    115    user-visible errors. */
    116 
    117 Bool ML_(safe_to_deref) ( void* start, SizeT size )
    118 {
    119    return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
    120 }
    121 
    122 
    123 /* ---------------------------------------------------------------------
    124    Doing mmap, mremap
    125    ------------------------------------------------------------------ */
    126 
    127 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
    128    munmap, mprotect (and mremap??) work at the page level.  So addresses
    129    and lengths must be adjusted for this. */
    130 
     131 /* Mash around start and length so that the area exactly covers
     132    an integral number of pages.  If we don't do that, memcheck's
     133    idea of addressable memory diverges from the kernel's, which
     134    causes the leak detector to crash. */
    135 static
    136 void page_align_addr_and_len( Addr* a, SizeT* len)
    137 {
    138    Addr ra;
    139 
    140    ra = VG_PGROUNDDN(*a);
    141    *len = VG_PGROUNDUP(*a + *len) - ra;
    142    *a = ra;
    143 }
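
/* A worked sketch of the rounding (editorial addition, not part of the
   original source; assumes 4K pages):

      Addr  a   = 0x1234;
      SizeT len = 0x100;
      page_align_addr_and_len( &a, &len );
      // now a == 0x1000 and len == 0x1000, i.e. the smallest
      // page-aligned range covering the original [0x1234, 0x1333].
*/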
    144 
    145 static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
    146                                 UInt flags, Int fd, Off64T offset)
    147 {
    148    Bool d;
    149 
    150    /* 'a' is the return value from a real kernel mmap, hence: */
    151    vg_assert(VG_IS_PAGE_ALIGNED(a));
    152    /* whereas len is whatever the syscall supplied.  So: */
    153    len = VG_PGROUNDUP(len);
    154 
    155    d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
    156 
    157    if (d)
    158       VG_(discard_translations)( (Addr64)a, (ULong)len,
    159                                  "notify_core_of_mmap" );
    160 }
    161 
    162 static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
    163 {
    164    Bool rr, ww, xx;
    165 
    166    /* 'a' is the return value from a real kernel mmap, hence: */
    167    vg_assert(VG_IS_PAGE_ALIGNED(a));
    168    /* whereas len is whatever the syscall supplied.  So: */
    169    len = VG_PGROUNDUP(len);
    170 
    171    rr = toBool(prot & VKI_PROT_READ);
    172    ww = toBool(prot & VKI_PROT_WRITE);
    173    xx = toBool(prot & VKI_PROT_EXEC);
    174 
    175    VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
    176 }
    177 
    178 
    179 /* When a client mmap has been successfully done, this function must
    180    be called.  It notifies both aspacem and the tool of the new
    181    mapping.
    182 
    183    JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
    184    it is called from is POST(sys_io_setup).  In particular,
    185    ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
    186    client mmap.  But it doesn't call this function; instead it does the
    187    relevant notifications itself.  Here, we just pass di_handle=0 to
    188    notify_tool_of_mmap as we have no better information.  But really this
    189    function should be done away with; problem is I don't understand what
    190    POST(sys_io_setup) does or how it works.
    191 
     192    [However, this function is used heavily on Darwin, because
     193     ML_(generic_PRE_sys_mmap) cannot be used on Darwin.]
    194  */
    195 void
    196 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
    197                                     UInt flags, Int fd, Off64T offset )
    198 {
    199    // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
    200    // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
    201    // Should it?  --njn
    202    notify_core_of_mmap(a, len, prot, flags, fd, offset);
    203    notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
    204 }
    205 
    206 void
    207 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
    208 {
    209    Bool d;
    210 
    211    page_align_addr_and_len(&a, &len);
    212    d = VG_(am_notify_munmap)(a, len);
    213    VG_TRACK( die_mem_munmap, a, len );
    214    VG_(di_notify_munmap)( a, len );
    215    if (d)
    216       VG_(discard_translations)( (Addr64)a, (ULong)len,
    217                                  "ML_(notify_core_and_tool_of_munmap)" );
    218 }
    219 
    220 void
    221 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
    222 {
    223    Bool rr = toBool(prot & VKI_PROT_READ);
    224    Bool ww = toBool(prot & VKI_PROT_WRITE);
    225    Bool xx = toBool(prot & VKI_PROT_EXEC);
    226    Bool d;
    227 
    228    page_align_addr_and_len(&a, &len);
    229    d = VG_(am_notify_mprotect)(a, len, prot);
    230    VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
    231    VG_(di_notify_mprotect)( a, len, prot );
    232    if (d)
    233       VG_(discard_translations)( (Addr64)a, (ULong)len,
    234                                  "ML_(notify_core_and_tool_of_mprotect)" );
    235 }
    236 
    237 
    238 
    239 #if HAVE_MREMAP
    240 /* Expand (or shrink) an existing mapping, potentially moving it at
    241    the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
    242 */
    243 static
    244 SysRes do_mremap( Addr old_addr, SizeT old_len,
    245                   Addr new_addr, SizeT new_len,
    246                   UWord flags, ThreadId tid )
    247 {
    248 #  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
    249 
    250    Bool      ok, d;
    251    NSegment const* old_seg;
    252    Addr      advised;
    253    Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
    254    Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
    255 
    256    if (0)
    257       VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
    258                   old_addr,old_len,new_addr,new_len,
    259                   flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
    260                   flags & VKI_MREMAP_FIXED ? "FIXED" : "");
    261    if (0)
    262       VG_(am_show_nsegments)(0, "do_remap: before");
    263 
    264    if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
    265       goto eINVAL;
    266 
    267    if (!VG_IS_PAGE_ALIGNED(old_addr))
    268       goto eINVAL;
    269 
    270    old_len = VG_PGROUNDUP(old_len);
    271    new_len = VG_PGROUNDUP(new_len);
    272 
    273    if (new_len == 0)
    274       goto eINVAL;
    275 
    276    /* kernel doesn't reject this, but we do. */
    277    if (old_len == 0)
    278       goto eINVAL;
    279 
    280    /* reject wraparounds */
    281    if (old_addr + old_len < old_addr)
    282       goto eINVAL;
    283    if (f_fixed == True && new_addr + new_len < new_len)
    284       goto eINVAL;
    285 
    286    /* kernel rejects all fixed, no-move requests (which are
    287       meaningless). */
    288    if (f_fixed == True && f_maymove == False)
    289       goto eINVAL;
    290 
    291    /* Stay away from non-client areas. */
    292    if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
    293       goto eINVAL;
    294 
    295    /* In all remaining cases, if the old range does not fall within a
    296       single segment, fail. */
    297    old_seg = VG_(am_find_nsegment)( old_addr );
    298    if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
    299       goto eINVAL;
    300    if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC)
    301       goto eINVAL;
    302 
    303    vg_assert(old_len > 0);
    304    vg_assert(new_len > 0);
    305    vg_assert(VG_IS_PAGE_ALIGNED(old_len));
    306    vg_assert(VG_IS_PAGE_ALIGNED(new_len));
    307    vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
    308 
    309    /* There are 3 remaining cases:
    310 
    311       * maymove == False
    312 
    313         new space has to be at old address, so:
    314             - shrink    -> unmap end
    315             - same size -> do nothing
    316             - grow      -> if can grow in-place, do so, else fail
    317 
    318       * maymove == True, fixed == False
    319 
    320         new space can be anywhere, so:
    321             - shrink    -> unmap end
    322             - same size -> do nothing
    323             - grow      -> if can grow in-place, do so, else
    324                            move to anywhere large enough, else fail
    325 
    326       * maymove == True, fixed == True
    327 
    328         new space must be at new address, so:
    329 
    330             - if new address is not page aligned, fail
    331             - if new address range overlaps old one, fail
    332             - if new address range cannot be allocated, fail
     333             - else move to new address range with new size
    335    */
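
   /* Concrete examples of the three cases (editorial sketch; the
      addresses and sizes are hypothetical):

         mremap(0x10000, 0x2000, 0x1000, 0)
            -> maymove == False, shrink: unmap the top 0x1000 bytes
         mremap(0x10000, 0x2000, 0x4000, MREMAP_MAYMOVE)
            -> grow in place if the 0x2000 bytes above the mapping are
               free, otherwise relocate to any large-enough hole
         mremap(0x10000, 0x2000, 0x2000, MREMAP_MAYMOVE|MREMAP_FIXED,
                0x40000)
            -> move to 0x40000, provided the new range does not overlap
               the old one
   */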
    336 
    337    if (f_maymove == False) {
    338       /* new space has to be at old address */
    339       if (new_len < old_len)
    340          goto shrink_in_place;
    341       if (new_len > old_len)
    342          goto grow_in_place_or_fail;
    343       goto same_in_place;
    344    }
    345 
    346    if (f_maymove == True && f_fixed == False) {
    347       /* new space can be anywhere */
    348       if (new_len < old_len)
    349          goto shrink_in_place;
    350       if (new_len > old_len)
    351          goto grow_in_place_or_move_anywhere_or_fail;
    352       goto same_in_place;
    353    }
    354 
    355    if (f_maymove == True && f_fixed == True) {
    356       /* new space can only be at the new address */
    357       if (!VG_IS_PAGE_ALIGNED(new_addr))
    358          goto eINVAL;
    359       if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
    360          /* no overlap */
    361       } else {
    362          goto eINVAL;
    363       }
    364       if (new_addr == 0)
    365          goto eINVAL;
    366          /* VG_(am_get_advisory_client_simple) interprets zero to mean
    367             non-fixed, which is not what we want */
    368       advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
    369       if (!ok || advised != new_addr)
    370          goto eNOMEM;
    371       ok = VG_(am_relocate_nooverlap_client)
    372               ( &d, old_addr, old_len, new_addr, new_len );
    373       if (ok) {
    374          VG_TRACK( copy_mem_remap, old_addr, new_addr,
    375                                    MIN_SIZET(old_len,new_len) );
    376          if (new_len > old_len)
    377             VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
    378                       old_seg->hasR, old_seg->hasW, old_seg->hasX,
    379                       0/*di_handle*/ );
    380          VG_TRACK(die_mem_munmap, old_addr, old_len);
    381          if (d) {
    382             VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
    383             VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
    384          }
    385          return VG_(mk_SysRes_Success)( new_addr );
    386       }
    387       goto eNOMEM;
    388    }
    389 
    390    /* end of the 3 cases */
    391    /*NOTREACHED*/ vg_assert(0);
    392 
    393   grow_in_place_or_move_anywhere_or_fail:
    394    {
    395    /* try growing it in-place */
    396    Addr   needA = old_addr + old_len;
    397    SSizeT needL = new_len - old_len;
    398 
    399    vg_assert(needL > 0);
    400    if (needA == 0)
    401       goto eINVAL;
    402       /* VG_(am_get_advisory_client_simple) interprets zero to mean
    403          non-fixed, which is not what we want */
    404    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
    405    if (ok) {
    406       /* Fixes bug #129866. */
    407       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
    408    }
    409    if (ok && advised == needA) {
    410       ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
    411       if (ok) {
    412          VG_TRACK( new_mem_mmap, needA, needL,
    413                                  old_seg->hasR,
    414                                  old_seg->hasW, old_seg->hasX,
    415                                  0/*di_handle*/ );
    416          if (d)
    417             VG_(discard_translations)( needA, needL, "do_remap(3)" );
    418          return VG_(mk_SysRes_Success)( old_addr );
    419       }
    420    }
    421 
    422    /* that failed.  Look elsewhere. */
    423    advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
    424    if (ok) {
    425       Bool oldR = old_seg->hasR;
    426       Bool oldW = old_seg->hasW;
    427       Bool oldX = old_seg->hasX;
    428       /* assert new area does not overlap old */
    429       vg_assert(advised+new_len-1 < old_addr
    430                 || advised > old_addr+old_len-1);
    431       ok = VG_(am_relocate_nooverlap_client)
    432               ( &d, old_addr, old_len, advised, new_len );
    433       if (ok) {
    434          VG_TRACK( copy_mem_remap, old_addr, advised,
    435                                    MIN_SIZET(old_len,new_len) );
    436          if (new_len > old_len)
    437             VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
    438                       oldR, oldW, oldX, 0/*di_handle*/ );
    439          VG_TRACK(die_mem_munmap, old_addr, old_len);
    440          if (d) {
    441             VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
    442             VG_(discard_translations)( advised, new_len, "do_remap(5)" );
    443          }
    444          return VG_(mk_SysRes_Success)( advised );
    445       }
    446    }
    447    goto eNOMEM;
    448    }
    449    /*NOTREACHED*/ vg_assert(0);
    450 
    451   grow_in_place_or_fail:
    452    {
    453    Addr  needA = old_addr + old_len;
    454    SizeT needL = new_len - old_len;
    455    if (needA == 0)
    456       goto eINVAL;
    457       /* VG_(am_get_advisory_client_simple) interprets zero to mean
    458          non-fixed, which is not what we want */
    459    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
    460    if (ok) {
    461       /* Fixes bug #129866. */
    462       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
    463    }
    464    if (!ok || advised != needA)
    465       goto eNOMEM;
    466    ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
    467    if (!ok)
    468       goto eNOMEM;
    469    VG_TRACK( new_mem_mmap, needA, needL,
    470                            old_seg->hasR, old_seg->hasW, old_seg->hasX,
    471                            0/*di_handle*/ );
    472    if (d)
    473       VG_(discard_translations)( needA, needL, "do_remap(6)" );
    474    return VG_(mk_SysRes_Success)( old_addr );
    475    }
    476    /*NOTREACHED*/ vg_assert(0);
    477 
    478   shrink_in_place:
    479    {
    480    SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
    481    if (sr_isError(sres))
    482       return sres;
    483    VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
    484    if (d)
    485       VG_(discard_translations)( old_addr+new_len, old_len-new_len,
    486                                  "do_remap(7)" );
    487    return VG_(mk_SysRes_Success)( old_addr );
    488    }
    489    /*NOTREACHED*/ vg_assert(0);
    490 
    491   same_in_place:
    492    return VG_(mk_SysRes_Success)( old_addr );
    493    /*NOTREACHED*/ vg_assert(0);
    494 
    495   eINVAL:
    496    return VG_(mk_SysRes_Error)( VKI_EINVAL );
    497   eNOMEM:
    498    return VG_(mk_SysRes_Error)( VKI_ENOMEM );
    499 
    500 #  undef MIN_SIZET
    501 }
    502 #endif /* HAVE_MREMAP */
    503 
    504 
    505 /* ---------------------------------------------------------------------
    506    File-descriptor tracking
    507    ------------------------------------------------------------------ */
    508 
    509 /* One of these is allocated for each open file descriptor.  */
    510 typedef struct OpenFd
    511 {
    512    Int fd;                        /* The file descriptor */
    513    Char *pathname;                /* NULL if not a regular file or unknown */
    514    ExeContext *where;             /* NULL if inherited from parent */
    515    struct OpenFd *next, *prev;
    516 } OpenFd;
    517 
    518 /* List of allocated file descriptors. */
    519 static OpenFd *allocated_fds = NULL;
    520 
    521 /* Count of open file descriptors. */
    522 static Int fd_count = 0;
    523 
    524 
    525 /* Note the fact that a file descriptor was just closed. */
    526 static
    527 void record_fd_close(Int fd)
    528 {
    529    OpenFd *i = allocated_fds;
    530 
    531    if (fd >= VG_(fd_hard_limit))
    532       return;			/* Valgrind internal */
    533 
    534    while(i) {
    535       if(i->fd == fd) {
    536          if(i->prev)
    537             i->prev->next = i->next;
    538          else
    539             allocated_fds = i->next;
    540          if(i->next)
    541             i->next->prev = i->prev;
    542          if(i->pathname)
    543             VG_(arena_free) (VG_AR_CORE, i->pathname);
    544          VG_(arena_free) (VG_AR_CORE, i);
    545          fd_count--;
    546          break;
    547       }
    548       i = i->next;
    549    }
    550 }
    551 
    552 /* Note the fact that a file descriptor was just opened.  If the
    553    tid is -1, this indicates an inherited fd.  If the pathname is NULL,
     554    this either indicates a non-standard file (e.g. a pipe or socket or
    555    some such thing) or that we don't know the filename.  If the fd is
    556    already open, then we're probably doing a dup2() to an existing fd,
    557    so just overwrite the existing one. */
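
/* Illustrative sequence (editorial note; the paths are made up): after

      ML_(record_fd_open_with_given_name)(tid, 5, "/tmp/log");
      ML_(record_fd_open_with_given_name)(tid, 5, "/tmp/other");

   the list still holds exactly one entry for fd 5, now recording
   "/tmp/other" -- the same effect as the client dup2()ing over an
   already-tracked descriptor. */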
    558 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd, char *pathname)
    559 {
    560    OpenFd *i;
    561 
    562    if (fd >= VG_(fd_hard_limit))
    563       return;			/* Valgrind internal */
    564 
    565    /* Check to see if this fd is already open. */
    566    i = allocated_fds;
    567    while (i) {
    568       if (i->fd == fd) {
    569          if (i->pathname) VG_(arena_free)(VG_AR_CORE, i->pathname);
    570          break;
    571       }
    572       i = i->next;
    573    }
    574 
    575    /* Not already one: allocate an OpenFd */
    576    if (i == NULL) {
    577       i = VG_(arena_malloc)(VG_AR_CORE, "syswrap.rfdowgn.1", sizeof(OpenFd));
    578 
    579       i->prev = NULL;
    580       i->next = allocated_fds;
    581       if(allocated_fds) allocated_fds->prev = i;
    582       allocated_fds = i;
    583       fd_count++;
    584    }
    585 
    586    i->fd = fd;
    587    i->pathname = VG_(arena_strdup)(VG_AR_CORE, "syswrap.rfdowgn.2", pathname);
    588    i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
    589 }
    590 
    591 // Record opening of an fd, and find its name.
    592 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
    593 {
    594    static HChar buf[VKI_PATH_MAX];
    595    Char* name;
    596    if (VG_(resolve_filename)(fd, buf, VKI_PATH_MAX))
    597       name = buf;
    598    else
    599       name = NULL;
    600 
    601    ML_(record_fd_open_with_given_name)(tid, fd, name);
    602 }
    603 
    604 // Record opening of a nameless fd.
    605 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
    606 {
    607    ML_(record_fd_open_with_given_name)(tid, fd, NULL);
    608 }
    609 
    610 static
    611 Char *unix2name(struct vki_sockaddr_un *sa, UInt len, Char *name)
    612 {
    613    if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
    614       VG_(sprintf)(name, "<unknown>");
    615    } else {
    616       VG_(sprintf)(name, "%s", sa->sun_path);
    617    }
    618 
    619    return name;
    620 }
    621 
    622 static
    623 Char *inet2name(struct vki_sockaddr_in *sa, UInt len, Char *name)
    624 {
    625    if (sa == NULL || len == 0) {
    626       VG_(sprintf)(name, "<unknown>");
    627    } else {
    628       UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
    629       if (addr == 0) {
    630          VG_(sprintf)(name, "<unbound>");
    631       } else {
    632          VG_(sprintf)(name, "%u.%u.%u.%u:%u",
    633                       (addr>>24) & 0xFF, (addr>>16) & 0xFF,
    634                       (addr>>8) & 0xFF, addr & 0xFF,
    635                       VG_(ntohs)(sa->sin_port));
    636       }
    637    }
    638 
    639    return name;
    640 }
    641 
    642 /*
     643  * Try to get some details about a socket.
    644  */
    645 static void
    646 getsockdetails(Int fd)
    647 {
    648    union u {
    649       struct vki_sockaddr a;
    650       struct vki_sockaddr_in in;
    651       struct vki_sockaddr_un un;
    652    } laddr;
    653    UInt llen;
    654 
    655    llen = sizeof(laddr);
    656    VG_(memset)(&laddr, 0, llen);
    657 
    658    if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
    659       switch(laddr.a.sa_family) {
    660       case VKI_AF_INET: {
    661          static char lname[32];
    662          static char pname[32];
    663          struct vki_sockaddr_in paddr;
    664          UInt plen = sizeof(struct vki_sockaddr_in);
    665 
    666          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
    667             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
    668                          inet2name(&(laddr.in), llen, lname),
    669                          inet2name(&paddr, plen, pname));
    670          } else {
    671             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
    672                          fd, inet2name(&(laddr.in), llen, lname));
    673          }
    674          return;
    675          }
    676       case VKI_AF_UNIX: {
    677          static char lname[256];
    678          VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
    679                       unix2name(&(laddr.un), llen, lname));
    680          return;
    681          }
    682       default:
    683          VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
    684                       laddr.a.sa_family, fd);
    685          return;
    686       }
    687    }
    688 
    689    VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
    690 }
    691 
    692 
    693 /* Dump out a summary, and a more detailed list, of open file descriptors. */
    694 void VG_(show_open_fds) (void)
    695 {
    696    OpenFd *i = allocated_fds;
    697 
    698    VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open at exit.\n", fd_count);
    699 
    700    while (i) {
    701       if (i->pathname) {
    702          VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
    703                       i->pathname);
    704       } else {
    705          Int val;
    706          UInt len = sizeof(val);
    707 
    708          if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
    709              == -1) {
    710             VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
    711          } else {
    712             getsockdetails(i->fd);
    713          }
    714       }
    715 
    716       if(i->where) {
    717          VG_(pp_ExeContext)(i->where);
    718          VG_(message)(Vg_UserMsg, "\n");
    719       } else {
    720          VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
    721          VG_(message)(Vg_UserMsg, "\n");
    722       }
    723 
    724       i = i->next;
    725    }
    726 
    727    VG_(message)(Vg_UserMsg, "\n");
    728 }
    729 
    730 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
    731    have /proc support compiled in, or a non-Linux kernel), then we need to
    732    find out what file descriptors we inherited from our parent process the
    733    hard way - by checking each fd in turn. */
    734 static
    735 void init_preopened_fds_without_proc_self_fd(void)
    736 {
    737    struct vki_rlimit lim;
    738    UInt count;
    739    Int i;
    740 
    741    if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
    742       /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
    743          an arbitrarily high number.  1024 happens to be the limit in
    744          the 2.4 Linux kernels. */
    745       count = 1024;
    746    } else {
    747       count = lim.rlim_cur;
    748    }
    749 
    750    for (i = 0; i < count; i++)
    751       if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
    752          ML_(record_fd_open_named)(-1, i);
    753 }
    754 
    755 /* Initialize the list of open file descriptors with the file descriptors
     756    we inherited from our parent process. */
    757 
    758 void VG_(init_preopened_fds)(void)
    759 {
    760 // DDD: should probably use HAVE_PROC here or similar, instead.
    761 #if defined(VGO_linux)
    762    Int ret;
    763    struct vki_dirent d;
    764    SysRes f;
    765 
    766    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
    767    if (sr_isError(f)) {
    768       init_preopened_fds_without_proc_self_fd();
    769       return;
    770    }
    771 
    772    while ((ret = VG_(getdents)(sr_Res(f), &d, sizeof(d))) != 0) {
    773       if (ret == -1)
    774          goto out;
    775 
    776       if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
    777          Char* s;
    778          Int fno = VG_(strtoll10)(d.d_name, &s);
    779          if (*s == '\0') {
    780             if (fno != sr_Res(f))
    781                if (VG_(clo_track_fds))
    782                   ML_(record_fd_open_named)(-1, fno);
    783          } else {
    784             VG_(message)(Vg_DebugMsg,
    785                "Warning: invalid file name in /proc/self/fd: %s\n",
    786                d.d_name);
    787          }
    788       }
    789 
    790       VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
    791    }
    792 
    793   out:
    794    VG_(close)(sr_Res(f));
    795 
    796 #elif defined(VGO_darwin)
    797    init_preopened_fds_without_proc_self_fd();
    798 
    799 #else
    800 #  error Unknown OS
    801 #endif
    802 }
    803 
    804 static
    805 Char *strdupcat ( HChar* cc, const Char *s1, const Char *s2, ArenaId aid )
    806 {
    807    UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
    808    Char *result = VG_(arena_malloc) ( aid, cc, len );
    809    VG_(strcpy) ( result, s1 );
    810    VG_(strcat) ( result, s2 );
    811    return result;
    812 }
    813 
    814 static
    815 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
    816                             Char *msg, Addr base, SizeT size )
    817 {
    818    Char *outmsg = strdupcat ( "di.syswrap.pmrs.1",
    819                               "sendmsg", msg, VG_AR_CORE );
    820    PRE_MEM_READ( outmsg, base, size );
    821    VG_(arena_free) ( VG_AR_CORE, outmsg );
    822 }
    823 
    824 static
    825 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
    826                              Char *msg, Addr base, SizeT size )
    827 {
    828    Char *outmsg = strdupcat ( "di.syswrap.pmwr.1",
    829                               "recvmsg", msg, VG_AR_CORE );
    830    if ( read )
    831       PRE_MEM_READ( outmsg, base, size );
    832    else
    833       PRE_MEM_WRITE( outmsg, base, size );
    834    VG_(arena_free) ( VG_AR_CORE, outmsg );
    835 }
    836 
    837 static
    838 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
    839                               Char *fieldName, Addr base, SizeT size )
    840 {
    841    if ( !read )
    842       POST_MEM_WRITE( base, size );
    843 }
    844 
    845 static
    846 void msghdr_foreachfield (
    847         ThreadId tid,
    848         Char *name,
    849         struct vki_msghdr *msg,
    850         UInt length,
    851         void (*foreach_func)( ThreadId, Bool, Char *, Addr, SizeT )
    852      )
    853 {
    854    Char *fieldName;
    855 
    856    if ( !msg )
    857       return;
    858 
    859    fieldName = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.mfef", VG_(strlen)(name) + 32 );
    860 
    861    VG_(sprintf) ( fieldName, "(%s)", name );
    862 
    863    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
    864    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
    865    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
    866    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
    867    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
    868    foreach_func ( tid, True, fieldName, (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
    869    foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );
    870 
    871    if ( msg->msg_name ) {
    872       VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
    873       foreach_func ( tid, False, fieldName,
    874                      (Addr)msg->msg_name, msg->msg_namelen );
    875    }
    876 
    877    if ( msg->msg_iov ) {
    878       struct vki_iovec *iov = msg->msg_iov;
    879       UInt i;
    880 
    881       VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );
    882 
    883       foreach_func ( tid, True, fieldName,
    884                      (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );
    885 
    886       for ( i = 0; i < msg->msg_iovlen; ++i, ++iov ) {
    887          UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
    888          VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
    889          foreach_func ( tid, False, fieldName,
    890                         (Addr)iov->iov_base, iov_len );
    891          length = length - iov_len;
    892       }
    893    }
    894 
    895    if ( msg->msg_control )
    896    {
    897       VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
    898       foreach_func ( tid, False, fieldName,
    899                      (Addr)msg->msg_control, msg->msg_controllen );
    900    }
    901 
    902    VG_(arena_free) ( VG_AR_CORE, fieldName );
    903 }
    904 
    905 static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
    906 {
    907    struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);
    908 
    909    while (cm) {
    910       if (cm->cmsg_level == VKI_SOL_SOCKET &&
    911           cm->cmsg_type == VKI_SCM_RIGHTS ) {
    912          Int *fds = (Int *) VKI_CMSG_DATA(cm);
    913          Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
    914                          / sizeof(int);
    915          Int i;
    916 
    917          for (i = 0; i < fdc; i++)
    918             if(VG_(clo_track_fds))
    919                // XXX: must we check the range on these fds with
    920                //      ML_(fd_allowed)()?
    921                ML_(record_fd_open_named)(tid, fds[i]);
    922       }
    923 
    924       cm = VKI_CMSG_NXTHDR(msg, cm);
    925    }
    926 }
    927 
    928 /* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
    929 static
    930 void pre_mem_read_sockaddr ( ThreadId tid,
    931                              Char *description,
    932                              struct vki_sockaddr *sa, UInt salen )
    933 {
    934    Char *outmsg;
    935    struct vki_sockaddr_un*  sun  = (struct vki_sockaddr_un *)sa;
    936    struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
    937    struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
    938 
    939    /* NULL/zero-length sockaddrs are legal */
    940    if ( sa == NULL || salen == 0 ) return;
    941 
    942    outmsg = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.pmr_sockaddr.1",
    943                                 VG_(strlen)( description ) + 30 );
    944 
    945    VG_(sprintf) ( outmsg, description, "sa_family" );
    946    PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
    947 
    948    switch (sa->sa_family) {
    949 
    950       case VKI_AF_UNIX:
    951          VG_(sprintf) ( outmsg, description, "sun_path" );
    952          PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
    953          // GrP fixme max of sun_len-2? what about nul char?
    954          break;
    955 
    956       case VKI_AF_INET:
    957          VG_(sprintf) ( outmsg, description, "sin_port" );
    958          PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
    959          VG_(sprintf) ( outmsg, description, "sin_addr" );
    960          PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
    961          break;
    962 
    963       case VKI_AF_INET6:
    964          VG_(sprintf) ( outmsg, description, "sin6_port" );
    965          PRE_MEM_READ( outmsg,
    966             (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
    967          VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
    968          PRE_MEM_READ( outmsg,
    969             (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
    970          VG_(sprintf) ( outmsg, description, "sin6_addr" );
    971          PRE_MEM_READ( outmsg,
    972             (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
    973          VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
    974          PRE_MEM_READ( outmsg,
    975             (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
    976          break;
    977 
    978       default:
    979          VG_(sprintf) ( outmsg, description, "" );
    980          PRE_MEM_READ( outmsg, (Addr) sa, salen );
    981          break;
    982    }
    983 
    984    VG_(arena_free) ( VG_AR_CORE, outmsg );
    985 }
    986 
    987 /* Dereference a pointer to a UInt. */
    988 static UInt deref_UInt ( ThreadId tid, Addr a, Char* s )
    989 {
    990    UInt* a_p = (UInt*)a;
    991    PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
    992    if (a_p == NULL)
    993       return 0;
    994    else
    995       return *a_p;
    996 }
    997 
    998 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
    999                                   Char* buf_s, Char* buflen_s )
   1000 {
   1001    if (VG_(tdict).track_pre_mem_write) {
   1002       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
   1003       if (buflen_in > 0) {
   1004          VG_(tdict).track_pre_mem_write(
   1005             Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
   1006       }
   1007    }
   1008 }
   1009 
   1010 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
   1011                                    Addr buf_p, Addr buflen_p, Char* s )
   1012 {
   1013    if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
   1014       UInt buflen_out = deref_UInt( tid, buflen_p, s);
   1015       if (buflen_out > 0 && buf_p != (Addr)NULL) {
   1016          VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
   1017       }
   1018    }
   1019 }
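
/* Usage sketch (editorial; mirrors how the socket wrappers further down
   call this pair of helpers).  For a client call such as
   getsockname(fd, name, &namelen):

      - the PRE handler calls ML_(buf_and_len_pre_check) with
        (name, &namelen), so the tool checks that *namelen bytes at
        'name' are addressable before the kernel writes to them;

      - the POST handler calls ML_(buf_and_len_post_check) with the same
        pair, so only the *namelen bytes the kernel actually filled in
        are marked as defined. */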
   1020 
   1021 /* ---------------------------------------------------------------------
   1022    Data seg end, for brk()
   1023    ------------------------------------------------------------------ */
   1024 
   1025 /*   +--------+------------+
   1026      | anon   |    resvn   |
   1027      +--------+------------+
   1028 
   1029      ^     ^  ^
   1030      |     |  boundary is page aligned
   1031      |     VG_(brk_limit) -- no alignment constraint
   1032      VG_(brk_base) -- page aligned -- does not move
   1033 
   1034      Both the anon part and the reservation part are always at least
   1035      one page.
   1036 */
   1037 
   1038 /* Set the new data segment end to NEWBRK.  If this succeeds, return
   1039    NEWBRK, else return the current data segment end. */
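
/* Example behaviour (editorial, with made-up addresses): if
   VG_(brk_base) is 0x804a000 and VG_(brk_limit) is 0x804c800, then

      do_brk(0x804b000)  shrinks: the dropped tail is zeroed out and
                         VG_(brk_limit) becomes 0x804b000;
      do_brk(0x804e000)  grows: in-place if it still fits in the anon
                         segment, otherwise by eating whole pages of the
                         adjacent reservation;
      do_brk(0x8049000)  is below VG_(brk_base), so it fails and the
                         unchanged VG_(brk_limit) is returned. */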
   1040 
   1041 static Addr do_brk ( Addr newbrk )
   1042 {
   1043    NSegment const* aseg;
   1044    NSegment const* rseg;
   1045    Addr newbrkP;
   1046    SizeT delta;
   1047    Bool ok;
   1048    Bool debug = False;
   1049 
   1050    if (debug)
   1051       VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
   1052 		  VG_(brk_base), VG_(brk_limit), newbrk);
   1053 
   1054 #  if 0
   1055    if (0) show_segments("in_brk");
   1056 #  endif
   1057 
   1058    if (newbrk < VG_(brk_base))
   1059       /* Clearly impossible. */
   1060       goto bad;
   1061 
   1062    if (newbrk >= VG_(brk_base) && newbrk < VG_(brk_limit)) {
   1063       /* shrinking the data segment.  Be lazy and don't munmap the
   1064          excess area. */
   1065       NSegment const * seg = VG_(am_find_nsegment)(newbrk);
   1066       if (seg && seg->hasT)
   1067          VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
   1068                                     "do_brk(shrink)" );
   1069       /* Since we're being lazy and not unmapping pages, we have to
   1070          zero out the area, so that if the area later comes back into
   1071          circulation, it will be filled with zeroes, as if it really
   1072          had been unmapped and later remapped.  Be a bit paranoid and
   1073          try hard to ensure we're not going to segfault by doing the
   1074          write - check both ends of the range are in the same segment
   1075          and that segment is writable. */
   1076       if (seg) {
   1077          /* pre: newbrk < VG_(brk_limit)
   1078               => newbrk <= VG_(brk_limit)-1 */
   1079          NSegment const * seg2;
   1080          vg_assert(newbrk < VG_(brk_limit));
   1081          seg2 = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   1082          if (seg2 && seg == seg2 && seg->hasW)
   1083             VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
   1084       }
   1085 
   1086       VG_(brk_limit) = newbrk;
   1087       return newbrk;
   1088    }
   1089 
   1090    /* otherwise we're expanding the brk segment. */
   1091    if (VG_(brk_limit) > VG_(brk_base))
   1092       aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   1093    else
   1094       aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
   1095    rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
   1096 
   1097    /* These should be assured by setup_client_dataseg in m_main. */
   1098    vg_assert(aseg);
   1099    vg_assert(rseg);
   1100    vg_assert(aseg->kind == SkAnonC);
   1101    vg_assert(rseg->kind == SkResvn);
   1102    vg_assert(aseg->end+1 == rseg->start);
   1103 
   1104    vg_assert(newbrk >= VG_(brk_base));
   1105    if (newbrk <= rseg->start) {
   1106       /* still fits within the anon segment. */
   1107       VG_(brk_limit) = newbrk;
   1108       return newbrk;
   1109    }
   1110 
   1111    if (newbrk > rseg->end+1 - VKI_PAGE_SIZE) {
   1112       /* request is too large -- the resvn would fall below 1 page,
   1113          which isn't allowed. */
   1114       goto bad;
   1115    }
   1116 
   1117    newbrkP = VG_PGROUNDUP(newbrk);
   1118    vg_assert(newbrkP > rseg->start && newbrkP <= rseg->end+1 - VKI_PAGE_SIZE);
   1119    delta = newbrkP - rseg->start;
   1120    vg_assert(delta > 0);
   1121    vg_assert(VG_IS_PAGE_ALIGNED(delta));
   1122 
   1123    ok = VG_(am_extend_into_adjacent_reservation_client)( (NSegment*)aseg, delta );
   1124    if (!ok) goto bad;
   1125 
   1126    VG_(brk_limit) = newbrk;
   1127    return newbrk;
   1128 
   1129   bad:
   1130    return VG_(brk_limit);
   1131 }
   1132 
   1133 
   1134 /* ---------------------------------------------------------------------
   1135    Vet file descriptors for sanity
   1136    ------------------------------------------------------------------ */
   1137 /*
   1138 > - what does the "Bool soft" parameter mean?
   1139 
   1140 (Tom Hughes, 3 Oct 05):
   1141 
   1142 Whether or not to consider a file descriptor invalid if it is above
   1143 the current soft limit.
   1144 
   1145 Basically if we are testing whether a newly created file descriptor is
   1146 valid (in a post handler) then we set soft to true, and if we are
   1147 testing whether a file descriptor that is about to be used (in a pre
   1148 handler) is valid [viz, an already-existing fd] then we set it to false.
   1149 
   1150 The point is that if the (virtual) soft limit is lowered then any
   1151 existing descriptors can still be read/written/closed etc (so long as
   1152 they are below the valgrind reserved descriptors) but no new
   1153 descriptors can be created above the new soft limit.
   1154 
   1155 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
   1156 */
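
/* For example (editorial note): the socketpair POST handler further down
   calls ML_(fd_allowed)(fd, "socketcall.socketpair", tid, True), because
   the kernel has just created that fd and the soft limit therefore
   applies; a PRE handler vetting an fd the client is about to use would
   pass isNewFd == False, since lowering the soft limit must not make
   already-open descriptors unusable. */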
   1157 
   1158 /* Return true if we're allowed to use or create this fd */
   1159 Bool ML_(fd_allowed)(Int fd, const Char *syscallname, ThreadId tid, Bool isNewFd)
   1160 {
   1161    Bool allowed = True;
   1162 
   1163    /* hard limits always apply */
   1164    if (fd < 0 || fd >= VG_(fd_hard_limit))
   1165       allowed = False;
   1166 
   1167    /* hijacking the output fds is never allowed */
   1168    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
   1169       allowed = False;
   1170 
   1171    /* if creating a new fd (rather than using an existing one), the
   1172       soft limit must also be observed */
   1173    if (isNewFd && fd >= VG_(fd_soft_limit))
   1174       allowed = False;
   1175 
   1176    /* this looks like it ought to be included, but causes problems: */
   1177    /*
   1178    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
   1179       allowed = False;
   1180    */
   1181    /* The difficulty is as follows: consider a program P which expects
   1182       to be able to mess with (redirect) its own stderr (fd 2).
   1183       Usually to deal with P we would issue command line flags to send
   1184       logging somewhere other than stderr, so as not to disrupt P.
   1185       The problem is that -d unilaterally hijacks stderr with no
   1186       consultation with P.  And so, if this check is enabled, P will
   1187       work OK normally but fail if -d is issued.
   1188 
   1189       Basically -d is a hack and you take your chances when using it.
   1190       It's very useful for low level debugging -- particularly at
   1191       startup -- and having its presence change the behaviour of the
   1192       client is exactly what we don't want.  */
   1193 
   1194    /* croak? */
   1195    if ((!allowed) && VG_(showing_core_errors)() ) {
   1196       VG_(message)(Vg_UserMsg,
   1197          "Warning: invalid file descriptor %d in syscall %s()\n",
   1198          fd, syscallname);
   1199       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
   1200 	 VG_(message)(Vg_UserMsg,
   1201             "   Use --log-fd=<number> to select an alternative log fd.\n");
   1202       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
   1203 	 VG_(message)(Vg_UserMsg,
   1204             "   Use --xml-fd=<number> to select an alternative XML "
   1205             "output fd.\n");
   1206       // DDD: consider always printing this stack trace, it's useful.
   1207       // Also consider also making this a proper core error, ie.
   1208       // suppressible and all that.
   1209       if (VG_(clo_verbosity) > 1) {
   1210          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   1211       }
   1212    }
   1213 
   1214    return allowed;
   1215 }
   1216 
   1217 
   1218 /* ---------------------------------------------------------------------
   1219    Deal with a bunch of socket-related syscalls
   1220    ------------------------------------------------------------------ */
   1221 
   1222 /* ------ */
   1223 
   1224 void
   1225 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
   1226                                   UWord arg0, UWord arg1,
   1227                                   UWord arg2, UWord arg3 )
   1228 {
   1229    /* int socketpair(int d, int type, int protocol, int sv[2]); */
   1230    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
   1231                   arg3, 2*sizeof(int) );
   1232 }
   1233 
   1234 SysRes
   1235 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
   1236                                    SysRes res,
   1237                                    UWord arg0, UWord arg1,
   1238                                    UWord arg2, UWord arg3 )
   1239 {
   1240    SysRes r = res;
   1241    Int fd1 = ((Int*)arg3)[0];
   1242    Int fd2 = ((Int*)arg3)[1];
   1243    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1244    POST_MEM_WRITE( arg3, 2*sizeof(int) );
   1245    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
   1246        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
   1247       VG_(close)(fd1);
   1248       VG_(close)(fd2);
   1249       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1250    } else {
   1251       POST_MEM_WRITE( arg3, 2*sizeof(int) );
   1252       if (VG_(clo_track_fds)) {
   1253          ML_(record_fd_open_nameless)(tid, fd1);
   1254          ML_(record_fd_open_nameless)(tid, fd2);
   1255       }
   1256    }
   1257    return r;
   1258 }
   1259 
   1260 /* ------ */
   1261 
   1262 SysRes
   1263 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
   1264 {
   1265    SysRes r = res;
   1266    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1267    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
   1268       VG_(close)(sr_Res(res));
   1269       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1270    } else {
   1271       if (VG_(clo_track_fds))
   1272          ML_(record_fd_open_nameless)(tid, sr_Res(res));
   1273    }
   1274    return r;
   1275 }
   1276 
   1277 /* ------ */
   1278 
   1279 void
   1280 ML_(generic_PRE_sys_bind) ( ThreadId tid,
   1281                             UWord arg0, UWord arg1, UWord arg2 )
   1282 {
   1283    /* int bind(int sockfd, struct sockaddr *my_addr,
   1284                int addrlen); */
   1285    pre_mem_read_sockaddr(
   1286       tid, "socketcall.bind(my_addr.%s)",
   1287       (struct vki_sockaddr *) arg1, arg2
   1288    );
   1289 }
   1290 
   1291 /* ------ */
   1292 
   1293 void
   1294 ML_(generic_PRE_sys_accept) ( ThreadId tid,
   1295                               UWord arg0, UWord arg1, UWord arg2 )
   1296 {
   1297    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
   1298    Addr addr_p     = arg1;
   1299    Addr addrlen_p  = arg2;
   1300    if (addr_p != (Addr)NULL)
   1301       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
   1302                                    "socketcall.accept(addr)",
   1303                                    "socketcall.accept(addrlen_in)" );
   1304 }
   1305 
   1306 SysRes
   1307 ML_(generic_POST_sys_accept) ( ThreadId tid,
   1308                                SysRes res,
   1309                                UWord arg0, UWord arg1, UWord arg2 )
   1310 {
   1311    SysRes r = res;
   1312    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1313    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
   1314       VG_(close)(sr_Res(res));
   1315       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1316    } else {
   1317       Addr addr_p     = arg1;
   1318       Addr addrlen_p  = arg2;
   1319       if (addr_p != (Addr)NULL)
   1320          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
   1321                                        "socketcall.accept(addrlen_out)" );
   1322       if (VG_(clo_track_fds))
   1323           ML_(record_fd_open_nameless)(tid, sr_Res(res));
   1324    }
   1325    return r;
   1326 }
   1327 
   1328 /* ------ */
   1329 
   1330 void
   1331 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
   1332                               UWord arg0, UWord arg1, UWord arg2,
   1333                               UWord arg3, UWord arg4, UWord arg5 )
   1334 {
   1335    /* int sendto(int s, const void *msg, int len,
   1336                  unsigned int flags,
   1337                  const struct sockaddr *to, int tolen); */
   1338    PRE_MEM_READ( "socketcall.sendto(msg)",
   1339                  arg1, /* msg */
   1340                  arg2  /* len */ );
   1341    pre_mem_read_sockaddr(
   1342       tid, "socketcall.sendto(to.%s)",
   1343       (struct vki_sockaddr *) arg4, arg5
   1344    );
   1345 }
   1346 
   1347 /* ------ */
   1348 
   1349 void
   1350 ML_(generic_PRE_sys_send) ( ThreadId tid,
   1351                             UWord arg0, UWord arg1, UWord arg2 )
   1352 {
   1353    /* int send(int s, const void *msg, size_t len, int flags); */
   1354    PRE_MEM_READ( "socketcall.send(msg)",
   1355                   arg1, /* msg */
   1356                   arg2  /* len */ );
   1357 
   1358 }
   1359 
   1360 /* ------ */
   1361 
   1362 void
   1363 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
   1364                                 UWord arg0, UWord arg1, UWord arg2,
   1365                                 UWord arg3, UWord arg4, UWord arg5 )
   1366 {
   1367    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
   1368                    struct sockaddr *from, int *fromlen); */
   1369    Addr buf_p      = arg1;
   1370    Int  len        = arg2;
   1371    Addr from_p     = arg4;
   1372    Addr fromlen_p  = arg5;
   1373    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
   1374    if (from_p != (Addr)NULL)
   1375       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
   1376                                    "socketcall.recvfrom(from)",
   1377                                    "socketcall.recvfrom(fromlen_in)" );
   1378 }
   1379 
   1380 void
   1381 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
   1382                                  SysRes res,
   1383                                  UWord arg0, UWord arg1, UWord arg2,
   1384                                  UWord arg3, UWord arg4, UWord arg5 )
   1385 {
   1386    Addr buf_p      = arg1;
   1387    Int  len        = arg2;
   1388    Addr from_p     = arg4;
   1389    Addr fromlen_p  = arg5;
   1390 
   1391    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1392    if (from_p != (Addr)NULL)
   1393       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
   1394                                     "socketcall.recvfrom(fromlen_out)" );
   1395    POST_MEM_WRITE( buf_p, len );
   1396 }
   1397 
   1398 /* ------ */
   1399 
   1400 void
   1401 ML_(generic_PRE_sys_recv) ( ThreadId tid,
   1402                             UWord arg0, UWord arg1, UWord arg2 )
   1403 {
   1404    /* int recv(int s, void *buf, int len, unsigned int flags); */
   1405    /* man 2 recv says:
   1406       The  recv call is normally used only on a connected socket
   1407       (see connect(2)) and is identical to recvfrom with a  NULL
   1408       from parameter.
   1409    */
   1410    PRE_MEM_WRITE( "socketcall.recv(buf)",
   1411                   arg1, /* buf */
   1412                   arg2  /* len */ );
   1413 }
   1414 
   1415 void
   1416 ML_(generic_POST_sys_recv) ( ThreadId tid,
   1417                              UWord res,
   1418                              UWord arg0, UWord arg1, UWord arg2 )
   1419 {
   1420    if (res >= 0 && arg1 != 0) {
   1421       POST_MEM_WRITE( arg1, /* buf */
   1422                       arg2  /* len */ );
   1423    }
   1424 }
   1425 
   1426 /* ------ */
   1427 
   1428 void
   1429 ML_(generic_PRE_sys_connect) ( ThreadId tid,
   1430                                UWord arg0, UWord arg1, UWord arg2 )
   1431 {
   1432    /* int connect(int sockfd,
   1433                   struct sockaddr *serv_addr, int addrlen ); */
   1434    pre_mem_read_sockaddr( tid,
   1435                           "socketcall.connect(serv_addr.%s)",
   1436                           (struct vki_sockaddr *) arg1, arg2);
   1437 }
   1438 
   1439 /* ------ */
   1440 
   1441 void
   1442 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
   1443                                   UWord arg0, UWord arg1, UWord arg2,
   1444                                   UWord arg3, UWord arg4 )
   1445 {
   1446    /* int setsockopt(int s, int level, int optname,
   1447                      const void *optval, int optlen); */
   1448    PRE_MEM_READ( "socketcall.setsockopt(optval)",
   1449                  arg3, /* optval */
   1450                  arg4  /* optlen */ );
   1451 }
   1452 
   1453 /* ------ */
   1454 
   1455 void
   1456 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
   1457                                    UWord arg0, UWord arg1, UWord arg2 )
   1458 {
   1459    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
   1460    Addr name_p     = arg1;
   1461    Addr namelen_p  = arg2;
   1462    /* Nb: name_p cannot be NULL */
   1463    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
   1464                                 "socketcall.getsockname(name)",
   1465                                 "socketcall.getsockname(namelen_in)" );
   1466 }
   1467 
   1468 void
   1469 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
   1470                                     SysRes res,
   1471                                     UWord arg0, UWord arg1, UWord arg2 )
   1472 {
   1473    Addr name_p     = arg1;
   1474    Addr namelen_p  = arg2;
   1475    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1476    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
   1477                                  "socketcall.getsockname(namelen_out)" );
   1478 }
   1479 
   1480 /* ------ */
   1481 
   1482 void
   1483 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
   1484                                    UWord arg0, UWord arg1, UWord arg2 )
   1485 {
   1486    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
   1487    Addr name_p     = arg1;
   1488    Addr namelen_p  = arg2;
   1489    /* Nb: name_p cannot be NULL */
   1490    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
   1491                                 "socketcall.getpeername(name)",
   1492                                 "socketcall.getpeername(namelen_in)" );
   1493 }
   1494 
   1495 void
   1496 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
   1497                                     SysRes res,
   1498                                     UWord arg0, UWord arg1, UWord arg2 )
   1499 {
   1500    Addr name_p     = arg1;
   1501    Addr namelen_p  = arg2;
   1502    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1503    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
   1504                                  "socketcall.getpeername(namelen_out)" );
   1505 }
   1506 
   1507 /* ------ */
   1508 
   1509 void
   1510 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, Char *name, struct vki_msghdr *msg )
   1511 {
   1512    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg );
   1513 }
   1514 
   1515 /* ------ */
   1516 
   1517 void
   1518 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, Char *name, struct vki_msghdr *msg )
   1519 {
   1520    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg );
   1521 }
   1522 
   1523 void
   1524 ML_(generic_POST_sys_recvmsg) ( ThreadId tid, Char *name, struct vki_msghdr *msg, UInt length )
   1525 {
   1526    msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg );
   1527    check_cmsg_for_fds( tid, msg );
   1528 }
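
        /* Illustrative sketch of why check_cmsg_for_fds() is needed: recvmsg()
           can hand the client a brand-new file descriptor via SCM_RIGHTS
           ancillary data, bypassing open()/dup().  Typical client-side code
           (data iovecs omitted):

              int            newfd;
              char           cbuf[CMSG_SPACE(sizeof(int))];
              struct msghdr  mh;
              mh.msg_control    = cbuf;
              mh.msg_controllen = sizeof(cbuf);
              recvmsg(sock, &mh, 0);
              struct cmsghdr* c = CMSG_FIRSTHDR(&mh);
              if (c && c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_RIGHTS)
                 memcpy(&newfd, CMSG_DATA(c), sizeof(int));

           Scanning the control buffer here is the only chance to record such
           fds if fd tracking is to stay accurate. */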
   1529 
   1530 
   1531 /* ---------------------------------------------------------------------
   1532    Deal with a bunch of IPC related syscalls
   1533    ------------------------------------------------------------------ */
   1534 
   1535 /* ------ */
   1536 
   1537 void
   1538 ML_(generic_PRE_sys_semop) ( ThreadId tid,
   1539                              UWord arg0, UWord arg1, UWord arg2 )
   1540 {
   1541    /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
   1542    PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
   1543 }
   1544 
   1545 /* ------ */
   1546 
   1547 void
   1548 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
   1549                                   UWord arg0, UWord arg1,
   1550                                   UWord arg2, UWord arg3 )
   1551 {
   1552    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
   1553                      struct timespec *timeout); */
   1554    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
   1555    if (arg3 != 0)
   1556       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
   1557 }
   1558 
   1559 /* ------ */
   1560 
   1561 static
   1562 UInt get_sem_count( Int semid )
   1563 {
   1564    struct vki_semid_ds buf;
   1565    union vki_semun arg;
   1566    SysRes res;
   1567 
   1568    /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
   1569       (experimental) otherwise complains that the use in the return
   1570       statement below is uninitialised. */
   1571    buf.sem_nsems = 0;
   1572 
   1573    arg.buf = &buf;
   1574 
   1575 #  ifdef __NR_semctl
   1576    res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
   1577 #  else
   1578    res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
   1579                           VKI_IPC_STAT, (UWord)&arg);
   1580 #  endif
   1581    if (sr_isError(res))
   1582       return 0;
   1583 
   1584    return buf.sem_nsems;
   1585 }
   1586 
   1587 void
   1588 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
   1589                               UWord arg0, UWord arg1,
   1590                               UWord arg2, UWord arg3 )
   1591 {
   1592    /* int semctl(int semid, int semnum, int cmd, ...); */
   1593    union vki_semun arg = *(union vki_semun *)&arg3;
   1594    UInt nsems;
   1595    switch (arg2 /* cmd */) {
   1596 #if defined(VKI_IPC_INFO)
   1597    case VKI_IPC_INFO:
   1598    case VKI_SEM_INFO:
   1599    case VKI_IPC_INFO|VKI_IPC_64:
   1600    case VKI_SEM_INFO|VKI_IPC_64:
   1601       PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
   1602                      (Addr)arg.buf, sizeof(struct vki_seminfo) );
   1603       break;
   1604 #endif
   1605 
   1606    case VKI_IPC_STAT:
   1607 #if defined(VKI_SEM_STAT)
   1608    case VKI_SEM_STAT:
   1609 #endif
   1610       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
   1611                      (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1612       break;
   1613 
   1614 #if defined(VKI_IPC_64)
   1615    case VKI_IPC_STAT|VKI_IPC_64:
   1616 #if defined(VKI_SEM_STAT)
   1617    case VKI_SEM_STAT|VKI_IPC_64:
   1618 #endif
   1619       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
   1620                      (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1621       break;
   1622 #endif
   1623 
   1624    case VKI_IPC_SET:
   1625       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
   1626                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1627       break;
   1628 
   1629 #if defined(VKI_IPC_64)
   1630    case VKI_IPC_SET|VKI_IPC_64:
   1631       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
   1632                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1633       break;
   1634 #endif
   1635 
   1636    case VKI_GETALL:
   1637 #if defined(VKI_IPC_64)
   1638    case VKI_GETALL|VKI_IPC_64:
   1639 #endif
   1640       nsems = get_sem_count( arg0 );
   1641       PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
   1642                      (Addr)arg.array, sizeof(unsigned short) * nsems );
   1643       break;
   1644 
   1645    case VKI_SETALL:
   1646 #if defined(VKI_IPC_64)
   1647    case VKI_SETALL|VKI_IPC_64:
   1648 #endif
   1649       nsems = get_sem_count( arg0 );
   1650       PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
   1651                     (Addr)arg.array, sizeof(unsigned short) * nsems );
   1652       break;
   1653    }
   1654 }
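
        /* Illustrative note on the GETALL/SETALL cases above: the client never
           passes the array size explicitly -- it is implied by the number of
           semaphores in the set, which is why get_sem_count() asks the kernel
           via IPC_STAT.  A hypothetical client with a 3-semaphore set does:

              unsigned short vals[3];
              union semun    a;
              a.array = vals;
              semctl(semid, 0, GETALL, a);

           and exactly 3 * sizeof(unsigned short) bytes are written, which is
           the extent checked in the PRE handler above and marked written in
           the POST handler below. */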
   1655 
   1656 void
   1657 ML_(generic_POST_sys_semctl) ( ThreadId tid,
   1658                                UWord res,
   1659                                UWord arg0, UWord arg1,
   1660                                UWord arg2, UWord arg3 )
   1661 {
   1662    union vki_semun arg = *(union vki_semun *)&arg3;
   1663    UInt nsems;
   1664    switch (arg2 /* cmd */) {
   1665 #if defined(VKI_IPC_INFO)
   1666    case VKI_IPC_INFO:
   1667    case VKI_SEM_INFO:
   1668    case VKI_IPC_INFO|VKI_IPC_64:
   1669    case VKI_SEM_INFO|VKI_IPC_64:
   1670       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
   1671       break;
   1672 #endif
   1673 
   1674    case VKI_IPC_STAT:
   1675 #if defined(VKI_SEM_STAT)
   1676    case VKI_SEM_STAT:
   1677 #endif
   1678       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1679       break;
   1680 
   1681 #if defined(VKI_IPC_64)
   1682    case VKI_IPC_STAT|VKI_IPC_64:
   1683    case VKI_SEM_STAT|VKI_IPC_64:
   1684       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1685       break;
   1686 #endif
   1687 
   1688    case VKI_GETALL:
   1689 #if defined(VKI_IPC_64)
   1690    case VKI_GETALL|VKI_IPC_64:
   1691 #endif
   1692       nsems = get_sem_count( arg0 );
   1693       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
   1694       break;
   1695    }
   1696 }
   1697 
   1698 /* ------ */
   1701 
   1702 static
   1703 SizeT get_shm_size ( Int shmid )
   1704 {
   1705 #ifdef __NR_shmctl
   1706 #  ifdef VKI_IPC_64
   1707    struct vki_shmid64_ds buf;
   1708 #    ifdef VGP_amd64_linux
   1709      /* See bug 222545 comment 7 */
   1710      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
   1711                                      VKI_IPC_STAT, (UWord)&buf);
   1712 #    else
   1713      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
   1714                                      VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
   1715 #    endif
   1716 #  else /* !def VKI_IPC_64 */
   1717    struct vki_shmid_ds buf;
   1718    SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
   1719 #  endif /* def VKI_IPC_64 */
   1720 #else
   1721    struct vki_shmid_ds buf;
   1722    SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
   1723                                  VKI_IPC_STAT, 0, (UWord)&buf);
   1724 #endif
   1725    if (sr_isError(__res))
   1726       return 0;
   1727 
   1728    return (SizeT) buf.shm_segsz;
   1729 }
   1730 
   1731 UWord
   1732 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
   1733                              UWord arg0, UWord arg1, UWord arg2 )
   1734 {
   1735    /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
   1736    SizeT  segmentSize = get_shm_size ( arg0 );
   1737    UWord tmp;
   1738    Bool  ok;
   1739    if (arg1 == 0) {
   1740       /* arm-linux only: work around the fact that
   1741          VG_(am_get_advisory_client_simple) produces something that is
   1742          VKI_PAGE_SIZE aligned, whereas what we want is something
   1743          VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
   1744          increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
   1745          then round the result up to the next VKI_SHMLBA boundary.
   1746          See bug 222545 comment 15.  So far, arm-linux is the only
   1747          platform where this is known to be necessary. */
   1748       vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
   1749       if (VKI_SHMLBA > VKI_PAGE_SIZE) {
   1750          segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
   1751       }
   1752       tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
   1753       if (ok) {
   1754          if (VKI_SHMLBA > VKI_PAGE_SIZE) {
   1755             arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
   1756          } else {
   1757             arg1 = tmp;
   1758          }
   1759       }
   1760    }
   1761    else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
   1762       arg1 = 0;
   1763    return arg1;
   1764 }
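
        /* Worked example of the SHMLBA fixup above, with hypothetical numbers
           (VKI_PAGE_SIZE = 4096, VKI_SHMLBA = 16384, as on some ARM configs):
           for a 20000-byte segment the advisory request grows to
           20000 + (16384 - 4096) = 32288 bytes.  If aspacem then suggests the
           page-aligned address tmp = 0x5000, the attach address becomes

              arg1 = VG_ROUNDUP(0x5000, 16384) = 0x8000

           and 0x8000 + 20000 still lies within the advised range
           [0x5000, 0x5000 + 32288), i.e. the extra slack absorbs the rounding. */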
   1765 
   1766 void
   1767 ML_(generic_POST_sys_shmat) ( ThreadId tid,
   1768                               UWord res,
   1769                               UWord arg0, UWord arg1, UWord arg2 )
   1770 {
   1771    SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
   1772    if ( segmentSize > 0 ) {
   1773       UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
   1774       Bool d;
   1775 
   1776       if (arg2 & VKI_SHM_RDONLY)
   1777          prot &= ~VKI_PROT_WRITE;
   1778       /* It isn't exactly correct to pass 0 for the fd and offset
   1779          here.  The kernel seems to think the corresponding section
   1780          does have dev/ino numbers:
   1781 
   1782          04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
   1783 
   1784          However there is no obvious way to find them.  In order to
   1785          cope with the discrepancy, aspacem's sync checker omits the
   1786          dev/ino correspondence check in cases where V does not know
   1787          the dev/ino. */
   1788       d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
   1789 
   1790       /* we don't distinguish whether it's read-only or
   1791        * read-write -- it doesn't matter really. */
   1792       VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
   1793                               0/*di_handle*/ );
   1794       if (d)
   1795          VG_(discard_translations)( (Addr64)res,
   1796                                     (ULong)VG_PGROUNDUP(segmentSize),
   1797                                     "ML_(generic_POST_sys_shmat)" );
   1798    }
   1799 }
   1800 
   1801 /* ------ */
   1802 
   1803 Bool
   1804 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
   1805 {
   1806    /* int shmdt(const void *shmaddr); */
   1807    return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
   1808 }
   1809 
   1810 void
   1811 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
   1812 {
   1813    NSegment const* s = VG_(am_find_nsegment)(arg0);
   1814 
   1815    if (s != NULL) {
   1816       Addr  s_start = s->start;
   1817       SizeT s_len   = s->end+1 - s->start;
   1818       Bool  d;
   1819 
   1820       vg_assert(s->kind == SkShmC);
   1821       vg_assert(s->start == arg0);
   1822 
   1823       d = VG_(am_notify_munmap)(s_start, s_len);
   1824       s = NULL; /* s is now invalid */
   1825       VG_TRACK( die_mem_munmap, s_start, s_len );
   1826       if (d)
   1827          VG_(discard_translations)( (Addr64)s_start,
   1828                                     (ULong)s_len,
   1829                                     "ML_(generic_POST_sys_shmdt)" );
   1830    }
   1831 }
   1832 /* ------ */
   1833 
   1834 void
   1835 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
   1836                               UWord arg0, UWord arg1, UWord arg2 )
   1837 {
   1838    /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
   1839    switch (arg1 /* cmd */) {
   1840 #if defined(VKI_IPC_INFO)
   1841    case VKI_IPC_INFO:
   1842       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
   1843                      arg2, sizeof(struct vki_shminfo) );
   1844       break;
   1845 #if defined(VKI_IPC_64)
   1846    case VKI_IPC_INFO|VKI_IPC_64:
   1847       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
   1848                      arg2, sizeof(struct vki_shminfo64) );
   1849       break;
   1850 #endif
   1851 #endif
   1852 
   1853 #if defined(VKI_SHM_INFO)
   1854    case VKI_SHM_INFO:
   1855 #if defined(VKI_IPC_64)
   1856    case VKI_SHM_INFO|VKI_IPC_64:
   1857 #endif
   1858       PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
   1859                      arg2, sizeof(struct vki_shm_info) );
   1860       break;
   1861 #endif
   1862 
   1863    case VKI_IPC_STAT:
   1864 #if defined(VKI_SHM_STAT)
   1865    case VKI_SHM_STAT:
   1866 #endif
   1867       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
   1868                      arg2, sizeof(struct vki_shmid_ds) );
   1869       break;
   1870 
   1871 #if defined(VKI_IPC_64)
   1872    case VKI_IPC_STAT|VKI_IPC_64:
   1873    case VKI_SHM_STAT|VKI_IPC_64:
   1874       PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
   1875                      arg2, sizeof(struct vki_shmid64_ds) );
   1876       break;
   1877 #endif
   1878 
   1879    case VKI_IPC_SET:
   1880       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
   1881                     arg2, sizeof(struct vki_shmid_ds) );
   1882       break;
   1883 
   1884 #if defined(VKI_IPC_64)
   1885    case VKI_IPC_SET|VKI_IPC_64:
   1886       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
   1887                     arg2, sizeof(struct vki_shmid64_ds) );
   1888       break;
   1889 #endif
   1890    }
   1891 }
   1892 
   1893 void
   1894 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
   1895                                UWord res,
   1896                                UWord arg0, UWord arg1, UWord arg2 )
   1897 {
   1898    switch (arg1 /* cmd */) {
   1899 #if defined(VKI_IPC_INFO)
   1900    case VKI_IPC_INFO:
   1901       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
   1902       break;
   1903    case VKI_IPC_INFO|VKI_IPC_64:
   1904       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
   1905       break;
   1906 #endif
   1907 
   1908 #if defined(VKI_SHM_INFO)
   1909    case VKI_SHM_INFO:
   1910    case VKI_SHM_INFO|VKI_IPC_64:
   1911       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
   1912       break;
   1913 #endif
   1914 
   1915    case VKI_IPC_STAT:
   1916 #if defined(VKI_SHM_STAT)
   1917    case VKI_SHM_STAT:
   1918 #endif
   1919       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
   1920       break;
   1921 
   1922 #if defined(VKI_IPC_64)
   1923    case VKI_IPC_STAT|VKI_IPC_64:
   1924    case VKI_SHM_STAT|VKI_IPC_64:
   1925       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
   1926       break;
   1927 #endif
   1928 
   1929 
   1930    }
   1931 }
   1932 
   1933 
   1934 /* ---------------------------------------------------------------------
   1935    Generic handler for mmap
   1936    ------------------------------------------------------------------ */
   1937 
   1938 /*
   1939  * Although mmap is specified by POSIX and the arguments are generally
   1940  * consistent across platforms, the precise details of the low-level
   1941  * argument-passing conventions differ. For example:
   1942  *
   1943  * - On x86-linux there is mmap (aka old_mmap) which takes the
   1944  *   arguments in a memory block and the offset in bytes; and
   1945  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
   1946  *   way and the offset in pages.
   1947  *
   1948  * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
   1949  *   arguments in the normal way and the offset in bytes; and
   1950  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
   1951  *   way and the offset in pages.
   1952  *
   1953  * - On amd64-linux everything is simple and there is just the one
   1954  *   call, mmap (aka sys_mmap)  which takes the arguments in the
   1955  *   normal way and the offset in bytes.
   1956  *
   1957  * - On s390x-linux there is mmap (aka old_mmap) which takes the
   1958  *   arguments in a memory block and the offset in bytes. mmap2
   1959  *   is also available (but not exported via unistd.h) with
   1960  *   arguments in a memory block and the offset in pages.
   1961  *
   1962  * To cope with all this we provide a generic handler function here
   1963  * and then each platform implements one or more system call handlers
   1964  * which call this generic routine after extracting and normalising
   1965  * the arguments.
   1966  */
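
        /* A minimal sketch (not the actual x86 wrapper, which lives in
           syswrap-x86-linux.c) of the normalisation step a platform wrapper is
           expected to perform before calling the generic handler -- here for a
           hypothetical mmap2-style call whose offset argument is in pages:

              PRE(sys_mmap2_like)
              {
                 SysRes r;
                 PRINT("sys_mmap2 ( %#lx, %llu, %ld, %ld, %ld, %ld )",
                       ARG1, (ULong)ARG2, ARG3, ARG4, ARG5, ARG6);
                 PRE_REG_READ6(long, "mmap2",
                               unsigned long, start,  unsigned long, length,
                               unsigned long, prot,   unsigned long, flags,
                               unsigned long, fd,     unsigned long, offset);
                 // convert the page-based offset into bytes for the generic code
                 r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
                                                (Off64T)ARG6 * VKI_PAGE_SIZE );
                 SET_STATUS_from_SysRes(r);
              }
        */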
   1967 
   1968 SysRes
   1969 ML_(generic_PRE_sys_mmap) ( ThreadId tid,
   1970                             UWord arg1, UWord arg2, UWord arg3,
   1971                             UWord arg4, UWord arg5, Off64T arg6 )
   1972 {
   1973    Addr       advised;
   1974    SysRes     sres;
   1975    MapRequest mreq;
   1976    Bool       mreq_ok;
   1977 
   1978 #if defined(VGO_darwin)
   1979    // Nb: we can't use this on Darwin, it has races:
   1980    // * needs to RETRY if advisory succeeds but map fails
   1981    //   (could have been some other thread in a nonblocking call)
   1982    // * needs to not use fixed-position mmap() on Darwin
   1983    //   (mmap will cheerfully smash whatever's already there, which might
   1984    //   be a new mapping from some other thread in a nonblocking call)
   1985    VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
   1986 #endif
   1987 
   1988    if (arg2 == 0) {
   1989       /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
   1990          shall be established. */
   1991       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   1992    }
   1993 
   1994    if (!VG_IS_PAGE_ALIGNED(arg1)) {
   1995       /* zap any misaligned addresses. */
   1996       /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
   1997          to fail.   Here, we catch them all. */
   1998       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   1999    }
   2000 
   2001    if (!VG_IS_PAGE_ALIGNED(arg6)) {
   2002       /* zap any misaligned offsets. */
   2003       /* SuSV3 says: The off argument is constrained to be aligned and
   2004          sized according to the value returned by sysconf() when
   2005          passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
   2006       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2007    }
   2008 
   2009    /* Figure out what kind of allocation constraints there are
   2010       (fixed/hint/any), and ask aspacem what we should do. */
   2011    mreq.start = arg1;
   2012    mreq.len   = arg2;
   2013    if (arg4 & VKI_MAP_FIXED) {
   2014       mreq.rkind = MFixed;
   2015    } else
   2016    if (arg1 != 0) {
   2017       mreq.rkind = MHint;
   2018    } else {
   2019       mreq.rkind = MAny;
   2020    }
   2021 
   2022    /* Enquire ... */
   2023    advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   2024    if (!mreq_ok) {
   2025       /* Our request was bounced, so we'd better fail. */
   2026       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2027    }
   2028 
   2029    /* Otherwise we're OK (so far).  Install aspacem's choice of
   2030       address, and let the mmap go through.  */
   2031    sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
   2032                                     arg4 | VKI_MAP_FIXED,
   2033                                     arg5, arg6);
   2034 
   2035    /* A refinement: it may be that the kernel refused aspacem's choice
   2036       of address.  If we were originally asked for a hinted mapping,
   2037       there is still a last chance: try again at any address.
   2038       Hence: */
   2039    if (mreq.rkind == MHint && sr_isError(sres)) {
   2040       mreq.start = 0;
   2041       mreq.len   = arg2;
   2042       mreq.rkind = MAny;
   2043       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   2044       if (!mreq_ok) {
   2045          /* Our request was bounced, so we'd better fail. */
   2046          return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2047       }
   2048       /* and try again with the kernel */
   2049       sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
   2050                                        arg4 | VKI_MAP_FIXED,
   2051                                        arg5, arg6);
   2052    }
   2053 
   2054    if (!sr_isError(sres)) {
   2055       ULong di_handle;
   2056       /* Notify aspacem. */
   2057       notify_core_of_mmap(
   2058          (Addr)sr_Res(sres), /* addr kernel actually assigned */
   2059          arg2, /* length */
   2060          arg3, /* prot */
   2061          arg4, /* the original flags value */
   2062          arg5, /* fd */
   2063          arg6  /* offset */
   2064       );
   2065       /* Load symbols? */
   2066       di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
   2067                                        False/*allow_SkFileV*/, (Int)arg5 );
   2068       /* Notify the tool. */
   2069       notify_tool_of_mmap(
   2070          (Addr)sr_Res(sres), /* addr kernel actually assigned */
   2071          arg2, /* length */
   2072          arg3, /* prot */
   2073          di_handle /* so the tool can refer to the read debuginfo later,
   2074                       if it wants. */
   2075       );
   2076    }
   2077 
   2078    /* Stay sane */
   2079    if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
   2080       vg_assert(sr_Res(sres) == arg1);
   2081 
   2082    return sres;
   2083 }
   2084 
   2085 
   2086 /* ---------------------------------------------------------------------
   2087    The Main Entertainment ... syscall wrappers
   2088    ------------------------------------------------------------------ */
   2089 
   2090 /* Note: the PRE() and POST() wrappers are for the actual functions
   2091    implementing the system calls in the OS kernel.  These mostly have
   2092    names like sys_write();  a few have names like old_mmap().  See the
   2093    comment for ML_(syscall_table)[] for important info about the __NR_foo
   2094    constants and their relationship to the sys_foo() functions.
   2095 
   2096    Some notes about names used for syscalls and args:
   2097    - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
   2098      ambiguity.
   2099 
   2100    - For error messages, we generally use a somewhat generic name
   2101      for the syscall (eg. "write" rather than "sys_write").  This should be
   2102      good enough for the average user to understand what is happening,
   2103      without confusing them with names like "sys_write".
   2104 
   2105    - Also, for error messages the arg names are mostly taken from the man
   2106      pages (even though many of those man pages are really for glibc
   2107      functions of the same name), rather than from the OS kernel source,
   2108      for the same reason -- a user presented with a "bogus foo(bar)" arg
   2109      will most likely look at the "foo" man page to see which is the "bar"
   2110      arg.
   2111 
   2112    Note that we use our own vki_* types.  The one exception is in
   2113    PRE_REG_READn calls, where pointer types haven't been changed, because
   2114    they don't need to be -- eg. for "foo*" to be used, the type foo need not
   2115    be visible.
   2116 
   2117    XXX: some of these are arch-specific, and should be factored out.
   2118 */
   2119 
   2120 #define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
   2121 #define POST(name)     DEFN_POST_TEMPLATE(generic, name)
   2122 
   2123 // Macros to support 64-bit syscall args split into two 32 bit values
   2124 #if defined(VG_LITTLEENDIAN)
   2125 #define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
   2126 #define MERGE64_FIRST(name) name##_low
   2127 #define MERGE64_SECOND(name) name##_high
   2128 #elif defined(VG_BIGENDIAN)
   2129 #define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
   2130 #define MERGE64_FIRST(name) name##_high
   2131 #define MERGE64_SECOND(name) name##_low
   2132 #else
   2133 #error Unknown endianness
   2134 #endif
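
        // Illustrative use of the MERGE64 macros (see e.g. sys_pwrite64 below):
        // on a 32-bit platform a 64-bit file offset arrives as two registers
        // and is reassembled with
        //
        //    ULong offset = MERGE64(ARG4, ARG5);
        //
        // while MERGE64_FIRST(offset)/MERGE64_SECOND(offset) merely generate the
        // names "offset_low"/"offset_high" in the order matching the platform's
        // endianness, so PRE_REG_READ5 annotates the two halves correctly.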
   2135 
   2136 PRE(sys_exit)
   2137 {
   2138    ThreadState* tst;
   2139    /* simple; just make this thread exit */
   2140    PRINT("exit( %ld )", ARG1);
   2141    PRE_REG_READ1(void, "exit", int, status);
   2142    tst = VG_(get_ThreadState)(tid);
   2143    /* Set the thread's status to be exiting, then claim that the
   2144       syscall succeeded. */
   2145    tst->exitreason = VgSrc_ExitThread;
   2146    tst->os_state.exitcode = ARG1;
   2147    SET_STATUS_Success(0);
   2148 }
   2149 
   2150 PRE(sys_ni_syscall)
   2151 {
   2152    PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
   2153       VG_SYSNUM_STRING(SYSNO));
   2154    PRE_REG_READ0(long, "ni_syscall");
   2155    SET_STATUS_Failure( VKI_ENOSYS );
   2156 }
   2157 
   2158 PRE(sys_iopl)
   2159 {
   2160    PRINT("sys_iopl ( %ld )", ARG1);
   2161    PRE_REG_READ1(long, "iopl", unsigned long, level);
   2162 }
   2163 
   2164 PRE(sys_fsync)
   2165 {
   2166    *flags |= SfMayBlock;
   2167    PRINT("sys_fsync ( %ld )", ARG1);
   2168    PRE_REG_READ1(long, "fsync", unsigned int, fd);
   2169 }
   2170 
   2171 PRE(sys_fdatasync)
   2172 {
   2173    *flags |= SfMayBlock;
   2174    PRINT("sys_fdatasync ( %ld )", ARG1);
   2175    PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
   2176 }
   2177 
   2178 PRE(sys_msync)
   2179 {
   2180    *flags |= SfMayBlock;
   2181    PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   2182    PRE_REG_READ3(long, "msync",
   2183                  unsigned long, start, vki_size_t, length, int, flags);
   2184    PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
   2185 }
   2186 
   2187 // Nb: getpmsg() and putpmsg() are special additional syscalls used in early
   2188 // versions of LiS (Linux Streams).  They are not part of the kernel.
   2189 // Therefore, we have to provide this type ourselves, rather than getting it
   2190 // from the kernel sources.
   2191 struct vki_pmsg_strbuf {
   2192    int     maxlen;         /* no. of bytes in buffer */
   2193    int     len;            /* no. of bytes returned */
   2194    vki_caddr_t buf;        /* pointer to data */
   2195 };
   2196 PRE(sys_getpmsg)
   2197 {
   2198    /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
   2199    struct vki_pmsg_strbuf *ctrl;
   2200    struct vki_pmsg_strbuf *data;
   2201    *flags |= SfMayBlock;
   2202    PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   2203    PRE_REG_READ5(int, "getpmsg",
   2204                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
   2205                  int *, bandp, int *, flagsp);
   2206    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2207    data = (struct vki_pmsg_strbuf *)ARG3;
   2208    if (ctrl && ctrl->maxlen > 0)
   2209       PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
   2210    if (data && data->maxlen > 0)
   2211       PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
   2212    if (ARG4)
   2213       PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
   2214    if (ARG5)
   2215       PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
   2216 }
   2217 POST(sys_getpmsg)
   2218 {
   2219    struct vki_pmsg_strbuf *ctrl;
   2220    struct vki_pmsg_strbuf *data;
   2221    vg_assert(SUCCESS);
   2222    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2223    data = (struct vki_pmsg_strbuf *)ARG3;
   2224    if (RES == 0 && ctrl && ctrl->len > 0) {
   2225       POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
   2226    }
   2227    if (RES == 0 && data && data->len > 0) {
   2228       POST_MEM_WRITE( (Addr)data->buf, data->len);
   2229    }
   2230 }
   2231 
   2232 PRE(sys_putpmsg)
   2233 {
   2234    /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
   2235    struct vki_pmsg_strbuf *ctrl;
   2236    struct vki_pmsg_strbuf *data;
   2237    *flags |= SfMayBlock;
   2238    PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
   2239    PRE_REG_READ5(int, "putpmsg",
   2240                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
   2241                  int, band, int, flags);
   2242    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2243    data = (struct vki_pmsg_strbuf *)ARG3;
   2244    if (ctrl && ctrl->len > 0)
   2245       PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
   2246    if (data && data->len > 0)
   2247       PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
   2248 }
   2249 
   2250 PRE(sys_getitimer)
   2251 {
   2252    struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2253    PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
   2254    PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
   2255 
   2256    PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
   2257    PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
   2258 }
   2259 
   2260 POST(sys_getitimer)
   2261 {
   2262    if (ARG2 != (Addr)NULL) {
   2263       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2264       POST_timeval_WRITE( &(value->it_interval) );
   2265       POST_timeval_WRITE( &(value->it_value) );
   2266    }
   2267 }
   2268 
   2269 PRE(sys_setitimer)
   2270 {
   2271    PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
   2272    PRE_REG_READ3(long, "setitimer",
   2273                  int, which,
   2274                  struct itimerval *, value, struct itimerval *, ovalue);
   2275    if (ARG2 != (Addr)NULL) {
   2276       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2277       PRE_timeval_READ( "setitimer(&value->it_interval)",
   2278                          &(value->it_interval));
   2279       PRE_timeval_READ( "setitimer(&value->it_value)",
   2280                          &(value->it_value));
   2281    }
   2282    if (ARG3 != (Addr)NULL) {
   2283       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
   2284       PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
   2285                          &(ovalue->it_interval));
   2286       PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
   2287                          &(ovalue->it_value));
   2288    }
   2289 }
   2290 
   2291 POST(sys_setitimer)
   2292 {
   2293    if (ARG3 != (Addr)NULL) {
   2294       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
   2295       POST_timeval_WRITE( &(ovalue->it_interval) );
   2296       POST_timeval_WRITE( &(ovalue->it_value) );
   2297    }
   2298 }
   2299 
   2300 PRE(sys_chroot)
   2301 {
   2302    PRINT("sys_chroot ( %#lx )", ARG1);
   2303    PRE_REG_READ1(long, "chroot", const char *, path);
   2304    PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
   2305 }
   2306 
   2307 PRE(sys_madvise)
   2308 {
   2309    *flags |= SfMayBlock;
   2310    PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   2311    PRE_REG_READ3(long, "madvise",
   2312                  unsigned long, start, vki_size_t, length, int, advice);
   2313 }
   2314 
   2315 #if HAVE_MREMAP
   2316 PRE(sys_mremap)
   2317 {
   2318    // Nb: this is different to the glibc version described in the man pages,
   2319    // which lacks the fifth 'new_address' argument.
   2320    if (ARG4 & VKI_MREMAP_FIXED) {
   2321       PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
   2322             ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
   2323       PRE_REG_READ5(unsigned long, "mremap",
   2324                     unsigned long, old_addr, unsigned long, old_size,
   2325                     unsigned long, new_size, unsigned long, flags,
   2326                     unsigned long, new_addr);
   2327    } else {
   2328       PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
   2329             ARG1, (ULong)ARG2, ARG3, ARG4);
   2330       PRE_REG_READ4(unsigned long, "mremap",
   2331                     unsigned long, old_addr, unsigned long, old_size,
   2332                     unsigned long, new_size, unsigned long, flags);
   2333    }
   2334    SET_STATUS_from_SysRes(
   2335       do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
   2336    );
   2337 }
   2338 #endif /* HAVE_MREMAP */
   2339 
   2340 PRE(sys_nice)
   2341 {
   2342    PRINT("sys_nice ( %ld )", ARG1);
   2343    PRE_REG_READ1(long, "nice", int, inc);
   2344 }
   2345 
   2346 PRE(sys_mlock)
   2347 {
   2348    *flags |= SfMayBlock;
   2349    PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   2350    PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
   2351 }
   2352 
   2353 PRE(sys_munlock)
   2354 {
   2355    *flags |= SfMayBlock;
   2356    PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   2357    PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
   2358 }
   2359 
   2360 PRE(sys_mlockall)
   2361 {
   2362    *flags |= SfMayBlock;
   2363    PRINT("sys_mlockall ( %lx )", ARG1);
   2364    PRE_REG_READ1(long, "mlockall", int, flags);
   2365 }
   2366 
   2367 PRE(sys_setpriority)
   2368 {
   2369    PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
   2370    PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
   2371 }
   2372 
   2373 PRE(sys_getpriority)
   2374 {
   2375    PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
   2376    PRE_REG_READ2(long, "getpriority", int, which, int, who);
   2377 }
   2378 
   2379 PRE(sys_pwrite64)
   2380 {
   2381    *flags |= SfMayBlock;
   2382 #if VG_WORDSIZE == 4
   2383    PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
   2384          ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   2385    PRE_REG_READ5(ssize_t, "pwrite64",
   2386                  unsigned int, fd, const char *, buf, vki_size_t, count,
   2387                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
   2388 #elif VG_WORDSIZE == 8
   2389    PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
   2390          ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   2391    PRE_REG_READ4(ssize_t, "pwrite64",
   2392                  unsigned int, fd, const char *, buf, vki_size_t, count,
   2393                  Word, offset);
   2394 #else
   2395 #  error Unexpected word size
   2396 #endif
   2397    PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
   2398 }
   2399 
   2400 PRE(sys_sync)
   2401 {
   2402    *flags |= SfMayBlock;
   2403    PRINT("sys_sync ( )");
   2404    PRE_REG_READ0(long, "sync");
   2405 }
   2406 
   2407 PRE(sys_fstatfs)
   2408 {
   2409    FUSE_COMPATIBLE_MAY_BLOCK();
   2410    PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
   2411    PRE_REG_READ2(long, "fstatfs",
   2412                  unsigned int, fd, struct statfs *, buf);
   2413    PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
   2414 }
   2415 
   2416 POST(sys_fstatfs)
   2417 {
   2418    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
   2419 }
   2420 
   2421 PRE(sys_fstatfs64)
   2422 {
   2423    FUSE_COMPATIBLE_MAY_BLOCK();
   2424    PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
   2425    PRE_REG_READ3(long, "fstatfs64",
   2426                  unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
   2427    PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
   2428 }
   2429 POST(sys_fstatfs64)
   2430 {
   2431    POST_MEM_WRITE( ARG3, ARG2 );
   2432 }
   2433 
   2434 PRE(sys_getsid)
   2435 {
   2436    PRINT("sys_getsid ( %ld )", ARG1);
   2437    PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
   2438 }
   2439 
   2440 PRE(sys_pread64)
   2441 {
   2442    *flags |= SfMayBlock;
   2443 #if VG_WORDSIZE == 4
   2444    PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
   2445          ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   2446    PRE_REG_READ5(ssize_t, "pread64",
   2447                  unsigned int, fd, char *, buf, vki_size_t, count,
   2448                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
   2449 #elif VG_WORDSIZE == 8
   2450    PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
   2451          ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   2452    PRE_REG_READ4(ssize_t, "pread64",
   2453                  unsigned int, fd, char *, buf, vki_size_t, count,
   2454                  Word, offset);
   2455 #else
   2456 #  error Unexpected word size
   2457 #endif
   2458    PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
   2459 }
   2460 POST(sys_pread64)
   2461 {
   2462    vg_assert(SUCCESS);
   2463    if (RES > 0) {
   2464       POST_MEM_WRITE( ARG2, RES );
   2465    }
   2466 }
   2467 
   2468 PRE(sys_mknod)
   2469 {
   2470    FUSE_COMPATIBLE_MAY_BLOCK();
   2471    PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
   2472    PRE_REG_READ3(long, "mknod",
   2473                  const char *, pathname, int, mode, unsigned, dev);
   2474    PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
   2475 }
   2476 
   2477 PRE(sys_flock)
   2478 {
   2479    *flags |= SfMayBlock;
   2480    PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
   2481    PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
   2482 }
   2483 
   2484 // Pre_read a char** argument.
   2485 static void pre_argv_envp(Addr a, ThreadId tid, Char* s1, Char* s2)
   2486 {
   2487    while (True) {
   2488       Addr a_deref;
   2489       Addr* a_p = (Addr*)a;
   2490       PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
   2491       a_deref = *a_p;
   2492       if (0 == a_deref)
   2493          break;
   2494       PRE_MEM_RASCIIZ( s2, a_deref );
   2495       a += sizeof(char*);
   2496    }
   2497 }
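
        /* Illustrative layout walked by pre_argv_envp(), e.g. for
           execve("/bin/ls", argv, envp):

              argv -> [ ptr0 ] --> "/bin/ls\0"
                      [ ptr1 ] --> "-l\0"
                      [ NULL ]

           Each pointer slot is checked with PRE_MEM_READ (message s1) and each
           string it points at with PRE_MEM_RASCIIZ (message s2); the walk stops
           at the NULL terminator, which must itself be readable. */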
   2498 
   2499 static Bool i_am_the_only_thread ( void )
   2500 {
   2501    Int c = VG_(count_living_threads)();
   2502    vg_assert(c >= 1); /* stay sane */
   2503    return c == 1;
   2504 }
   2505 
   2506 /* Wait until all other threads disappear. */
   2507 void VG_(reap_threads)(ThreadId self)
   2508 {
   2509    while (!i_am_the_only_thread()) {
   2510       /* Let other thread(s) run */
   2511       VG_(vg_yield)();
   2512       VG_(poll_signals)(self);
   2513    }
   2514    vg_assert(i_am_the_only_thread());
   2515 }
   2516 
   2517 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
   2518 // but it seems to work nonetheless...
   2519 PRE(sys_execve)
   2520 {
   2521    Char*        path = NULL;       /* path to executable */
   2522    Char**       envp = NULL;
   2523    Char**       argv = NULL;
   2524    Char**       arg2copy;
   2525    Char*        launcher_basename = NULL;
   2526    ThreadState* tst;
   2527    Int          i, j, tot_args;
   2528    SysRes       res;
   2529    Bool         setuid_allowed, trace_this_child;
   2530 
   2531    PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
   2532    PRE_REG_READ3(vki_off_t, "execve",
   2533                  char *, filename, char **, argv, char **, envp);
   2534    PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
   2535    if (ARG2 != 0)
   2536       pre_argv_envp( ARG2, tid, "execve(argv)", "execve(argv[i])" );
   2537    if (ARG3 != 0)
   2538       pre_argv_envp( ARG3, tid, "execve(envp)", "execve(envp[i])" );
   2539 
   2540    vg_assert(VG_(is_valid_tid)(tid));
   2541    tst = VG_(get_ThreadState)(tid);
   2542 
   2543    /* Erk.  If the exec fails, then the following will have made a
   2544       mess of things which makes it hard for us to continue.  The
   2545       right thing to do is piece everything together again in
   2546       POST(execve), but that's close to impossible.  Instead, we make
   2547       an effort to check that the execve will work before actually
   2548       doing it. */
   2549 
   2550    /* Check that the name at least begins in client-accessible storage. */
   2551    if (ARG1 == 0 /* obviously bogus */
   2552        || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
   2553       SET_STATUS_Failure( VKI_EFAULT );
   2554       return;
   2555    }
   2556 
   2557    // debug-only printing
   2558    if (0) {
   2559       VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
   2560       if (ARG2) {
   2561          VG_(printf)("ARG2 = ");
   2562          Int q;
   2563          HChar** vec = (HChar**)ARG2;
   2564          for (q = 0; vec[q]; q++)
   2565             VG_(printf)("%p(%s) ", vec[q], vec[q]);
   2566          VG_(printf)("\n");
   2567       } else {
   2568          VG_(printf)("ARG2 = null\n");
   2569       }
   2570    }
   2571 
   2572    // Decide whether or not we want to follow along
   2573    { // Make 'child_argv' be a pointer to the child's arg vector
   2574      // (skipping the exe name)
   2575      HChar** child_argv = (HChar**)ARG2;
   2576      if (child_argv && child_argv[0] == NULL)
   2577         child_argv = NULL;
   2578      trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
   2579    }
   2580 
   2581    // Do the important checks:  it is a file, is executable, permissions are
   2582    // ok, etc.  We allow setuid executables to run only when we are not
   2583    // simulating them, that is, when they are to be run natively.
   2584    setuid_allowed = trace_this_child  ? False  : True;
   2585    res = VG_(pre_exec_check)((const Char*)ARG1, NULL, setuid_allowed);
   2586    if (sr_isError(res)) {
   2587       SET_STATUS_Failure( sr_Err(res) );
   2588       return;
   2589    }
   2590 
   2591    /* If we're tracing the child, and the launcher name looks bogus
   2592       (possibly because launcher.c couldn't figure it out, see
   2593       comments therein) then we have no option but to fail. */
   2594    if (trace_this_child
   2595        && (VG_(name_of_launcher) == NULL
   2596            || VG_(name_of_launcher)[0] != '/')) {
   2597       SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
   2598       return;
   2599    }
   2600 
   2601    /* After this point, we can't recover if the execve fails. */
   2602    VG_(debugLog)(1, "syswrap", "Exec of %s\n", (Char*)ARG1);
   2603 
   2604 
   2605    // Terminate gdbserver if it is active.
   2606    if (VG_(clo_vgdb)  != Vg_VgdbNo) {
   2607       // If the child will not be traced, we need to terminate gdbserver
   2608       // to cleanup the gdbserver resources (e.g. the FIFO files).
   2609       // If child will be traced, we also terminate gdbserver: the new
   2610       // Valgrind will start a fresh gdbserver after exec.
   2611       VG_(gdbserver) (0);
   2612    }
   2613 
   2614    /* Resistance is futile.  Nuke all other threads.  POSIX mandates
   2615       this. (Really, nuke them all, since the new process will make
   2616       its own new thread.) */
   2617    VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
   2618    VG_(reap_threads)(tid);
   2619 
   2620    // Set up the child's exe path.
   2621    //
   2622    if (trace_this_child) {
   2623 
   2624       // We want to exec the launcher.  Get its pre-remembered path.
   2625       path = VG_(name_of_launcher);
   2626       // VG_(name_of_launcher) should have been acquired by m_main at
   2627       // startup.
   2628       vg_assert(path);
   2629 
   2630       launcher_basename = VG_(strrchr)(path, '/');
   2631       if (launcher_basename == NULL || launcher_basename[1] == 0) {
   2632          launcher_basename = path;  // hmm, tres dubious
   2633       } else {
   2634          launcher_basename++;
   2635       }
   2636 
   2637    } else {
   2638       path = (Char*)ARG1;
   2639    }
   2640 
   2641    // Set up the child's environment.
   2642    //
   2643    // Remove the valgrind-specific stuff from the environment so the
   2644    // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
   2645    // This is done unconditionally, since if we are tracing the child,
   2646    // the child valgrind will set up the appropriate client environment.
   2647    // Nb: we make a copy of the environment before trying to mangle it
   2648    // as it might be in read-only memory (this was bug #101881).
   2649    //
   2650    // Then, if tracing the child, set VALGRIND_LIB for it.
   2651    //
   2652    if (ARG3 == 0) {
   2653       envp = NULL;
   2654    } else {
   2655       envp = VG_(env_clone)( (Char**)ARG3 );
   2656       if (envp == NULL) goto hosed;
   2657       VG_(env_remove_valgrind_env_stuff)( envp );
   2658    }
   2659 
   2660    if (trace_this_child) {
   2661       // Set VALGRIND_LIB in ARG3 (the environment)
   2662       VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
   2663    }
   2664 
   2665    // Set up the child's args.  If not tracing it, they are
   2666    // simply ARG2.  Otherwise, they are
   2667    //
   2668    // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
   2669    //
   2670    // except that the first VG_(args_for_valgrind_noexecpass) args
   2671    // are omitted.
   2672    //
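           //
           // A purely hypothetical worked example of the formula above: if the
           // client calls  execve("/bin/ls", {"ls", "-l", NULL}, envp)  and this
           // Valgrind was started as  valgrind --tool=memcheck <client>, the
           // traced child exec's the launcher with an argv along the lines of
           //
           //    { "valgrind", "--tool=memcheck", "/bin/ls", "-l", NULL }
           //
           // (the exact contents depend on VG_(args_for_valgrind) and on how
           // many leading args are dropped via VG_(args_for_valgrind_noexecpass)).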
   2673    if (!trace_this_child) {
   2674       argv = (Char**)ARG2;
   2675    } else {
   2676       vg_assert( VG_(args_for_valgrind) );
   2677       vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
   2678       vg_assert( VG_(args_for_valgrind_noexecpass)
   2679                    <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
   2680       /* how many args in total will there be? */
   2681       // launcher basename
   2682       tot_args = 1;
   2683       // V's args
   2684       tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
   2685       tot_args -= VG_(args_for_valgrind_noexecpass);
   2686       // name of client exe
   2687       tot_args++;
   2688       // args for client exe, skipping [0]
   2689       arg2copy = (Char**)ARG2;
   2690       if (arg2copy && arg2copy[0]) {
   2691          for (i = 1; arg2copy[i]; i++)
   2692             tot_args++;
   2693       }
   2694       // allocate
   2695       argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
   2696                           (tot_args+1) * sizeof(HChar*) );
   2697       if (argv == 0) goto hosed;
   2698       // copy
   2699       j = 0;
   2700       argv[j++] = launcher_basename;
   2701       for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
   2702          if (i < VG_(args_for_valgrind_noexecpass))
   2703             continue;
   2704          argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
   2705       }
   2706       argv[j++] = (Char*)ARG1;
   2707       if (arg2copy && arg2copy[0])
   2708          for (i = 1; arg2copy[i]; i++)
   2709             argv[j++] = arg2copy[i];
   2710       argv[j++] = NULL;
   2711       // check
   2712       vg_assert(j == tot_args+1);
   2713    }
   2714 
   2715    /* restore the DATA rlimit for the child */
   2716    VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
   2717 
   2718    /*
   2719       Set the signal state up for exec.
   2720 
   2721       We need to set the real signal state to make sure the exec'd
   2722       process gets SIG_IGN properly.
   2723 
   2724       Also set our real sigmask to match the client's sigmask so that
   2725       the exec'd child will get the right mask.  First we need to
   2726       clear out any pending signals so they don't get delivered,
   2727       which would confuse things.
   2728 
   2729       XXX This is a bug - the signals should remain pending, and be
   2730       delivered to the new process after exec.  There's also a
   2731       race-condition, since if someone delivers us a signal between
   2732       the sigprocmask and the execve, we'll still get the signal. Oh
   2733       well.
   2734    */
   2735    {
   2736       vki_sigset_t allsigs;
   2737       vki_siginfo_t info;
   2738 
   2739       /* What this loop does: it queries SCSS (the signal state that
   2740          the client _thinks_ the kernel is in) by calling
   2741          VG_(do_sys_sigaction), and modifies the real kernel signal
   2742          state accordingly. */
   2743       for (i = 1; i < VG_(max_signal); i++) {
   2744          vki_sigaction_fromK_t sa_f;
   2745          vki_sigaction_toK_t   sa_t;
   2746          VG_(do_sys_sigaction)(i, NULL, &sa_f);
   2747          VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
   2748          if (sa_t.ksa_handler == VKI_SIG_IGN)
   2749             VG_(sigaction)(i, &sa_t, NULL);
   2750          else {
   2751             sa_t.ksa_handler = VKI_SIG_DFL;
   2752             VG_(sigaction)(i, &sa_t, NULL);
   2753          }
   2754       }
   2755 
   2756       VG_(sigfillset)(&allsigs);
   2757       while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
   2758          ;
   2759 
   2760       VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
   2761    }
   2762 
   2763    if (0) {
   2764       Char **cpp;
   2765       VG_(printf)("exec: %s\n", path);
   2766       for (cpp = argv; cpp && *cpp; cpp++)
   2767          VG_(printf)("argv: %s\n", *cpp);
   2768       if (0)
   2769          for (cpp = envp; cpp && *cpp; cpp++)
   2770             VG_(printf)("env: %s\n", *cpp);
   2771    }
   2772 
   2773    SET_STATUS_from_SysRes(
   2774       VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
   2775    );
   2776 
   2777    /* If we got here, then the execve failed.  We've already made way
   2778       too much of a mess to continue, so we have to abort. */
   2779   hosed:
   2780    vg_assert(FAILURE);
   2781    VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %ld\n",
   2782                 ARG1, (char*)ARG1, ARG2, ARG3, ERR);
   2783    VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
   2784                             "execve() failing, so I'm dying.\n");
   2785    VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
   2786                             "or work out how to recover.\n");
   2787    VG_(exit)(101);
   2788 }
   2789 
   2790 PRE(sys_access)
   2791 {
   2792    PRINT("sys_access ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   2793    PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
   2794    PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
   2795 }
   2796 
   2797 PRE(sys_alarm)
   2798 {
   2799    PRINT("sys_alarm ( %ld )", ARG1);
   2800    PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
   2801 }
   2802 
   2803 PRE(sys_brk)
   2804 {
   2805    Addr brk_limit = VG_(brk_limit);
   2806    Addr brk_new;
   2807 
   2808    /* libc   says: int   brk(void *end_data_segment);
   2809       kernel says: void* brk(void* end_data_segment);  (more or less)
   2810 
   2811       libc returns 0 on success, and -1 (and sets errno) on failure.
   2812       Nb: if you ask to shrink the dataseg end below what it
   2813       currently is, that always succeeds, even if the dataseg end
   2814       doesn't actually change (eg. brk(0)).  Unless it seg faults.
   2815 
   2816       Kernel returns the new dataseg end.  If the brk() failed, this
   2817       will be unchanged from the old one.  That's why calling (kernel)
   2818       brk(0) gives the current dataseg end (libc brk() just returns
   2819       zero in that case).
   2820 
   2821       Both will seg fault if you shrink it back into a text segment.
   2822    */
   2823    PRINT("sys_brk ( %#lx )", ARG1);
   2824    PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
   2825 
   2826    brk_new = do_brk(ARG1);
   2827    SET_STATUS_Success( brk_new );
   2828 
   2829    if (brk_new == ARG1) {
   2830       /* brk() succeeded */
   2831       if (brk_new < brk_limit) {
   2832          /* successfully shrunk the data segment. */
   2833          VG_TRACK( die_mem_brk, (Addr)ARG1,
   2834 		   brk_limit-ARG1 );
   2835       } else
   2836       if (brk_new > brk_limit) {
   2837          /* successfully grew the data segment */
   2838          VG_TRACK( new_mem_brk, brk_limit,
   2839                    ARG1-brk_limit, tid );
   2840       }
   2841    } else {
   2842       /* brk() failed */
   2843       vg_assert(brk_limit == brk_new);
   2844    }
   2845 }
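
        /* Client-visible sketch of the semantics described in PRE(sys_brk)
           above (client-side libc code, not Valgrind code):

              void* cur = sbrk(0);              // current data-segment end
              if (brk((char*)cur + 4096) == 0) {
                 // grew by one page; the wrapper reports it via new_mem_brk
              }

           A failed kernel-level brk() returns the old segment end unchanged,
           which is why the wrapper above treats "result != requested value"
           as failure. */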
   2846 
   2847 PRE(sys_chdir)
   2848 {
   2849    FUSE_COMPATIBLE_MAY_BLOCK();
   2850    PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   2851    PRE_REG_READ1(long, "chdir", const char *, path);
   2852    PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
   2853 }
   2854 
   2855 PRE(sys_chmod)
   2856 {
   2857    FUSE_COMPATIBLE_MAY_BLOCK();
   2858    PRINT("sys_chmod ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   2859    PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
   2860    PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
   2861 }
   2862 
   2863 PRE(sys_chown)
   2864 {
   2865    FUSE_COMPATIBLE_MAY_BLOCK();
   2866    PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   2867    PRE_REG_READ3(long, "chown",
   2868                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
   2869    PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
   2870 }
   2871 
   2872 PRE(sys_lchown)
   2873 {
   2874    FUSE_COMPATIBLE_MAY_BLOCK();
   2875    PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   2876    PRE_REG_READ3(long, "lchown",
   2877                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
   2878    PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
   2879 }
   2880 
   2881 PRE(sys_close)
   2882 {
   2883    FUSE_COMPATIBLE_MAY_BLOCK();
   2884    PRINT("sys_close ( %ld )", ARG1);
   2885    PRE_REG_READ1(long, "close", unsigned int, fd);
   2886 
   2887    /* Detect and negate attempts by the client to close Valgrind's log fd */
   2888    if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
   2889         /* If doing -d style logging (which is to fd=2), don't
   2890            allow that to be closed either. */
   2891         || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
   2892       SET_STATUS_Failure( VKI_EBADF );
   2893 }
   2894 
   2895 POST(sys_close)
   2896 {
   2897    if (VG_(clo_track_fds)) record_fd_close(ARG1);
   2898 }
   2899 
   2900 PRE(sys_dup)
   2901 {
   2902    PRINT("sys_dup ( %ld )", ARG1);
   2903    PRE_REG_READ1(long, "dup", unsigned int, oldfd);
   2904 }
   2905 
   2906 POST(sys_dup)
   2907 {
   2908    vg_assert(SUCCESS);
   2909    if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
   2910       VG_(close)(RES);
   2911       SET_STATUS_Failure( VKI_EMFILE );
   2912    } else {
   2913       if (VG_(clo_track_fds))
   2914          ML_(record_fd_open_named)(tid, RES);
   2915    }
   2916 }
   2917 
   2918 PRE(sys_dup2)
   2919 {
   2920    PRINT("sys_dup2 ( %ld, %ld )", ARG1,ARG2);
   2921    PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
   2922    if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
   2923       SET_STATUS_Failure( VKI_EBADF );
   2924 }
   2925 
   2926 POST(sys_dup2)
   2927 {
   2928    vg_assert(SUCCESS);
   2929    if (VG_(clo_track_fds))
   2930       ML_(record_fd_open_named)(tid, RES);
   2931 }
   2932 
   2933 PRE(sys_fchdir)
   2934 {
   2935    FUSE_COMPATIBLE_MAY_BLOCK();
   2936    PRINT("sys_fchdir ( %ld )", ARG1);
   2937    PRE_REG_READ1(long, "fchdir", unsigned int, fd);
   2938 }
   2939 
   2940 PRE(sys_fchown)
   2941 {
   2942    FUSE_COMPATIBLE_MAY_BLOCK();
   2943    PRINT("sys_fchown ( %ld, %ld, %ld )", ARG1,ARG2,ARG3);
   2944    PRE_REG_READ3(long, "fchown",
   2945                  unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
   2946 }
   2947 
   2948 PRE(sys_fchmod)
   2949 {
   2950    FUSE_COMPATIBLE_MAY_BLOCK();
   2951    PRINT("sys_fchmod ( %ld, %ld )", ARG1,ARG2);
   2952    PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
   2953 }
   2954 
   2955 PRE(sys_newfstat)
   2956 {
   2957    FUSE_COMPATIBLE_MAY_BLOCK();
   2958    PRINT("sys_newfstat ( %ld, %#lx )", ARG1,ARG2);
   2959    PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
   2960    PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
   2961 }
   2962 
   2963 POST(sys_newfstat)
   2964 {
   2965    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   2966 }
   2967 
   2968 static vki_sigset_t fork_saved_mask;
   2969 
   2970 // In Linux, the sys_fork() function varies across architectures, but we
   2971 // ignore the various args it gets, and so it looks arch-neutral.  Hmm.
   2972 PRE(sys_fork)
   2973 {
   2974    Bool is_child;
   2975    Int child_pid;
   2976    vki_sigset_t mask;
   2977 
   2978    PRINT("sys_fork ( )");
   2979    PRE_REG_READ0(long, "fork");
   2980 
   2981    /* Block all signals during fork, so that we can fix things up in
   2982       the child without being interrupted. */
   2983    VG_(sigfillset)(&mask);
   2984    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
   2985 
   2986    SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
   2987 
   2988    if (!SUCCESS) return;
   2989 
   2990 #if defined(VGO_linux)
   2991    // RES is 0 for child, non-0 (the child's PID) for parent.
   2992    is_child = ( RES == 0 ? True : False );
   2993    child_pid = ( is_child ? -1 : RES );
   2994 #elif defined(VGO_darwin)
   2995    // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
   2996    is_child = RESHI;
   2997    child_pid = RES;
   2998 #else
   2999 #  error Unknown OS
   3000 #endif
   3001 
   3002    VG_(do_atfork_pre)(tid);
   3003 
   3004    if (is_child) {
   3005       VG_(do_atfork_child)(tid);
   3006 
   3007       /* restore signal mask */
   3008       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   3009 
   3010       /* If --child-silent-after-fork=yes was specified, set the
   3011          output file descriptors to 'impossible' values.  This is
   3012          noticed by send_bytes_to_logging_sink in m_libcprint.c, which
   3013          duly stops writing any further output. */
   3014       if (VG_(clo_child_silent_after_fork)) {
   3015          if (!VG_(log_output_sink).is_socket)
   3016             VG_(log_output_sink).fd = -1;
   3017          if (!VG_(xml_output_sink).is_socket)
   3018             VG_(xml_output_sink).fd = -1;
   3019       }
   3020 
   3021    } else {
   3022       VG_(do_atfork_parent)(tid);
   3023 
   3024       PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);
   3025 
   3026       /* restore signal mask */
   3027       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   3028    }
   3029 }
   3030 
   3031 PRE(sys_ftruncate)
   3032 {
   3033    *flags |= SfMayBlock;
   3034    PRINT("sys_ftruncate ( %ld, %ld )", ARG1,ARG2);
   3035    PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
   3036 }
   3037 
   3038 PRE(sys_truncate)
   3039 {
   3040    *flags |= SfMayBlock;
   3041    PRINT("sys_truncate ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   3042    PRE_REG_READ2(long, "truncate",
   3043                  const char *, path, unsigned long, length);
   3044    PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
   3045 }
   3046 
   3047 PRE(sys_ftruncate64)
   3048 {
   3049    *flags |= SfMayBlock;
   3050 #if VG_WORDSIZE == 4
   3051    PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, MERGE64(ARG2,ARG3));
   3052    PRE_REG_READ3(long, "ftruncate64",
   3053                  unsigned int, fd,
   3054                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
   3055 #else
   3056    PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, (Long)ARG2);
   3057    PRE_REG_READ2(long, "ftruncate64",
   3058                  unsigned int,fd, UWord,length);
   3059 #endif
   3060 }
   3061 
   3062 PRE(sys_truncate64)
   3063 {
   3064    *flags |= SfMayBlock;
   3065 #if VG_WORDSIZE == 4
   3066    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
   3067    PRE_REG_READ3(long, "truncate64",
   3068                  const char *, path,
   3069                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
   3070 #else
   3071    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
   3072    PRE_REG_READ2(long, "truncate64",
   3073                  const char *,path, UWord,length);
   3074 #endif
   3075    PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
   3076 }
   3077 
   3078 PRE(sys_getdents)
   3079 {
   3080    *flags |= SfMayBlock;
   3081    PRINT("sys_getdents ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   3082    PRE_REG_READ3(long, "getdents",
   3083                  unsigned int, fd, struct linux_dirent *, dirp,
   3084                  unsigned int, count);
   3085    PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
   3086 }
   3087 
   3088 POST(sys_getdents)
   3089 {
   3090    vg_assert(SUCCESS);
   3091    if (RES > 0)
   3092       POST_MEM_WRITE( ARG2, RES );
   3093 }
   3094 
   3095 PRE(sys_getdents64)
   3096 {
   3097    *flags |= SfMayBlock;
   3098    PRINT("sys_getdents64 ( %ld, %#lx, %ld )",ARG1,ARG2,ARG3);
   3099    PRE_REG_READ3(long, "getdents64",
   3100                  unsigned int, fd, struct linux_dirent64 *, dirp,
   3101                  unsigned int, count);
   3102    PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
   3103 }
   3104 
   3105 POST(sys_getdents64)
   3106 {
   3107    vg_assert(SUCCESS);
   3108    if (RES > 0)
   3109       POST_MEM_WRITE( ARG2, RES );
   3110 }
   3111 
   3112 PRE(sys_getgroups)
   3113 {
   3114    PRINT("sys_getgroups ( %ld, %#lx )", ARG1, ARG2);
   3115    PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
   3116    if (ARG1 > 0)
   3117       PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
   3118 }
   3119 
   3120 POST(sys_getgroups)
   3121 {
   3122    vg_assert(SUCCESS);
   3123    if (ARG1 > 0 && RES > 0)
   3124       POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
   3125 }
   3126 
   3127 PRE(sys_getcwd)
   3128 {
   3129    // Comment from linux/fs/dcache.c:
   3130    //   NOTE! The user-level library version returns a character pointer.
   3131    //   The kernel system call just returns the length of the buffer filled
   3132    //   (which includes the ending '\0' character), or a negative error
   3133    //   value.
   3134    // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
   3135    PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
   3136    PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
   3137    PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
   3138 }
   3139 
   3140 POST(sys_getcwd)
   3141 {
   3142    vg_assert(SUCCESS);
   3143    if (RES != (Addr)NULL)
   3144       POST_MEM_WRITE( ARG1, RES );
   3145 }
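
/* Illustrative client-side sketch of the kernel-vs-libc getcwd() difference
   noted in the comment above: the raw syscall returns the number of bytes
   written (including the trailing '\0'), while the libc wrapper returns a
   pointer.  Not part of the wrapper; assumes a Linux userland. */
#if 0
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
   char buf[4096];
   long  n = syscall(SYS_getcwd, buf, sizeof buf);  /* length incl. '\0' */
   char* p = getcwd(buf, sizeof buf);               /* pointer or NULL   */
   printf("kernel getcwd length = %ld, libc getcwd -> %s\n", n, p);
   return 0;
}
#endif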
   3146 
   3147 PRE(sys_geteuid)
   3148 {
   3149    PRINT("sys_geteuid ( )");
   3150    PRE_REG_READ0(long, "geteuid");
   3151 }
   3152 
   3153 PRE(sys_getegid)
   3154 {
   3155    PRINT("sys_getegid ( )");
   3156    PRE_REG_READ0(long, "getegid");
   3157 }
   3158 
   3159 PRE(sys_getgid)
   3160 {
   3161    PRINT("sys_getgid ( )");
   3162    PRE_REG_READ0(long, "getgid");
   3163 }
   3164 
   3165 PRE(sys_getpid)
   3166 {
   3167    PRINT("sys_getpid ()");
   3168    PRE_REG_READ0(long, "getpid");
   3169 }
   3170 
   3171 PRE(sys_getpgid)
   3172 {
   3173    PRINT("sys_getpgid ( %ld )", ARG1);
   3174    PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
   3175 }
   3176 
   3177 PRE(sys_getpgrp)
   3178 {
   3179    PRINT("sys_getpgrp ()");
   3180    PRE_REG_READ0(long, "getpgrp");
   3181 }
   3182 
   3183 PRE(sys_getppid)
   3184 {
   3185    PRINT("sys_getppid ()");
   3186    PRE_REG_READ0(long, "getppid");
   3187 }
   3188 
   3189 static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
   3190 {
   3191    POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );
   3192 
   3193 #ifdef _RLIMIT_POSIX_FLAG
   3194    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
   3195    // Unset it here to make the switch case below work correctly.
   3196    a1 &= ~_RLIMIT_POSIX_FLAG;
   3197 #endif
   3198 
   3199    switch (a1) {
   3200    case VKI_RLIMIT_NOFILE:
   3201       ((struct vki_rlimit *)a2)->rlim_cur = VG_(fd_soft_limit);
   3202       ((struct vki_rlimit *)a2)->rlim_max = VG_(fd_hard_limit);
   3203       break;
   3204 
   3205    case VKI_RLIMIT_DATA:
   3206       *((struct vki_rlimit *)a2) = VG_(client_rlimit_data);
   3207       break;
   3208 
   3209    case VKI_RLIMIT_STACK:
   3210       *((struct vki_rlimit *)a2) = VG_(client_rlimit_stack);
   3211       break;
   3212    }
   3213 }
   3214 
   3215 PRE(sys_old_getrlimit)
   3216 {
   3217    PRINT("sys_old_getrlimit ( %ld, %#lx )", ARG1,ARG2);
   3218    PRE_REG_READ2(long, "old_getrlimit",
   3219                  unsigned int, resource, struct rlimit *, rlim);
   3220    PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   3221 }
   3222 
   3223 POST(sys_old_getrlimit)
   3224 {
   3225    common_post_getrlimit(tid, ARG1, ARG2);
   3226 }
   3227 
   3228 PRE(sys_getrlimit)
   3229 {
   3230    PRINT("sys_getrlimit ( %ld, %#lx )", ARG1,ARG2);
   3231    PRE_REG_READ2(long, "getrlimit",
   3232                  unsigned int, resource, struct rlimit *, rlim);
   3233    PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   3234 }
   3235 
   3236 POST(sys_getrlimit)
   3237 {
   3238    common_post_getrlimit(tid, ARG1, ARG2);
   3239 }
   3240 
   3241 PRE(sys_getrusage)
   3242 {
   3243    PRINT("sys_getrusage ( %ld, %#lx )", ARG1,ARG2);
   3244    PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
   3245    PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
   3246 }
   3247 
   3248 POST(sys_getrusage)
   3249 {
   3250    vg_assert(SUCCESS);
   3251    if (RES == 0)
   3252       POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
   3253 }
   3254 
   3255 PRE(sys_gettimeofday)
   3256 {
   3257    PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
   3258    PRE_REG_READ2(long, "gettimeofday",
   3259                  struct timeval *, tv, struct timezone *, tz);
   3260    // GrP fixme does darwin write to *tz anymore?
   3261    if (ARG1 != 0)
   3262       PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
   3263    if (ARG2 != 0)
   3264       PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
   3265 }
   3266 
   3267 POST(sys_gettimeofday)
   3268 {
   3269    vg_assert(SUCCESS);
   3270    if (RES == 0) {
   3271       if (ARG1 != 0)
   3272          POST_timeval_WRITE( ARG1 );
   3273       if (ARG2 != 0)
   3274 	 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
   3275    }
   3276 }
   3277 
   3278 PRE(sys_settimeofday)
   3279 {
   3280    PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
   3281    PRE_REG_READ2(long, "settimeofday",
   3282                  struct timeval *, tv, struct timezone *, tz);
   3283    if (ARG1 != 0)
   3284       PRE_timeval_READ( "settimeofday(tv)", ARG1 );
   3285    if (ARG2 != 0) {
   3286       PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
   3287       /* maybe should warn if tz->tz_dsttime is non-zero? */
   3288    }
   3289 }
   3290 
   3291 PRE(sys_getuid)
   3292 {
   3293    PRINT("sys_getuid ( )");
   3294    PRE_REG_READ0(long, "getuid");
   3295 }
   3296 
   3297 void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
   3298 {
   3299    /* We don't have any specific information on it, so
   3300       try to do something reasonable based on direction and
   3301       size bits.  The encoding scheme is described in
   3302       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
   3303 
    3304       According to Simon Hausmann, _IOC_READ means the kernel
    3305       writes a value back to the user-space buffer passed to the
    3306       ioctl, and the other way around with _IOC_WRITE. */
   3307 
   3308    UInt dir  = _VKI_IOC_DIR(request);
   3309    UInt size = _VKI_IOC_SIZE(request);
   3310    if (VG_(strstr)(VG_(clo_sim_hints), "lax-ioctls") != NULL) {
   3311       /*
   3312        * Be very lax about ioctl handling; the only
   3313        * assumption is that the size is correct. Doesn't
   3314        * require the full buffer to be initialized when
   3315        * writing.  Without this, using some device
   3316        * drivers with a large number of strange ioctl
   3317        * commands becomes very tiresome.
   3318        */
   3319    } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
   3320       //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
   3321       //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   3322       static Int moans = 3;
   3323       if (moans > 0 && !VG_(clo_xml)) {
   3324          moans--;
   3325          VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
   3326                    " with no size/direction hints\n", request);
   3327          VG_(umsg)("   This could cause spurious value errors to appear.\n");
   3328          VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
   3329                    "guidance on writing a proper wrapper.\n" );
   3330       }
   3331    } else {
   3332       //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
   3333       //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   3334       if ((dir & _VKI_IOC_WRITE) && size > 0)
   3335          PRE_MEM_READ( "ioctl(generic)", arg, size);
   3336       if ((dir & _VKI_IOC_READ) && size > 0)
   3337          PRE_MEM_WRITE( "ioctl(generic)", arg, size);
   3338    }
   3339 }
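
/* Illustrative sketch of the _IOC encoding scheme referred to above (see
   /usr/include/asm/ioctl.h).  Not part of the wrapper; the 'v'/1/int
   request below is made up purely to show how the direction and size bits
   are read back out of a request number. */
#if 0
#include <stdio.h>
#include <linux/ioctl.h>   /* _IOR, _IOC_DIR, _IOC_SIZE, _IOC_READ */

int main(void)
{
   /* "the kernel writes an int back to user space" */
   unsigned int req = _IOR('v', 1, int);
   printf("dir=%u (_IOC_READ=%u), size=%u\n",
          (unsigned)_IOC_DIR(req), (unsigned)_IOC_READ,
          (unsigned)_IOC_SIZE(req));
   return 0;
}
#endif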
   3340 
   3341 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
   3342 {
   3343    /* We don't have any specific information on it, so
   3344       try to do something reasonable based on direction and
   3345       size bits.  The encoding scheme is described in
   3346       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
   3347 
    3348       According to Simon Hausmann, _IOC_READ means the kernel
    3349       writes a value back to the user-space buffer passed to the
    3350       ioctl, and the other way around with _IOC_WRITE. */
   3351 
   3352    UInt dir  = _VKI_IOC_DIR(request);
   3353    UInt size = _VKI_IOC_SIZE(request);
   3354    if (size > 0 && (dir & _VKI_IOC_READ)
   3355        && res == 0
   3356        && arg != (Addr)NULL)
   3357    {
   3358       POST_MEM_WRITE(arg, size);
   3359    }
   3360 }
   3361 
   3362 /*
   3363    If we're sending a SIGKILL to one of our own threads, then simulate
   3364    it rather than really sending the signal, so that the target thread
   3365    gets a chance to clean up.  Returns True if we did the killing (or
   3366    no killing is necessary), and False if the caller should use the
   3367    normal kill syscall.
   3368 
   3369    "pid" is any pid argument which can be passed to kill; group kills
   3370    (< -1, 0), and owner kills (-1) are ignored, on the grounds that
   3371    they'll most likely hit all the threads and we won't need to worry
   3372    about cleanup.  In truth, we can't fully emulate these multicast
   3373    kills.
   3374 
   3375    "tgid" is a thread group id.  If it is not -1, then the target
   3376    thread must be in that thread group.
   3377  */
   3378 Bool ML_(do_sigkill)(Int pid, Int tgid)
   3379 {
   3380    ThreadState *tst;
   3381    ThreadId tid;
   3382 
   3383    if (pid <= 0)
   3384       return False;
   3385 
   3386    tid = VG_(lwpid_to_vgtid)(pid);
   3387    if (tid == VG_INVALID_THREADID)
   3388       return False;		/* none of our threads */
   3389 
   3390    tst = VG_(get_ThreadState)(tid);
   3391    if (tst == NULL || tst->status == VgTs_Empty)
   3392       return False;		/* hm, shouldn't happen */
   3393 
   3394    if (tgid != -1 && tst->os_state.threadgroup != tgid)
   3395       return False;		/* not the right thread group */
   3396 
   3397    /* Check to see that the target isn't already exiting. */
   3398    if (!VG_(is_exiting)(tid)) {
   3399       if (VG_(clo_trace_signals))
   3400 	 VG_(message)(Vg_DebugMsg,
   3401                       "Thread %d being killed with SIGKILL\n",
   3402                       tst->tid);
   3403 
   3404       tst->exitreason = VgSrc_FatalSig;
   3405       tst->os_state.fatalsig = VKI_SIGKILL;
   3406 
   3407       if (!VG_(is_running_thread)(tid))
   3408 	 VG_(get_thread_out_of_syscall)(tid);
   3409    }
   3410 
   3411    return True;
   3412 }
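
/* Illustrative sketch of the pid conventions mentioned in the comment
   above ML_(do_sigkill).  Not part of Valgrind; signal 0 is used so the
   calls only perform permission checks and deliver nothing. */
#if 0
#include <signal.h>
#include <unistd.h>

int main(void)
{
   kill(getpid(), 0);     /* pid >  0 : one specific process            */
   kill(0, 0);            /* pid ==  0 : the caller's own process group */
   kill(-getpgrp(), 0);   /* pid < -1 : the given process group         */
   /* kill(-1, 0) would target every process the caller may signal.     */
   return 0;
}
#endif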
   3413 
   3414 PRE(sys_kill)
   3415 {
   3416    PRINT("sys_kill ( %ld, %ld )", ARG1,ARG2);
   3417    PRE_REG_READ2(long, "kill", int, pid, int, sig);
   3418    if (!ML_(client_signal_OK)(ARG2)) {
   3419       SET_STATUS_Failure( VKI_EINVAL );
   3420       return;
   3421    }
   3422 
   3423    /* If we're sending SIGKILL, check to see if the target is one of
   3424       our threads and handle it specially. */
   3425    if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
   3426       SET_STATUS_Success(0);
   3427    else
   3428       /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
   3429          affecting how posix-compliant the call is.  I guess it is
   3430          harmless to pass the 3rd arg on other platforms; hence pass
   3431          it on all. */
   3432       SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
   3433 
   3434    if (VG_(clo_trace_signals))
   3435       VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
   3436 		   ARG2, ARG1);
   3437 
   3438    /* This kill might have given us a pending signal.  Ask for a check once
   3439       the syscall is done. */
   3440    *flags |= SfPollAfter;
   3441 }
   3442 
   3443 PRE(sys_link)
   3444 {
   3445    *flags |= SfMayBlock;
   3446    PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   3447    PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
   3448    PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
   3449    PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
   3450 }
   3451 
   3452 PRE(sys_newlstat)
   3453 {
   3454    PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   3455    PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
   3456    PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
   3457    PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
   3458 }
   3459 
   3460 POST(sys_newlstat)
   3461 {
   3462    vg_assert(SUCCESS);
   3463    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   3464 }
   3465 
   3466 PRE(sys_mkdir)
   3467 {
   3468    *flags |= SfMayBlock;
   3469    PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   3470    PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
   3471    PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
   3472 }
   3473 
   3474 PRE(sys_mprotect)
   3475 {
   3476    PRINT("sys_mprotect ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   3477    PRE_REG_READ3(long, "mprotect",
   3478                  unsigned long, addr, vki_size_t, len, unsigned long, prot);
   3479 
   3480    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
   3481       SET_STATUS_Failure( VKI_ENOMEM );
   3482    }
   3483 #if defined(VKI_PROT_GROWSDOWN)
   3484    else
   3485    if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
   3486       /* Deal with mprotects on growable stack areas.
   3487 
   3488          The critical files to understand all this are mm/mprotect.c
   3489          in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
   3490          glibc.
   3491 
   3492          The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
   3493          round the start/end address of mprotect to the start/end of
   3494          the underlying vma and glibc uses that as an easy way to
   3495          change the protection of the stack by calling mprotect on the
   3496          last page of the stack with PROT_GROWSDOWN set.
   3497 
   3498          The sanity check provided by the kernel is that the vma must
   3499          have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
   3500       UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
   3501       NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
   3502       NSegment const *rseg;
   3503 
   3504       vg_assert(aseg);
   3505 
   3506       if (grows == VKI_PROT_GROWSDOWN) {
   3507          rseg = VG_(am_next_nsegment)( (NSegment*)aseg, False/*backwards*/ );
   3508          if (rseg &&
   3509              rseg->kind == SkResvn &&
   3510              rseg->smode == SmUpper &&
   3511              rseg->end+1 == aseg->start) {
   3512             Addr end = ARG1 + ARG2;
   3513             ARG1 = aseg->start;
   3514             ARG2 = end - aseg->start;
   3515             ARG3 &= ~VKI_PROT_GROWSDOWN;
   3516          } else {
   3517             SET_STATUS_Failure( VKI_EINVAL );
   3518          }
   3519       } else if (grows == VKI_PROT_GROWSUP) {
   3520          rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
   3521          if (rseg &&
   3522              rseg->kind == SkResvn &&
   3523              rseg->smode == SmLower &&
   3524              aseg->end+1 == rseg->start) {
   3525             ARG2 = aseg->end - ARG1 + 1;
   3526             ARG3 &= ~VKI_PROT_GROWSUP;
   3527          } else {
   3528             SET_STATUS_Failure( VKI_EINVAL );
   3529          }
   3530       } else {
   3531          /* both GROWSUP and GROWSDOWN */
   3532          SET_STATUS_Failure( VKI_EINVAL );
   3533       }
   3534    }
   3535 #endif   // defined(VKI_PROT_GROWSDOWN)
   3536 }
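
/* Illustrative sketch of the glibc-style usage described in the comment
   above: change the protection of a growable stack mapping by calling
   mprotect on just its last page with PROT_GROWSDOWN set, and let the
   kernel extend the change to the whole vma.  Not part of the wrapper;
   assumes a Linux userland where <sys/mman.h> exposes PROT_GROWSDOWN. */
#if 0
#define _GNU_SOURCE
#include <sys/mman.h>
#include <unistd.h>

static int make_stack_executable(void* last_stack_page)
{
   long pagesz = sysconf(_SC_PAGESIZE);
   return mprotect(last_stack_page, (size_t)pagesz,
                   PROT_READ | PROT_WRITE | PROT_EXEC | PROT_GROWSDOWN);
}
#endif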
   3537 
   3538 POST(sys_mprotect)
   3539 {
   3540    Addr a    = ARG1;
   3541    SizeT len = ARG2;
   3542    Int  prot = ARG3;
   3543 
   3544    ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
   3545 }
   3546 
   3547 PRE(sys_munmap)
   3548 {
   3549    if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);
   3550    PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
   3551    PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
   3552 
   3553    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
   3554       SET_STATUS_Failure( VKI_EINVAL );
   3555 }
   3556 
   3557 POST(sys_munmap)
   3558 {
   3559    Addr  a   = ARG1;
   3560    SizeT len = ARG2;
   3561 
   3562    ML_(notify_core_and_tool_of_munmap)( (Addr64)a, (ULong)len );
   3563 }
   3564 
   3565 PRE(sys_mincore)
   3566 {
   3567    PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
   3568    PRE_REG_READ3(long, "mincore",
   3569                  unsigned long, start, vki_size_t, length,
   3570                  unsigned char *, vec);
   3571    PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
   3572 }
   3573 POST(sys_mincore)
   3574 {
   3575    POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
   3576 }
   3577 
   3578 PRE(sys_nanosleep)
   3579 {
   3580    *flags |= SfMayBlock|SfPostOnFail;
   3581    PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
   3582    PRE_REG_READ2(long, "nanosleep",
   3583                  struct timespec *, req, struct timespec *, rem);
   3584    PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
   3585    if (ARG2 != 0)
   3586       PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
   3587 }
   3588 
   3589 POST(sys_nanosleep)
   3590 {
   3591    vg_assert(SUCCESS || FAILURE);
   3592    if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
   3593       POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
   3594 }
   3595 
   3596 PRE(sys_open)
   3597 {
   3598    if (ARG2 & VKI_O_CREAT) {
   3599       // 3-arg version
   3600       PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1,(char*)ARG1,ARG2,ARG3);
   3601       PRE_REG_READ3(long, "open",
   3602                     const char *, filename, int, flags, int, mode);
   3603    } else {
   3604       // 2-arg version
   3605       PRINT("sys_open ( %#lx(%s), %ld )",ARG1,(char*)ARG1,ARG2);
   3606       PRE_REG_READ2(long, "open",
   3607                     const char *, filename, int, flags);
   3608    }
   3609    PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
   3610 
   3611 #if defined(VGO_linux)
   3612    /* Handle the case where the open is of /proc/self/cmdline or
   3613       /proc/<pid>/cmdline, and just give it a copy of the fd for the
   3614       fake file we cooked up at startup (in m_main).  Also, seek the
   3615       cloned fd back to the start. */
   3616    {
   3617       HChar  name[30];
   3618       Char*  arg1s = (Char*) ARG1;
   3619       SysRes sres;
   3620 
   3621       VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
   3622       if (ML_(safe_to_deref)( arg1s, 1 ) &&
   3623           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
   3624          )
   3625       {
   3626          sres = VG_(dup)( VG_(cl_cmdline_fd) );
   3627          SET_STATUS_from_SysRes( sres );
   3628          if (!sr_isError(sres)) {
   3629             OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
   3630             if (off < 0)
   3631                SET_STATUS_Failure( VKI_EMFILE );
   3632          }
   3633          return;
   3634       }
   3635    }
   3636 #endif // defined(VGO_linux)
   3637 
   3638    /* Otherwise handle normally */
   3639    *flags |= SfMayBlock;
   3640 }
   3641 
   3642 POST(sys_open)
   3643 {
   3644    vg_assert(SUCCESS);
   3645    if (!ML_(fd_allowed)(RES, "open", tid, True)) {
   3646       VG_(close)(RES);
   3647       SET_STATUS_Failure( VKI_EMFILE );
   3648    } else {
   3649       if (VG_(clo_track_fds))
   3650          ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
   3651    }
   3652 }
   3653 
   3654 PRE(sys_read)
   3655 {
   3656    *flags |= SfMayBlock;
   3657    PRINT("sys_read ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
   3658    PRE_REG_READ3(ssize_t, "read",
   3659                  unsigned int, fd, char *, buf, vki_size_t, count);
   3660 
   3661    if (!ML_(fd_allowed)(ARG1, "read", tid, False))
   3662       SET_STATUS_Failure( VKI_EBADF );
   3663    else
   3664       PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
   3665 }
   3666 
   3667 POST(sys_read)
   3668 {
   3669    vg_assert(SUCCESS);
   3670    POST_MEM_WRITE( ARG2, RES );
   3671 }
   3672 
   3673 PRE(sys_write)
   3674 {
   3675    Bool ok;
   3676    *flags |= SfMayBlock;
   3677    PRINT("sys_write ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
   3678    PRE_REG_READ3(ssize_t, "write",
   3679                  unsigned int, fd, const char *, buf, vki_size_t, count);
   3680    /* check to see if it is allowed.  If not, try for an exemption from
   3681       --sim-hints=enable-outer (used for self hosting). */
   3682    ok = ML_(fd_allowed)(ARG1, "write", tid, False);
   3683    if (!ok && ARG1 == 2/*stderr*/
   3684            && VG_(strstr)(VG_(clo_sim_hints),"enable-outer"))
   3685       ok = True;
   3686    if (!ok)
   3687       SET_STATUS_Failure( VKI_EBADF );
   3688    else
   3689       PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
   3690 }
   3691 
   3692 PRE(sys_creat)
   3693 {
   3694    *flags |= SfMayBlock;
   3695    PRINT("sys_creat ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   3696    PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
   3697    PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
   3698 }
   3699 
   3700 POST(sys_creat)
   3701 {
   3702    vg_assert(SUCCESS);
   3703    if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
   3704       VG_(close)(RES);
   3705       SET_STATUS_Failure( VKI_EMFILE );
   3706    } else {
   3707       if (VG_(clo_track_fds))
   3708          ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
   3709    }
   3710 }
   3711 
   3712 PRE(sys_poll)
   3713 {
   3714    /* struct pollfd {
   3715         int fd;           -- file descriptor
   3716         short events;     -- requested events
   3717         short revents;    -- returned events
   3718       };
   3719       int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
   3720    */
   3721    UInt i;
   3722    struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
   3723    *flags |= SfMayBlock;
   3724    PRINT("sys_poll ( %#lx, %ld, %ld )\n", ARG1,ARG2,ARG3);
   3725    PRE_REG_READ3(long, "poll",
   3726                  struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
   3727 
   3728    for (i = 0; i < ARG2; i++) {
   3729       PRE_MEM_READ( "poll(ufds.fd)",
   3730                     (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
   3731       PRE_MEM_READ( "poll(ufds.events)",
   3732                     (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
   3733       PRE_MEM_WRITE( "poll(ufds.reventss)",
   3734                      (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
   3735    }
   3736 }
   3737 
   3738 POST(sys_poll)
   3739 {
   3740    if (RES >= 0) {
   3741       UInt i;
   3742       struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
   3743       for (i = 0; i < ARG2; i++)
   3744 	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
   3745    }
   3746 }
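
/* Illustrative client-side sketch of the poll() interface shown in the
   struct pollfd comment above.  Not part of the wrapper: it waits up to
   one second for stdin to become readable. */
#if 0
#include <poll.h>
#include <stdio.h>

int main(void)
{
   struct pollfd pfd = { .fd = 0 /* stdin */, .events = POLLIN, .revents = 0 };
   int n = poll(&pfd, 1, 1000 /* ms */);
   if (n > 0 && (pfd.revents & POLLIN))
      printf("stdin is readable\n");
   return 0;
}
#endif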
   3747 
   3748 PRE(sys_readlink)
   3749 {
   3750    FUSE_COMPATIBLE_MAY_BLOCK();
   3751    Word saved = SYSNO;
   3752 
   3753    PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
   3754    PRE_REG_READ3(long, "readlink",
   3755                  const char *, path, char *, buf, int, bufsiz);
   3756    PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
   3757    PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
   3758 
   3759    {
   3760 #if defined(VGO_linux)
   3761       /*
   3762        * Handle the case where readlink is looking at /proc/self/exe or
   3763        * /proc/<pid>/exe.
   3764        */
   3765       HChar name[25];
   3766       Char* arg1s = (Char*) ARG1;
   3767       VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
   3768       if (ML_(safe_to_deref)(arg1s, 1) &&
   3769           (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
   3770          )
   3771       {
   3772          VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
   3773          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
   3774                                                          ARG2, ARG3));
   3775       } else
   3776 #endif // defined(VGO_linux)
   3777       {
   3778          /* Normal case */
   3779          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
   3780       }
   3781    }
   3782 
   3783    if (SUCCESS && RES > 0)
   3784       POST_MEM_WRITE( ARG2, RES );
   3785 }
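
/* Illustrative client-side sketch of the /proc/self/exe lookup that the
   special case above intercepts, so that the client sees its own
   executable rather than the valgrind tool's.  Not part of the wrapper. */
#if 0
#include <stdio.h>
#include <unistd.h>

int main(void)
{
   char buf[4096];
   ssize_t n = readlink("/proc/self/exe", buf, sizeof buf - 1);
   if (n >= 0) {
      buf[n] = '\0';               /* readlink does not NUL-terminate */
      printf("running: %s\n", buf);
   }
   return 0;
}
#endif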
   3786 
   3787 PRE(sys_readv)
   3788 {
   3789    Int i;
   3790    struct vki_iovec * vec;
   3791    *flags |= SfMayBlock;
   3792    PRINT("sys_readv ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
   3793    PRE_REG_READ3(ssize_t, "readv",
   3794                  unsigned long, fd, const struct iovec *, vector,
   3795                  unsigned long, count);
   3796    if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
   3797       SET_STATUS_Failure( VKI_EBADF );
   3798    } else {
   3799       PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
   3800 
   3801       if (ARG2 != 0) {
   3802          /* ToDo: don't do any of the following if the vector is invalid */
   3803          vec = (struct vki_iovec *)ARG2;
   3804          for (i = 0; i < (Int)ARG3; i++)
   3805             PRE_MEM_WRITE( "readv(vector[...])",
   3806                            (Addr)vec[i].iov_base, vec[i].iov_len );
   3807       }
   3808    }
   3809 }
   3810 
   3811 POST(sys_readv)
   3812 {
   3813    vg_assert(SUCCESS);
   3814    if (RES > 0) {
   3815       Int i;
   3816       struct vki_iovec * vec = (struct vki_iovec *)ARG2;
   3817       Int remains = RES;
   3818 
   3819       /* RES holds the number of bytes read. */
   3820       for (i = 0; i < (Int)ARG3; i++) {
   3821 	 Int nReadThisBuf = vec[i].iov_len;
   3822 	 if (nReadThisBuf > remains) nReadThisBuf = remains;
   3823 	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
   3824 	 remains -= nReadThisBuf;
   3825 	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
   3826       }
   3827    }
   3828 }
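
/* Illustrative client-side sketch of readv(), matching how the POST
   wrapper above distributes a partial read across the iovec entries:
   earlier buffers are filled completely before later ones are touched.
   Not part of the wrapper. */
#if 0
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
   char a[8], b[8];
   struct iovec v[2] = { { a, sizeof a }, { b, sizeof b } };
   ssize_t n = readv(0 /* stdin */, v, 2);
   printf("readv returned %zd byte(s)\n", n);
   return 0;
}
#endif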
   3829 
   3830 PRE(sys_rename)
   3831 {
   3832    FUSE_COMPATIBLE_MAY_BLOCK();
   3833    PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   3834    PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
   3835    PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
   3836    PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
   3837 }
   3838 
   3839 PRE(sys_rmdir)
   3840 {
   3841    *flags |= SfMayBlock;
   3842    PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   3843    PRE_REG_READ1(long, "rmdir", const char *, pathname);
   3844    PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
   3845 }
   3846 
   3847 PRE(sys_select)
   3848 {
   3849    *flags |= SfMayBlock;
   3850    PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   3851    PRE_REG_READ5(long, "select",
   3852                  int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
   3853                  vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
   3854    // XXX: this possibly understates how much memory is read.
   3855    if (ARG2 != 0)
   3856       PRE_MEM_READ( "select(readfds)",
   3857 		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
   3858    if (ARG3 != 0)
   3859       PRE_MEM_READ( "select(writefds)",
   3860 		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
   3861    if (ARG4 != 0)
   3862       PRE_MEM_READ( "select(exceptfds)",
   3863 		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
   3864    if (ARG5 != 0)
   3865       PRE_timeval_READ( "select(timeout)", ARG5 );
   3866 }
   3867 
   3868 PRE(sys_setgid)
   3869 {
   3870    PRINT("sys_setgid ( %ld )", ARG1);
   3871    PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
   3872 }
   3873 
   3874 PRE(sys_setsid)
   3875 {
   3876    PRINT("sys_setsid ( )");
   3877    PRE_REG_READ0(long, "setsid");
   3878 }
   3879 
   3880 PRE(sys_setgroups)
   3881 {
   3882    PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
   3883    PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
   3884    if (ARG1 > 0)
   3885       PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
   3886 }
   3887 
   3888 PRE(sys_setpgid)
   3889 {
   3890    PRINT("setpgid ( %ld, %ld )", ARG1, ARG2);
   3891    PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
   3892 }
   3893 
   3894 PRE(sys_setregid)
   3895 {
   3896    PRINT("sys_setregid ( %ld, %ld )", ARG1, ARG2);
   3897    PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
   3898 }
   3899 
   3900 PRE(sys_setreuid)
   3901 {
   3902    PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
   3903    PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
   3904 }
   3905 
   3906 PRE(sys_setrlimit)
   3907 {
   3908    UWord arg1 = ARG1;
   3909    PRINT("sys_setrlimit ( %ld, %#lx )", ARG1,ARG2);
   3910    PRE_REG_READ2(long, "setrlimit",
   3911                  unsigned int, resource, struct rlimit *, rlim);
   3912    PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
   3913 
   3914 #ifdef _RLIMIT_POSIX_FLAG
   3915    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
   3916    // Unset it here to make the if statements below work correctly.
   3917    arg1 &= ~_RLIMIT_POSIX_FLAG;
   3918 #endif
   3919 
   3920    if (ARG2 &&
   3921        ((struct vki_rlimit *)ARG2)->rlim_cur > ((struct vki_rlimit *)ARG2)->rlim_max) {
   3922       SET_STATUS_Failure( VKI_EINVAL );
   3923    }
   3924    else if (arg1 == VKI_RLIMIT_NOFILE) {
   3925       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
   3926           ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
   3927          SET_STATUS_Failure( VKI_EPERM );
   3928       }
   3929       else {
   3930          VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
   3931          SET_STATUS_Success( 0 );
   3932       }
   3933    }
   3934    else if (arg1 == VKI_RLIMIT_DATA) {
   3935       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
   3936           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
   3937          SET_STATUS_Failure( VKI_EPERM );
   3938       }
   3939       else {
   3940          VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
   3941          SET_STATUS_Success( 0 );
   3942       }
   3943    }
   3944    else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
   3945       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
   3946           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
   3947          SET_STATUS_Failure( VKI_EPERM );
   3948       }
   3949       else {
   3950          VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
   3951          VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
   3952          SET_STATUS_Success( 0 );
   3953       }
   3954    }
   3955 }
   3956 
   3957 PRE(sys_setuid)
   3958 {
   3959    PRINT("sys_setuid ( %ld )", ARG1);
   3960    PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
   3961 }
   3962 
   3963 PRE(sys_newstat)
   3964 {
   3965    PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   3966    PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
   3967    PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
   3968    PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
   3969 }
   3970 
   3971 POST(sys_newstat)
   3972 {
   3973    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   3974 }
   3975 
   3976 PRE(sys_statfs)
   3977 {
   3978    PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   3979    PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
   3980    PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
   3981    PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
   3982 }
   3983 POST(sys_statfs)
   3984 {
   3985    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
   3986 }
   3987 
   3988 PRE(sys_statfs64)
   3989 {
   3990    PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
   3991    PRE_REG_READ3(long, "statfs64",
   3992                  const char *, path, vki_size_t, size, struct statfs64 *, buf);
   3993    PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
   3994    PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
   3995 }
   3996 POST(sys_statfs64)
   3997 {
   3998    POST_MEM_WRITE( ARG3, ARG2 );
   3999 }
   4000 
   4001 PRE(sys_symlink)
   4002 {
   4003    *flags |= SfMayBlock;
   4004    PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   4005    PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
   4006    PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
   4007    PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
   4008 }
   4009 
   4010 PRE(sys_time)
   4011 {
   4012    /* time_t time(time_t *t); */
   4013    PRINT("sys_time ( %#lx )",ARG1);
   4014    PRE_REG_READ1(long, "time", int *, t);
   4015    if (ARG1 != 0) {
   4016       PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
   4017    }
   4018 }
   4019 
   4020 POST(sys_time)
   4021 {
   4022    if (ARG1 != 0) {
   4023       POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
   4024    }
   4025 }
   4026 
   4027 PRE(sys_times)
   4028 {
   4029    PRINT("sys_times ( %#lx )", ARG1);
   4030    PRE_REG_READ1(long, "times", struct tms *, buf);
   4031    if (ARG1 != 0) {
   4032       PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
   4033    }
   4034 }
   4035 
   4036 POST(sys_times)
   4037 {
   4038    if (ARG1 != 0) {
   4039       POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
   4040    }
   4041 }
   4042 
   4043 PRE(sys_umask)
   4044 {
   4045    PRINT("sys_umask ( %ld )", ARG1);
   4046    PRE_REG_READ1(long, "umask", int, mask);
   4047 }
   4048 
   4049 PRE(sys_unlink)
   4050 {
   4051    *flags |= SfMayBlock;
   4052    PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
   4053    PRE_REG_READ1(long, "unlink", const char *, pathname);
   4054    PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
   4055 }
   4056 
   4057 PRE(sys_newuname)
   4058 {
   4059    PRINT("sys_newuname ( %#lx )", ARG1);
   4060    PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
   4061    PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
   4062 }
   4063 
   4064 POST(sys_newuname)
   4065 {
   4066    if (ARG1 != 0) {
   4067       POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
   4068    }
   4069 }
   4070 
   4071 PRE(sys_waitpid)
   4072 {
   4073    *flags |= SfMayBlock;
   4074    PRINT("sys_waitpid ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   4075    PRE_REG_READ3(long, "waitpid",
   4076                  vki_pid_t, pid, unsigned int *, status, int, options);
   4077 
   4078    if (ARG2 != (Addr)NULL)
   4079       PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
   4080 }
   4081 
   4082 POST(sys_waitpid)
   4083 {
   4084    if (ARG2 != (Addr)NULL)
   4085       POST_MEM_WRITE( ARG2, sizeof(int) );
   4086 }
   4087 
   4088 PRE(sys_wait4)
   4089 {
   4090    *flags |= SfMayBlock;
   4091    PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", ARG1,ARG2,ARG3,ARG4);
   4092 
   4093    PRE_REG_READ4(long, "wait4",
   4094                  vki_pid_t, pid, unsigned int *, status, int, options,
   4095                  struct rusage *, rusage);
   4096    if (ARG2 != (Addr)NULL)
   4097       PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
   4098    if (ARG4 != (Addr)NULL)
   4099       PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
   4100 }
   4101 
   4102 POST(sys_wait4)
   4103 {
   4104    if (ARG2 != (Addr)NULL)
   4105       POST_MEM_WRITE( ARG2, sizeof(int) );
   4106    if (ARG4 != (Addr)NULL)
   4107       POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
   4108 }
   4109 
   4110 PRE(sys_writev)
   4111 {
   4112    Int i;
   4113    struct vki_iovec * vec;
   4114    *flags |= SfMayBlock;
   4115    PRINT("sys_writev ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
   4116    PRE_REG_READ3(ssize_t, "writev",
   4117                  unsigned long, fd, const struct iovec *, vector,
   4118                  unsigned long, count);
   4119    if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
   4120       SET_STATUS_Failure( VKI_EBADF );
   4121    } else {
   4122       PRE_MEM_READ( "writev(vector)",
   4123 		     ARG2, ARG3 * sizeof(struct vki_iovec) );
   4124       if (ARG2 != 0) {
   4125          /* ToDo: don't do any of the following if the vector is invalid */
   4126          vec = (struct vki_iovec *)ARG2;
   4127          for (i = 0; i < (Int)ARG3; i++)
   4128             PRE_MEM_READ( "writev(vector[...])",
   4129                            (Addr)vec[i].iov_base, vec[i].iov_len );
   4130       }
   4131    }
   4132 }
   4133 
   4134 PRE(sys_utimes)
   4135 {
   4136    FUSE_COMPATIBLE_MAY_BLOCK();
   4137    PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   4138    PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
   4139    PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
   4140    if (ARG2 != 0) {
   4141       PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
   4142       PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
   4143    }
   4144 }
   4145 
   4146 PRE(sys_acct)
   4147 {
   4148    PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
   4149    PRE_REG_READ1(long, "acct", const char *, filename);
   4150    PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
   4151 }
   4152 
   4153 PRE(sys_pause)
   4154 {
   4155    *flags |= SfMayBlock;
   4156    PRINT("sys_pause ( )");
   4157    PRE_REG_READ0(long, "pause");
   4158 }
   4159 
   4160 PRE(sys_sigaltstack)
   4161 {
   4162    PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
   4163    PRE_REG_READ2(int, "sigaltstack",
   4164                  const vki_stack_t *, ss, vki_stack_t *, oss);
   4165    if (ARG1 != 0) {
   4166       const vki_stack_t *ss = (vki_stack_t *)ARG1;
   4167       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
   4168       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
   4169       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
   4170    }
   4171    if (ARG2 != 0) {
   4172       PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
   4173    }
   4174 
   4175    SET_STATUS_from_SysRes(
   4176       VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
   4177                               (vki_stack_t*)ARG2)
   4178    );
   4179 }
   4180 POST(sys_sigaltstack)
   4181 {
   4182    vg_assert(SUCCESS);
   4183    if (RES == 0 && ARG2 != 0)
   4184       POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
   4185 }
   4186 
   4187 #undef PRE
   4188 #undef POST
   4189 
   4190 #endif // defined(VGO_linux) || defined(VGO_darwin)
   4191 
   4192 /*--------------------------------------------------------------------*/
   4193 /*--- end                                                          ---*/
   4194 /*--------------------------------------------------------------------*/
   4195 
   4196