      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Wrappers for generic (non-AIX5!) Unix system calls           ---*/
      4 /*---                                            syswrap-generic.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2000-2010 Julian Seward
     12       jseward (at) acm.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #if defined(VGO_linux) || defined(VGO_darwin)
     33 
     34 #include "pub_core_basics.h"
     35 #include "pub_core_vki.h"
     36 #include "pub_core_vkiscnums.h"
     37 #include "pub_core_threadstate.h"
     38 #include "pub_core_debuginfo.h"     // VG_(di_notify_*)
     39 #include "pub_core_aspacemgr.h"
     40 #include "pub_core_transtab.h"      // VG_(discard_translations)
     41 #include "pub_core_xarray.h"
     42 #include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
     43 #include "pub_core_debuglog.h"
     44 #include "pub_core_errormgr.h"
     45 #include "pub_core_libcbase.h"
     46 #include "pub_core_libcassert.h"
     47 #include "pub_core_libcfile.h"
     48 #include "pub_core_libcprint.h"
     49 #include "pub_core_libcproc.h"
     50 #include "pub_core_libcsignal.h"
     51 #include "pub_core_machine.h"       // VG_(get_SP)
     52 #include "pub_core_mallocfree.h"
     53 #include "pub_core_options.h"
     54 #include "pub_core_scheduler.h"
     55 #include "pub_core_signals.h"
     56 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     57 #include "pub_core_syscall.h"
     58 #include "pub_core_syswrap.h"
     59 #include "pub_core_tooliface.h"
     60 #include "pub_core_ume.h"
     61 
     62 #include "priv_types_n_macros.h"
     63 #include "priv_syswrap-generic.h"
     64 
     65 #include "config.h"
     66 
     67 
/* Returns True iff the address range is something the client can
   plausibly mess with: all of it either already belongs to the
   client, or is free, or is a reservation. */
     71 
     72 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
     73                                    const Char *syscallname)
     74 {
     75    Bool ret;
     76 
     77    if (size == 0)
     78       return True;
     79 
     80    ret = VG_(am_is_valid_for_client_or_free_or_resvn)
     81             (start,size,VKI_PROT_NONE);
     82 
     83    if (0)
     84       VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
     85 		  syscallname, start, start+size-1, (Int)ret);
     86 
     87    if (!ret && syscallname != NULL) {
     88       VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
     89                                "to modify addresses %#lx-%#lx\n",
     90                                syscallname, start, start+size-1);
     91       if (VG_(clo_verbosity) > 1) {
     92          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
     93       }
     94    }
     95 
     96    return ret;
     97 }
     98 
     99 
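/* Note: the upper bound is VG_SIGVGRTUSERMAX rather than the highest
   real-time signal because the signals above that value are reserved
   for Valgrind's own internal use and must never be touched by the
   client. */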
    100 Bool ML_(client_signal_OK)(Int sigNo)
    101 {
    102    /* signal 0 is OK for kill */
    103    Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
    104 
    105    //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
    106 
    107    return ret;
    108 }
    109 
    110 
    111 /* Handy small function to help stop wrappers from segfaulting when
   presented with bogus client addresses.  It is not used for
   generating user-visible errors. */
    114 
    115 Bool ML_(safe_to_deref) ( void* start, SizeT size )
    116 {
    117    return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
    118 }
    119 
    120 
    121 /* ---------------------------------------------------------------------
    122    Doing mmap, mremap
    123    ------------------------------------------------------------------ */
    124 
    125 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
    126    munmap, mprotect (and mremap??) work at the page level.  So addresses
    127    and lengths must be adjusted for this. */
    128 
    129 /* Mash around start and length so that the area exactly covers
   an integral number of pages.  If we don't do that, memcheck's
   idea of addressable memory diverges from the kernel's, which
   causes the leak detector to crash. */
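/* Worked example, assuming 4 KiB pages: a request covering
   (addr 0x12345, len 0x100) becomes (addr 0x12000, len 0x1000),
   i.e. exactly the one page containing the original range. */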
    133 static
    134 void page_align_addr_and_len( Addr* a, SizeT* len)
    135 {
    136    Addr ra;
    137 
    138    ra = VG_PGROUNDDN(*a);
    139    *len = VG_PGROUNDUP(*a + *len) - ra;
    140    *a = ra;
    141 }
    142 
    143 static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
    144                                 UInt flags, Int fd, Off64T offset)
    145 {
    146    Bool d;
    147 
    148    /* 'a' is the return value from a real kernel mmap, hence: */
    149    vg_assert(VG_IS_PAGE_ALIGNED(a));
    150    /* whereas len is whatever the syscall supplied.  So: */
    151    len = VG_PGROUNDUP(len);
    152 
    153    d = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );
    154 
    155    if (d)
    156       VG_(discard_translations)( (Addr64)a, (ULong)len,
    157                                  "notify_core_of_mmap" );
    158 }
    159 
    160 static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
    161 {
    162    SizeT fourgig = (1ULL << 32);
    163    SizeT guardpage = 10 * fourgig;
    164    Bool rr, ww, xx;
    165 
    166    /* 'a' is the return value from a real kernel mmap, hence: */
    167    vg_assert(VG_IS_PAGE_ALIGNED(a));
    168    /* whereas len is whatever the syscall supplied.  So: */
    169    len = VG_PGROUNDUP(len);
    170 
    171    rr = toBool(prot & VKI_PROT_READ);
    172    ww = toBool(prot & VKI_PROT_WRITE);
    173    xx = toBool(prot & VKI_PROT_EXEC);
    174 
    175 #ifdef VGA_amd64
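   /* Note: despite its name, 'guardpage' is 10 * 4 GiB = 40 GiB here,
      so this check fires for mappings of at least 4 + 2*40 = 84 GiB,
      matching the "mmap(84G)" message below. */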
    176    if (len >= fourgig + 2 * guardpage) {
    177      VG_(printf)("Valgrind: ignoring NaCl's mmap(84G)\n");
    178      return;
    179    }
    180 #endif  // VGA_amd64
    181    VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
    182 }
    183 
    184 
    185 /* When a client mmap has been successfully done, this function must
    186    be called.  It notifies both aspacem and the tool of the new
    187    mapping.
    188 
    189    JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
    190    it is called from is POST(sys_io_setup).  In particular,
    191    ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
    192    client mmap.  But it doesn't call this function; instead it does the
    193    relevant notifications itself.  Here, we just pass di_handle=0 to
    194    notify_tool_of_mmap as we have no better information.  But really this
    195    function should be done away with; problem is I don't understand what
    196    POST(sys_io_setup) does or how it works.
    197 
    198    [However, this function is used lots for Darwin, because
    199     ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
    200  */
    201 void
    202 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
    203                                     UInt flags, Int fd, Off64T offset )
    204 {
    205    // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
    206    // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
    207    // Should it?  --njn
    208    notify_core_of_mmap(a, len, prot, flags, fd, offset);
    209    notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
    210 }
    211 
    212 void
    213 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
    214 {
    215    Bool d;
    216 
    217    page_align_addr_and_len(&a, &len);
    218    d = VG_(am_notify_munmap)(a, len);
    219    VG_TRACK( die_mem_munmap, a, len );
    220    VG_(di_notify_munmap)( a, len );
    221    if (d)
    222       VG_(discard_translations)( (Addr64)a, (ULong)len,
    223                                  "ML_(notify_core_and_tool_of_munmap)" );
    224 }
    225 
    226 void
    227 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
    228 {
    229    Bool rr = toBool(prot & VKI_PROT_READ);
    230    Bool ww = toBool(prot & VKI_PROT_WRITE);
    231    Bool xx = toBool(prot & VKI_PROT_EXEC);
    232    Bool d;
    233 
    234    page_align_addr_and_len(&a, &len);
    235    d = VG_(am_notify_mprotect)(a, len, prot);
    236    VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
    237    VG_(di_notify_mprotect)( a, len, prot );
    238    if (d)
    239       VG_(discard_translations)( (Addr64)a, (ULong)len,
    240                                  "ML_(notify_core_and_tool_of_mprotect)" );
    241 }
    242 
    243 
    244 
    245 #if HAVE_MREMAP
    246 /* Expand (or shrink) an existing mapping, potentially moving it at
    247    the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
    248 */
    249 static
    250 SysRes do_mremap( Addr old_addr, SizeT old_len,
    251                   Addr new_addr, SizeT new_len,
    252                   UWord flags, ThreadId tid )
    253 {
    254 #  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
    255 
    256    Bool      ok, d;
    257    NSegment const* old_seg;
    258    Addr      advised;
    259    Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
    260    Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
    261 
    262    if (0)
    263       VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
    264                   old_addr,old_len,new_addr,new_len,
    265                   flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
    266                   flags & VKI_MREMAP_FIXED ? "FIXED" : "");
    267    if (0)
    268       VG_(am_show_nsegments)(0, "do_remap: before");
    269 
    270    if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
    271       goto eINVAL;
    272 
    273    if (!VG_IS_PAGE_ALIGNED(old_addr))
    274       goto eINVAL;
    275 
    276    old_len = VG_PGROUNDUP(old_len);
    277    new_len = VG_PGROUNDUP(new_len);
    278 
    279    if (new_len == 0)
    280       goto eINVAL;
    281 
    282    /* kernel doesn't reject this, but we do. */
    283    if (old_len == 0)
    284       goto eINVAL;
    285 
    286    /* reject wraparounds */
    287    if (old_addr + old_len < old_addr
    288        || new_addr + new_len < new_len)
    289       goto eINVAL;
    290 
    291    /* kernel rejects all fixed, no-move requests (which are
    292       meaningless). */
    293    if (f_fixed == True && f_maymove == False)
    294       goto eINVAL;
    295 
    296    /* Stay away from non-client areas. */
    297    if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
    298       goto eINVAL;
    299 
    300    /* In all remaining cases, if the old range does not fall within a
    301       single segment, fail. */
    302    old_seg = VG_(am_find_nsegment)( old_addr );
    303    if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
    304       goto eINVAL;
    305    if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC)
    306       goto eINVAL;
    307 
    308    vg_assert(old_len > 0);
    309    vg_assert(new_len > 0);
    310    vg_assert(VG_IS_PAGE_ALIGNED(old_len));
    311    vg_assert(VG_IS_PAGE_ALIGNED(new_len));
    312    vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
    313 
    314    /* There are 3 remaining cases:
    315 
    316       * maymove == False
    317 
    318         new space has to be at old address, so:
    319             - shrink    -> unmap end
    320             - same size -> do nothing
    321             - grow      -> if can grow in-place, do so, else fail
    322 
    323       * maymove == True, fixed == False
    324 
    325         new space can be anywhere, so:
    326             - shrink    -> unmap end
    327             - same size -> do nothing
    328             - grow      -> if can grow in-place, do so, else
    329                            move to anywhere large enough, else fail
    330 
    331       * maymove == True, fixed == True
    332 
    333         new space must be at new address, so:
    334 
    335             - if new address is not page aligned, fail
    336             - if new address range overlaps old one, fail
    337             - if new address range cannot be allocated, fail
            - else move to new address range with new size
    340    */
    341 
    342    if (f_maymove == False) {
    343       /* new space has to be at old address */
    344       if (new_len < old_len)
    345          goto shrink_in_place;
    346       if (new_len > old_len)
    347          goto grow_in_place_or_fail;
    348       goto same_in_place;
    349    }
    350 
    351    if (f_maymove == True && f_fixed == False) {
    352       /* new space can be anywhere */
    353       if (new_len < old_len)
    354          goto shrink_in_place;
    355       if (new_len > old_len)
    356          goto grow_in_place_or_move_anywhere_or_fail;
    357       goto same_in_place;
    358    }
    359 
    360    if (f_maymove == True && f_fixed == True) {
    361       /* new space can only be at the new address */
    362       if (!VG_IS_PAGE_ALIGNED(new_addr))
    363          goto eINVAL;
    364       if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
    365          /* no overlap */
    366       } else {
    367          goto eINVAL;
    368       }
    369       if (new_addr == 0)
    370          goto eINVAL;
    371          /* VG_(am_get_advisory_client_simple) interprets zero to mean
    372             non-fixed, which is not what we want */
    373       advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
    374       if (!ok || advised != new_addr)
    375          goto eNOMEM;
    376       ok = VG_(am_relocate_nooverlap_client)
    377               ( &d, old_addr, old_len, new_addr, new_len );
    378       if (ok) {
    379          VG_TRACK( copy_mem_remap, old_addr, new_addr,
    380                                    MIN_SIZET(old_len,new_len) );
    381          if (new_len > old_len)
    382             VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
    383                       old_seg->hasR, old_seg->hasW, old_seg->hasX,
    384                       0/*di_handle*/ );
    385          VG_TRACK(die_mem_munmap, old_addr, old_len);
    386          if (d) {
    387             VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
    388             VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
    389          }
    390          return VG_(mk_SysRes_Success)( new_addr );
    391       }
    392       goto eNOMEM;
    393    }
    394 
    395    /* end of the 3 cases */
    396    /*NOTREACHED*/ vg_assert(0);
    397 
    398   grow_in_place_or_move_anywhere_or_fail:
    399    {
    400    /* try growing it in-place */
    401    Addr   needA = old_addr + old_len;
    402    SSizeT needL = new_len - old_len;
    403 
    404    vg_assert(needL > 0);
    405    if (needA == 0)
    406       goto eINVAL;
    407       /* VG_(am_get_advisory_client_simple) interprets zero to mean
    408          non-fixed, which is not what we want */
    409    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
    410    if (ok) {
    411       /* VG_(am_get_advisory_client_simple) (first arg == 0, meaning
    412          this-or-nothing) is too lenient, and may allow us to trash
    413          the next segment along.  So make very sure that the proposed
    414          new area really is free.  This is perhaps overly
    415          conservative, but it fixes #129866. */
    416       NSegment const* segLo = VG_(am_find_nsegment)( needA );
    417       NSegment const* segHi = VG_(am_find_nsegment)( needA + needL - 1 );
    418       if (segLo == NULL || segHi == NULL
    419           || segLo != segHi || segLo->kind != SkFree)
    420          ok = False;
    421    }
    422    if (ok && advised == needA) {
    423       ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
    424       if (ok) {
    425          VG_TRACK( new_mem_mmap, needA, needL,
    426                                  old_seg->hasR,
    427                                  old_seg->hasW, old_seg->hasX,
    428                                  0/*di_handle*/ );
    429          if (d)
    430             VG_(discard_translations)( needA, needL, "do_remap(3)" );
    431          return VG_(mk_SysRes_Success)( old_addr );
    432       }
    433    }
    434 
    435    /* that failed.  Look elsewhere. */
    436    advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
    437    if (ok) {
    438       Bool oldR = old_seg->hasR;
    439       Bool oldW = old_seg->hasW;
    440       Bool oldX = old_seg->hasX;
    441       /* assert new area does not overlap old */
    442       vg_assert(advised+new_len-1 < old_addr
    443                 || advised > old_addr+old_len-1);
    444       ok = VG_(am_relocate_nooverlap_client)
    445               ( &d, old_addr, old_len, advised, new_len );
    446       if (ok) {
    447          VG_TRACK( copy_mem_remap, old_addr, advised,
    448                                    MIN_SIZET(old_len,new_len) );
    449          if (new_len > old_len)
    450             VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
    451                       oldR, oldW, oldX, 0/*di_handle*/ );
    452          VG_TRACK(die_mem_munmap, old_addr, old_len);
    453          if (d) {
    454             VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
    455             VG_(discard_translations)( advised, new_len, "do_remap(5)" );
    456          }
    457          return VG_(mk_SysRes_Success)( advised );
    458       }
    459    }
    460    goto eNOMEM;
    461    }
    462    /*NOTREACHED*/ vg_assert(0);
    463 
    464   grow_in_place_or_fail:
    465    {
    466    Addr  needA = old_addr + old_len;
    467    SizeT needL = new_len - old_len;
    468    if (needA == 0)
    469       goto eINVAL;
    470       /* VG_(am_get_advisory_client_simple) interprets zero to mean
    471          non-fixed, which is not what we want */
    472    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
    473    if (ok) {
    474       /* VG_(am_get_advisory_client_simple) (first arg == 0, meaning
    475          this-or-nothing) is too lenient, and may allow us to trash
    476          the next segment along.  So make very sure that the proposed
    477          new area really is free. */
    478       NSegment const* segLo = VG_(am_find_nsegment)( needA );
    479       NSegment const* segHi = VG_(am_find_nsegment)( needA + needL - 1 );
    480       if (segLo == NULL || segHi == NULL
    481           || segLo != segHi || segLo->kind != SkFree)
    482          ok = False;
    483    }
    484    if (!ok || advised != needA)
    485       goto eNOMEM;
    486    ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
    487    if (!ok)
    488       goto eNOMEM;
    489    VG_TRACK( new_mem_mmap, needA, needL,
    490                            old_seg->hasR, old_seg->hasW, old_seg->hasX,
    491                            0/*di_handle*/ );
    492    if (d)
    493       VG_(discard_translations)( needA, needL, "do_remap(6)" );
    494    return VG_(mk_SysRes_Success)( old_addr );
    495    }
    496    /*NOTREACHED*/ vg_assert(0);
    497 
    498   shrink_in_place:
    499    {
    500    SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
    501    if (sr_isError(sres))
    502       return sres;
    503    VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
    504    if (d)
    505       VG_(discard_translations)( old_addr+new_len, old_len-new_len,
    506                                  "do_remap(7)" );
    507    return VG_(mk_SysRes_Success)( old_addr );
    508    }
    509    /*NOTREACHED*/ vg_assert(0);
    510 
    511   same_in_place:
    512    return VG_(mk_SysRes_Success)( old_addr );
    513    /*NOTREACHED*/ vg_assert(0);
    514 
    515   eINVAL:
    516    return VG_(mk_SysRes_Error)( VKI_EINVAL );
    517   eNOMEM:
    518    return VG_(mk_SysRes_Error)( VKI_ENOMEM );
    519 
    520 #  undef MIN_SIZET
    521 }
    522 #endif /* HAVE_MREMAP */
    523 
    524 
    525 /* ---------------------------------------------------------------------
    526    File-descriptor tracking
    527    ------------------------------------------------------------------ */
    528 
    529 /* One of these is allocated for each open file descriptor.  */
    530 typedef struct OpenFd
    531 {
    532    Int fd;                        /* The file descriptor */
    533    Char *pathname;                /* NULL if not a regular file or unknown */
    534    ExeContext *where;             /* NULL if inherited from parent */
    535    struct OpenFd *next, *prev;
    536 } OpenFd;
    537 
    538 /* List of allocated file descriptors. */
    539 static OpenFd *allocated_fds = NULL;
    540 
    541 /* Count of open file descriptors. */
    542 static Int fd_count = 0;
    543 
    544 
    545 /* Note the fact that a file descriptor was just closed. */
    546 static
    547 void record_fd_close(Int fd)
    548 {
    549    OpenFd *i = allocated_fds;
    550 
    551    if (fd >= VG_(fd_hard_limit))
    552       return;			/* Valgrind internal */
    553 
    554    while(i) {
    555       if(i->fd == fd) {
    556          if(i->prev)
    557             i->prev->next = i->next;
    558          else
    559             allocated_fds = i->next;
    560          if(i->next)
    561             i->next->prev = i->prev;
    562          if(i->pathname)
    563             VG_(arena_free) (VG_AR_CORE, i->pathname);
    564          VG_(arena_free) (VG_AR_CORE, i);
    565          fd_count--;
    566          break;
    567       }
    568       i = i->next;
    569    }
    570 }
    571 
    572 /* Note the fact that a file descriptor was just opened.  If the
    573    tid is -1, this indicates an inherited fd.  If the pathname is NULL,
   this either indicates a non-standard file (e.g. a pipe or socket or
    575    some such thing) or that we don't know the filename.  If the fd is
    576    already open, then we're probably doing a dup2() to an existing fd,
    577    so just overwrite the existing one. */
    578 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd, char *pathname)
    579 {
    580    OpenFd *i;
    581 
    582    if (fd >= VG_(fd_hard_limit))
    583       return;			/* Valgrind internal */
    584 
    585    /* Check to see if this fd is already open. */
    586    i = allocated_fds;
    587    while (i) {
    588       if (i->fd == fd) {
    589          if (i->pathname) VG_(arena_free)(VG_AR_CORE, i->pathname);
    590          break;
    591       }
    592       i = i->next;
    593    }
    594 
    595    /* Not already one: allocate an OpenFd */
    596    if (i == NULL) {
    597       i = VG_(arena_malloc)(VG_AR_CORE, "syswrap.rfdowgn.1", sizeof(OpenFd));
    598 
    599       i->prev = NULL;
    600       i->next = allocated_fds;
    601       if(allocated_fds) allocated_fds->prev = i;
    602       allocated_fds = i;
    603       fd_count++;
    604    }
    605 
    606    i->fd = fd;
    607    i->pathname = VG_(arena_strdup)(VG_AR_CORE, "syswrap.rfdowgn.2", pathname);
    608    i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
    609 }
    610 
    611 // Record opening of an fd, and find its name.
    612 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
    613 {
    614    static HChar buf[VKI_PATH_MAX];
    615    Char* name;
    616    if (VG_(resolve_filename)(fd, buf, VKI_PATH_MAX))
    617       name = buf;
    618    else
    619       name = NULL;
    620 
    621    ML_(record_fd_open_with_given_name)(tid, fd, name);
    622 }
    623 
    624 // Record opening of a nameless fd.
    625 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
    626 {
    627    ML_(record_fd_open_with_given_name)(tid, fd, NULL);
    628 }
    629 
    630 static
    631 Char *unix2name(struct vki_sockaddr_un *sa, UInt len, Char *name)
    632 {
    633    if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
    634       VG_(sprintf)(name, "<unknown>");
    635    } else {
    636       VG_(sprintf)(name, "%s", sa->sun_path);
    637    }
    638 
    639    return name;
    640 }
    641 
    642 static
    643 Char *inet2name(struct vki_sockaddr_in *sa, UInt len, Char *name)
    644 {
    645    if (sa == NULL || len == 0) {
    646       VG_(sprintf)(name, "<unknown>");
    647    } else {
    648       UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
    649       if (addr == 0) {
    650          VG_(sprintf)(name, "<unbound>");
    651       } else {
    652          VG_(sprintf)(name, "%u.%u.%u.%u:%u",
    653                       (addr>>24) & 0xFF, (addr>>16) & 0xFF,
    654                       (addr>>8) & 0xFF, addr & 0xFF,
    655                       VG_(ntohs)(sa->sin_port));
    656       }
    657    }
    658 
    659    return name;
    660 }
    661 
    662 /*
 * Try to get some details about a socket.
    664  */
    665 static void
    666 getsockdetails(Int fd)
    667 {
    668    union u {
    669       struct vki_sockaddr a;
    670       struct vki_sockaddr_in in;
    671       struct vki_sockaddr_un un;
    672    } laddr;
    673    UInt llen;
    674 
    675    llen = sizeof(laddr);
    676    VG_(memset)(&laddr, 0, llen);
    677 
    678    if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
    679       switch(laddr.a.sa_family) {
    680       case VKI_AF_INET: {
    681          static char lname[32];
    682          static char pname[32];
    683          struct vki_sockaddr_in paddr;
    684          UInt plen = sizeof(struct vki_sockaddr_in);
    685 
    686          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
    687             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
    688                          inet2name(&(laddr.in), llen, lname),
    689                          inet2name(&paddr, plen, pname));
    690          } else {
    691             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
    692                          fd, inet2name(&(laddr.in), llen, lname));
    693          }
    694          return;
    695          }
    696       case VKI_AF_UNIX: {
    697          static char lname[256];
    698          VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
    699                       unix2name(&(laddr.un), llen, lname));
    700          return;
    701          }
    702       default:
    703          VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
    704                       laddr.a.sa_family, fd);
    705          return;
    706       }
    707    }
    708 
    709    VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
    710 }
    711 
    712 
    713 /* Dump out a summary, and a more detailed list, of open file descriptors. */
    714 void VG_(show_open_fds) (void)
    715 {
    716    OpenFd *i = allocated_fds;
    717 
    718    VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open at exit.\n", fd_count);
    719 
    720    while (i) {
    721       if (i->pathname) {
    722          VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
    723                       i->pathname);
    724       } else {
    725          Int val;
    726          UInt len = sizeof(val);
    727 
    728          if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
    729              == -1) {
    730             VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
    731          } else {
    732             getsockdetails(i->fd);
    733          }
    734       }
    735 
    736       if(i->where) {
    737          VG_(pp_ExeContext)(i->where);
    738          VG_(message)(Vg_UserMsg, "\n");
    739       } else {
    740          VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
    741          VG_(message)(Vg_UserMsg, "\n");
    742       }
    743 
    744       i = i->next;
    745    }
    746 
    747    VG_(message)(Vg_UserMsg, "\n");
    748 }
    749 
    750 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
    751    have /proc support compiled in, or a non-Linux kernel), then we need to
    752    find out what file descriptors we inherited from our parent process the
    753    hard way - by checking each fd in turn. */
    754 static
    755 void init_preopened_fds_without_proc_self_fd(void)
    756 {
    757    struct vki_rlimit lim;
    758    UInt count;
    759    Int i;
    760 
    761    if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
    762       /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
    763          an arbitrarily high number.  1024 happens to be the limit in
    764          the 2.4 Linux kernels. */
    765       count = 1024;
    766    } else {
    767       count = lim.rlim_cur;
    768    }
    769 
    770    for (i = 0; i < count; i++)
    771       if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
    772          ML_(record_fd_open_named)(-1, i);
    773 }
    774 
    775 /* Initialize the list of open file descriptors with the file descriptors
   we inherited from our parent process. */
    777 
    778 void VG_(init_preopened_fds)(void)
    779 {
    780 // Nb: AIX5 is handled in syswrap-aix5.c.
    781 // DDD: should probably use HAVE_PROC here or similar, instead.
    782 #if defined(VGO_linux)
    783    Int ret;
    784    struct vki_dirent d;
    785    SysRes f;
    786 
    787    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
    788    if (sr_isError(f)) {
    789       init_preopened_fds_without_proc_self_fd();
    790       return;
    791    }
    792 
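   /* Read the directory one entry at a time: each iteration fetches a
      single dirent into 'd' and then, at the bottom of the loop, seeks
      to d.d_off, the offset of the following entry. */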
    793    while ((ret = VG_(getdents)(sr_Res(f), &d, sizeof(d))) != 0) {
    794       if (ret == -1)
    795          goto out;
    796 
    797       if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
    798          Char* s;
    799          Int fno = VG_(strtoll10)(d.d_name, &s);
    800          if (*s == '\0') {
    801             if (fno != sr_Res(f))
    802                if (VG_(clo_track_fds))
    803                   ML_(record_fd_open_named)(-1, fno);
    804          } else {
    805             VG_(message)(Vg_DebugMsg,
    806                "Warning: invalid file name in /proc/self/fd: %s\n",
    807                d.d_name);
    808          }
    809       }
    810 
    811       VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
    812    }
    813 
    814   out:
    815    VG_(close)(sr_Res(f));
    816 
    817 #elif defined(VGO_darwin)
    818    init_preopened_fds_without_proc_self_fd();
    819 
    820 #else
    821 #  error Unknown OS
    822 #endif
    823 }
    824 
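/* Concatenate s1 and s2 into a freshly arena-allocated string; the
   caller must eventually VG_(arena_free) it from the same arena. */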
    825 static
    826 Char *strdupcat ( HChar* cc, const Char *s1, const Char *s2, ArenaId aid )
    827 {
    828    UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
    829    Char *result = VG_(arena_malloc) ( aid, cc, len );
    830    VG_(strcpy) ( result, s1 );
    831    VG_(strcat) ( result, s2 );
    832    return result;
    833 }
    834 
    835 static
    836 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
    837                             Char *msg, Addr base, SizeT size )
    838 {
    839    Char *outmsg = strdupcat ( "di.syswrap.pmrs.1",
    840                               "socketcall.sendmsg", msg, VG_AR_CORE );
    841    PRE_MEM_READ( outmsg, base, size );
    842    VG_(arena_free) ( VG_AR_CORE, outmsg );
    843 }
    844 
    845 static
    846 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
    847                              Char *msg, Addr base, SizeT size )
    848 {
    849    Char *outmsg = strdupcat ( "di.syswrap.pmwr.1",
    850                               "socketcall.recvmsg", msg, VG_AR_CORE );
    851    if ( read )
    852       PRE_MEM_READ( outmsg, base, size );
    853    else
    854       PRE_MEM_WRITE( outmsg, base, size );
    855    VG_(arena_free) ( VG_AR_CORE, outmsg );
    856 }
    857 
    858 static
    859 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
    860                               Char *fieldName, Addr base, SizeT size )
    861 {
    862    if ( !read )
    863       POST_MEM_WRITE( base, size );
    864 }
    865 
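/* Apply foreach_func to every client-supplied part of a struct
   msghdr: the fixed header fields, then (if present) msg_name, the
   iovec array plus each iovec's buffer, and the control (ancillary)
   data.  The Bool passed to foreach_func is True for fields the
   kernel only reads and False for buffers it may also write (which
   matters for recvmsg). */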
    866 static
    867 void msghdr_foreachfield (
    868         ThreadId tid,
    869         struct vki_msghdr *msg,
    870         void (*foreach_func)( ThreadId, Bool, Char *, Addr, SizeT )
    871      )
    872 {
    873    if ( !msg )
    874       return;
    875 
    876    foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
    877    foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
    878    foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
    879    foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
    880    foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
    881    foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
    882    foreach_func ( tid, False, "(msg)", (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );
    883 
    884    if ( msg->msg_name )
    885       foreach_func ( tid, False,
    886                      "(msg.msg_name)",
    887                      (Addr)msg->msg_name, msg->msg_namelen );
    888 
    889    if ( msg->msg_iov ) {
    890       struct vki_iovec *iov = msg->msg_iov;
    891       UInt i;
    892 
    893       foreach_func ( tid, True,
    894                      "(msg.msg_iov)",
    895                      (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );
    896 
    897       for ( i = 0; i < msg->msg_iovlen; ++i, ++iov )
    898          foreach_func ( tid, False,
    899                         "(msg.msg_iov[i])",
    900                         (Addr)iov->iov_base, iov->iov_len );
    901    }
    902 
    903    if ( msg->msg_control )
    904       foreach_func ( tid, False,
    905                      "(msg.msg_control)",
    906                      (Addr)msg->msg_control, msg->msg_controllen );
    907 }
    908 
    909 static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
    910 {
    911    struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);
    912 
    913    while (cm) {
    914       if (cm->cmsg_level == VKI_SOL_SOCKET &&
    915           cm->cmsg_type == VKI_SCM_RIGHTS ) {
    916          Int *fds = (Int *) VKI_CMSG_DATA(cm);
    917          Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
    918                          / sizeof(int);
    919          Int i;
    920 
    921          for (i = 0; i < fdc; i++)
    922             if(VG_(clo_track_fds))
    923                // XXX: must we check the range on these fds with
    924                //      ML_(fd_allowed)()?
    925                ML_(record_fd_open_named)(tid, fds[i]);
    926       }
    927 
    928       cm = VKI_CMSG_NXTHDR(msg, cm);
    929    }
    930 }
    931 
    932 /* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
    933 static
    934 void pre_mem_read_sockaddr ( ThreadId tid,
    935                              Char *description,
    936                              struct vki_sockaddr *sa, UInt salen )
    937 {
    938    Char *outmsg;
    939    struct vki_sockaddr_un*  sun  = (struct vki_sockaddr_un *)sa;
    940    struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
    941    struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
    942 
    943    /* NULL/zero-length sockaddrs are legal */
    944    if ( sa == NULL || salen == 0 ) return;
    945 
    946    outmsg = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.pmr_sockaddr.1",
    947                                 VG_(strlen)( description ) + 30 );
    948 
    949    VG_(sprintf) ( outmsg, description, "sa_family" );
    950    PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
    951 
    952    switch (sa->sa_family) {
    953 
    954       case VKI_AF_UNIX:
    955          VG_(sprintf) ( outmsg, description, "sun_path" );
    956          PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
    957          // GrP fixme max of sun_len-2? what about nul char?
    958          break;
    959 
    960       case VKI_AF_INET:
    961          VG_(sprintf) ( outmsg, description, "sin_port" );
    962          PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
    963          VG_(sprintf) ( outmsg, description, "sin_addr" );
    964          PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
    965          break;
    966 
    967       case VKI_AF_INET6:
    968          VG_(sprintf) ( outmsg, description, "sin6_port" );
    969          PRE_MEM_READ( outmsg,
    970             (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
    971          VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
    972          PRE_MEM_READ( outmsg,
    973             (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
    974          VG_(sprintf) ( outmsg, description, "sin6_addr" );
    975          PRE_MEM_READ( outmsg,
    976             (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
    977          VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
    978          PRE_MEM_READ( outmsg,
    979             (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
    980          break;
    981 
    982       default:
    983          VG_(sprintf) ( outmsg, description, "" );
    984          PRE_MEM_READ( outmsg, (Addr) sa, salen );
    985          break;
    986    }
    987 
    988    VG_(arena_free) ( VG_AR_CORE, outmsg );
    989 }
    990 
    991 /* Dereference a pointer to a UInt. */
    992 static UInt deref_UInt ( ThreadId tid, Addr a, Char* s )
    993 {
    994    UInt* a_p = (UInt*)a;
    995    PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
    996    if (a_p == NULL)
    997       return 0;
    998    else
    999       return *a_p;
   1000 }
   1001 
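/* Helpers for the common (buf, *buflen) in/out protocol used by
   accept, getsockname, getpeername, recvfrom and friends: before the
   call, *buflen says how much of buf may be written; after a
   successful call it reports the size of the result the kernel wrote
   back. */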
   1002 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
   1003                                   Char* buf_s, Char* buflen_s )
   1004 {
   1005    if (VG_(tdict).track_pre_mem_write) {
   1006       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
   1007       if (buflen_in > 0) {
   1008          VG_(tdict).track_pre_mem_write(
   1009             Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
   1010       }
   1011    }
   1012 }
   1013 
   1014 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
   1015                                    Addr buf_p, Addr buflen_p, Char* s )
   1016 {
   1017    if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
   1018       UInt buflen_out = deref_UInt( tid, buflen_p, s);
   1019       if (buflen_out > 0 && buf_p != (Addr)NULL) {
   1020          VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
   1021       }
   1022    }
   1023 }
   1024 
   1025 /* ---------------------------------------------------------------------
   1026    Data seg end, for brk()
   1027    ------------------------------------------------------------------ */
   1028 
   1029 /*   +--------+------------+
   1030      | anon   |    resvn   |
   1031      +--------+------------+
   1032 
   1033      ^     ^  ^
   1034      |     |  boundary is page aligned
   1035      |     VG_(brk_limit) -- no alignment constraint
   1036      VG_(brk_base) -- page aligned -- does not move
   1037 
   1038      Both the anon part and the reservation part are always at least
   1039      one page.
   1040 */
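/* Worked example (illustrative values, 4 KiB pages): suppose
   VG_(brk_base) = 0x804a000, VG_(brk_limit) = 0x804a123, and the anon
   part currently covers exactly one page, so the reservation starts
   at 0x804b000.  A request for newbrk = 0x804c010 rounds up to
   0x804d000, two pages are moved from the reservation into the anon
   segment, and VG_(brk_limit) is then set to 0x804c010 exactly --
   do_brk below never rounds the recorded limit itself. */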
   1041 
   1042 /* Set the new data segment end to NEWBRK.  If this succeeds, return
   1043    NEWBRK, else return the current data segment end. */
   1044 
   1045 static Addr do_brk ( Addr newbrk )
   1046 {
   1047    NSegment const* aseg;
   1048    NSegment const* rseg;
   1049    Addr newbrkP;
   1050    SizeT delta;
   1051    Bool ok;
   1052    Bool debug = False;
   1053 
   1054    if (debug)
   1055       VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
   1056 		  VG_(brk_base), VG_(brk_limit), newbrk);
   1057 
   1058 #  if 0
   1059    if (0) show_segments("in_brk");
   1060 #  endif
   1061 
   1062    if (newbrk < VG_(brk_base))
   1063       /* Clearly impossible. */
   1064       goto bad;
   1065 
   1066    if (newbrk >= VG_(brk_base) && newbrk < VG_(brk_limit)) {
   1067       /* shrinking the data segment.  Be lazy and don't munmap the
   1068          excess area. */
   1069       NSegment const * seg = VG_(am_find_nsegment)(newbrk);
   1070       if (seg && seg->hasT)
   1071          VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
   1072                                     "do_brk(shrink)" );
   1073       /* Since we're being lazy and not unmapping pages, we have to
   1074          zero out the area, so that if the area later comes back into
   1075          circulation, it will be filled with zeroes, as if it really
   1076          had been unmapped and later remapped.  Be a bit paranoid and
   1077          try hard to ensure we're not going to segfault by doing the
   1078          write - check both ends of the range are in the same segment
   1079          and that segment is writable. */
   1080       if (seg) {
   1081          /* pre: newbrk < VG_(brk_limit)
   1082               => newbrk <= VG_(brk_limit)-1 */
   1083          NSegment const * seg2;
   1084          vg_assert(newbrk < VG_(brk_limit));
   1085          seg2 = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   1086          if (seg2 && seg == seg2 && seg->hasW)
   1087             VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
   1088       }
   1089 
   1090       VG_(brk_limit) = newbrk;
   1091       return newbrk;
   1092    }
   1093 
   1094    /* otherwise we're expanding the brk segment. */
   1095    if (VG_(brk_limit) > VG_(brk_base))
   1096       aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   1097    else
   1098       aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
   1099    rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
   1100 
   1101    /* These should be assured by setup_client_dataseg in m_main. */
   1102    vg_assert(aseg);
   1103    vg_assert(rseg);
   1104    vg_assert(aseg->kind == SkAnonC);
   1105    vg_assert(rseg->kind == SkResvn);
   1106    vg_assert(aseg->end+1 == rseg->start);
   1107 
   1108    vg_assert(newbrk >= VG_(brk_base));
   1109    if (newbrk <= rseg->start) {
   1110       /* still fits within the anon segment. */
   1111       VG_(brk_limit) = newbrk;
   1112       return newbrk;
   1113    }
   1114 
   1115    if (newbrk > rseg->end+1 - VKI_PAGE_SIZE) {
   1116       /* request is too large -- the resvn would fall below 1 page,
   1117          which isn't allowed. */
   1118       goto bad;
   1119    }
   1120 
   1121    newbrkP = VG_PGROUNDUP(newbrk);
   1122    vg_assert(newbrkP > rseg->start && newbrkP <= rseg->end+1 - VKI_PAGE_SIZE);
   1123    delta = newbrkP - rseg->start;
   1124    vg_assert(delta > 0);
   1125    vg_assert(VG_IS_PAGE_ALIGNED(delta));
   1126 
   1127    ok = VG_(am_extend_into_adjacent_reservation_client)( (NSegment*)aseg, delta );
   1128    if (!ok) goto bad;
   1129 
   1130    VG_(brk_limit) = newbrk;
   1131    return newbrk;
   1132 
   1133   bad:
   1134    return VG_(brk_limit);
   1135 }
   1136 
   1137 
   1138 /* ---------------------------------------------------------------------
   1139    Vet file descriptors for sanity
   1140    ------------------------------------------------------------------ */
   1141 /*
   1142 > - what does the "Bool soft" parameter mean?
   1143 
   1144 (Tom Hughes, 3 Oct 05):
   1145 
   1146 Whether or not to consider a file descriptor invalid if it is above
   1147 the current soft limit.
   1148 
   1149 Basically if we are testing whether a newly created file descriptor is
   1150 valid (in a post handler) then we set soft to true, and if we are
   1151 testing whether a file descriptor that is about to be used (in a pre
   1152 handler) is valid [viz, an already-existing fd] then we set it to false.
   1153 
   1154 The point is that if the (virtual) soft limit is lowered then any
   1155 existing descriptors can still be read/written/closed etc (so long as
   1156 they are below the valgrind reserved descriptors) but no new
   1157 descriptors can be created above the new soft limit.
   1158 
   1159 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
   1160 */
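/* Illustrative sketch only, not a wrapper from this file: a
   pre-handler vetting an fd the client claims already exists might do

      if (!ML_(fd_allowed)(fd, "read", tid, False/*isNewFd*/))
         SET_STATUS_Failure( VKI_EBADF );

   whereas a post-handler vetting a descriptor the kernel has just
   created passes isNewFd=True, as the socket/accept handlers further
   down do. */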
   1161 
   1162 /* Return true if we're allowed to use or create this fd */
   1163 Bool ML_(fd_allowed)(Int fd, const Char *syscallname, ThreadId tid, Bool isNewFd)
   1164 {
   1165    Bool allowed = True;
   1166 
   1167    /* hard limits always apply */
   1168    if (fd < 0 || fd >= VG_(fd_hard_limit))
   1169       allowed = False;
   1170 
   1171    /* hijacking the output fds is never allowed */
   1172    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
   1173       allowed = False;
   1174 
   1175    /* if creating a new fd (rather than using an existing one), the
   1176       soft limit must also be observed */
   1177    if (isNewFd && fd >= VG_(fd_soft_limit))
   1178       allowed = False;
   1179 
   1180    /* this looks like it ought to be included, but causes problems: */
   1181    /*
   1182    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
   1183       allowed = False;
   1184    */
   1185    /* The difficulty is as follows: consider a program P which expects
   1186       to be able to mess with (redirect) its own stderr (fd 2).
   1187       Usually to deal with P we would issue command line flags to send
   1188       logging somewhere other than stderr, so as not to disrupt P.
   1189       The problem is that -d unilaterally hijacks stderr with no
   1190       consultation with P.  And so, if this check is enabled, P will
   1191       work OK normally but fail if -d is issued.
   1192 
   1193       Basically -d is a hack and you take your chances when using it.
   1194       It's very useful for low level debugging -- particularly at
   1195       startup -- and having its presence change the behaviour of the
   1196       client is exactly what we don't want.  */
   1197 
   1198    /* croak? */
   1199    if ((!allowed) && VG_(showing_core_errors)() ) {
   1200       VG_(message)(Vg_UserMsg,
   1201          "Warning: invalid file descriptor %d in syscall %s()\n",
   1202          fd, syscallname);
   1203       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
   1204 	 VG_(message)(Vg_UserMsg,
   1205             "   Use --log-fd=<number> to select an alternative log fd.\n");
   1206       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
   1207 	 VG_(message)(Vg_UserMsg,
   1208             "   Use --xml-fd=<number> to select an alternative XML "
   1209             "output fd.\n");
   1210       // DDD: consider always printing this stack trace, it's useful.
   1211       // Also consider also making this a proper core error, ie.
   1212       // suppressible and all that.
   1213       if (VG_(clo_verbosity) > 1) {
   1214          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   1215       }
   1216    }
   1217 
   1218    return allowed;
   1219 }
   1220 
   1221 
   1222 /* ---------------------------------------------------------------------
   1223    Deal with a bunch of socket-related syscalls
   1224    ------------------------------------------------------------------ */
   1225 
   1226 /* ------ */
   1227 
   1228 void
   1229 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
   1230                                   UWord arg0, UWord arg1,
   1231                                   UWord arg2, UWord arg3 )
   1232 {
   1233    /* int socketpair(int d, int type, int protocol, int sv[2]); */
   1234    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
   1235                   arg3, 2*sizeof(int) );
   1236 }
   1237 
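/* In the POST handlers below, if a freshly created fd falls outside
   the range the client is allowed to use, we quietly close it and
   make the call appear to have failed with EMFILE, so the client
   never gets to see descriptors reserved for Valgrind itself. */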
   1238 SysRes
   1239 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
   1240                                    SysRes res,
   1241                                    UWord arg0, UWord arg1,
   1242                                    UWord arg2, UWord arg3 )
   1243 {
   1244    SysRes r = res;
   1245    Int fd1 = ((Int*)arg3)[0];
   1246    Int fd2 = ((Int*)arg3)[1];
   1247    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1248    POST_MEM_WRITE( arg3, 2*sizeof(int) );
   1249    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
   1250        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
   1251       VG_(close)(fd1);
   1252       VG_(close)(fd2);
   1253       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1254    } else {
   1255       POST_MEM_WRITE( arg3, 2*sizeof(int) );
   1256       if (VG_(clo_track_fds)) {
   1257          ML_(record_fd_open_nameless)(tid, fd1);
   1258          ML_(record_fd_open_nameless)(tid, fd2);
   1259       }
   1260    }
   1261    return r;
   1262 }
   1263 
   1264 /* ------ */
   1265 
   1266 SysRes
   1267 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
   1268 {
   1269    SysRes r = res;
   1270    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1271    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
   1272       VG_(close)(sr_Res(res));
   1273       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1274    } else {
   1275       if (VG_(clo_track_fds))
   1276          ML_(record_fd_open_nameless)(tid, sr_Res(res));
   1277    }
   1278    return r;
   1279 }
   1280 
   1281 /* ------ */
   1282 
   1283 void
   1284 ML_(generic_PRE_sys_bind) ( ThreadId tid,
   1285                             UWord arg0, UWord arg1, UWord arg2 )
   1286 {
   1287    /* int bind(int sockfd, struct sockaddr *my_addr,
   1288                int addrlen); */
   1289    pre_mem_read_sockaddr(
   1290       tid, "socketcall.bind(my_addr.%s)",
   1291       (struct vki_sockaddr *) arg1, arg2
   1292    );
   1293 }
   1294 
   1295 /* ------ */
   1296 
   1297 void
   1298 ML_(generic_PRE_sys_accept) ( ThreadId tid,
   1299                               UWord arg0, UWord arg1, UWord arg2 )
   1300 {
   1301    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
   1302    Addr addr_p     = arg1;
   1303    Addr addrlen_p  = arg2;
   1304    if (addr_p != (Addr)NULL)
   1305       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
   1306                                    "socketcall.accept(addr)",
   1307                                    "socketcall.accept(addrlen_in)" );
   1308 }
   1309 
   1310 SysRes
   1311 ML_(generic_POST_sys_accept) ( ThreadId tid,
   1312                                SysRes res,
   1313                                UWord arg0, UWord arg1, UWord arg2 )
   1314 {
   1315    SysRes r = res;
   1316    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1317    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
   1318       VG_(close)(sr_Res(res));
   1319       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   1320    } else {
   1321       Addr addr_p     = arg1;
   1322       Addr addrlen_p  = arg2;
   1323       if (addr_p != (Addr)NULL)
   1324          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
   1325                                        "socketcall.accept(addrlen_out)" );
   1326       if (VG_(clo_track_fds))
   1327           ML_(record_fd_open_nameless)(tid, sr_Res(res));
   1328    }
   1329    return r;
   1330 }
   1331 
   1332 /* ------ */
   1333 
   1334 void
   1335 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
   1336                               UWord arg0, UWord arg1, UWord arg2,
   1337                               UWord arg3, UWord arg4, UWord arg5 )
   1338 {
   1339    /* int sendto(int s, const void *msg, int len,
   1340                  unsigned int flags,
   1341                  const struct sockaddr *to, int tolen); */
   1342    PRE_MEM_READ( "socketcall.sendto(msg)",
   1343                  arg1, /* msg */
   1344                  arg2  /* len */ );
   1345    pre_mem_read_sockaddr(
   1346       tid, "socketcall.sendto(to.%s)",
   1347       (struct vki_sockaddr *) arg4, arg5
   1348    );
   1349 }
   1350 
   1351 /* ------ */
   1352 
   1353 void
   1354 ML_(generic_PRE_sys_send) ( ThreadId tid,
   1355                             UWord arg0, UWord arg1, UWord arg2 )
   1356 {
   1357    /* int send(int s, const void *msg, size_t len, int flags); */
   1358    PRE_MEM_READ( "socketcall.send(msg)",
   1359                   arg1, /* msg */
   1360                   arg2  /* len */ );
   1361 
   1362 }
   1363 
   1364 /* ------ */
   1365 
   1366 void
   1367 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
   1368                                 UWord arg0, UWord arg1, UWord arg2,
   1369                                 UWord arg3, UWord arg4, UWord arg5 )
   1370 {
   1371    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
   1372                    struct sockaddr *from, int *fromlen); */
   1373    Addr buf_p      = arg1;
   1374    Int  len        = arg2;
   1375    Addr from_p     = arg4;
   1376    Addr fromlen_p  = arg5;
   1377    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
   1378    if (from_p != (Addr)NULL)
   1379       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
   1380                                    "socketcall.recvfrom(from)",
   1381                                    "socketcall.recvfrom(fromlen_in)" );
   1382 }
   1383 
   1384 void
   1385 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
   1386                                  SysRes res,
   1387                                  UWord arg0, UWord arg1, UWord arg2,
   1388                                  UWord arg3, UWord arg4, UWord arg5 )
   1389 {
   1390    Addr buf_p      = arg1;
   1391    Int  len        = arg2;
   1392    Addr from_p     = arg4;
   1393    Addr fromlen_p  = arg5;
   1394 
   1395    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1396    if (from_p != (Addr)NULL)
   1397       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
   1398                                     "socketcall.recvfrom(fromlen_out)" );
   1399    POST_MEM_WRITE( buf_p, len );
   1400 }
   1401 
   1402 /* ------ */
   1403 
   1404 void
   1405 ML_(generic_PRE_sys_recv) ( ThreadId tid,
   1406                             UWord arg0, UWord arg1, UWord arg2 )
   1407 {
   1408    /* int recv(int s, void *buf, int len, unsigned int flags); */
   1409    /* man 2 recv says:
   1410       The  recv call is normally used only on a connected socket
   1411       (see connect(2)) and is identical to recvfrom with a  NULL
   1412       from parameter.
   1413    */
   1414    PRE_MEM_WRITE( "socketcall.recv(buf)",
   1415                   arg1, /* buf */
   1416                   arg2  /* len */ );
   1417 }
   1418 
   1419 void
   1420 ML_(generic_POST_sys_recv) ( ThreadId tid,
   1421                              UWord res,
   1422                              UWord arg0, UWord arg1, UWord arg2 )
   1423 {
   1424    if (res >= 0 && arg1 != 0) {
   1425       POST_MEM_WRITE( arg1, /* buf */
   1426                       arg2  /* len */ );
   1427    }
   1428 }
   1429 
   1430 /* ------ */
   1431 
   1432 void
   1433 ML_(generic_PRE_sys_connect) ( ThreadId tid,
   1434                                UWord arg0, UWord arg1, UWord arg2 )
   1435 {
   1436    /* int connect(int sockfd,
   1437                   struct sockaddr *serv_addr, int addrlen ); */
   1438    pre_mem_read_sockaddr( tid,
   1439                           "socketcall.connect(serv_addr.%s)",
   1440                           (struct vki_sockaddr *) arg1, arg2);
   1441 }
   1442 
   1443 /* ------ */
   1444 
   1445 void
   1446 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
   1447                                   UWord arg0, UWord arg1, UWord arg2,
   1448                                   UWord arg3, UWord arg4 )
   1449 {
   1450    /* int setsockopt(int s, int level, int optname,
   1451                      const void *optval, int optlen); */
   1452    PRE_MEM_READ( "socketcall.setsockopt(optval)",
   1453                  arg3, /* optval */
   1454                  arg4  /* optlen */ );
   1455 }
   1456 
   1457 /* ------ */
   1458 
   1459 void
   1460 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
   1461                                    UWord arg0, UWord arg1, UWord arg2 )
   1462 {
   1463    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
   1464    Addr name_p     = arg1;
   1465    Addr namelen_p  = arg2;
   1466    /* Nb: name_p cannot be NULL */
   1467    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
   1468                                 "socketcall.getsockname(name)",
   1469                                 "socketcall.getsockname(namelen_in)" );
   1470 }
   1471 
   1472 void
   1473 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
   1474                                     SysRes res,
   1475                                     UWord arg0, UWord arg1, UWord arg2 )
   1476 {
   1477    Addr name_p     = arg1;
   1478    Addr namelen_p  = arg2;
   1479    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1480    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
   1481                                  "socketcall.getsockname(namelen_out)" );
   1482 }
   1483 
   1484 /* ------ */
   1485 
   1486 void
   1487 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
   1488                                    UWord arg0, UWord arg1, UWord arg2 )
   1489 {
   1490    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
   1491    Addr name_p     = arg1;
   1492    Addr namelen_p  = arg2;
   1493    /* Nb: name_p cannot be NULL */
   1494    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
   1495                                 "socketcall.getpeername(name)",
   1496                                 "socketcall.getpeername(namelen_in)" );
   1497 }
   1498 
   1499 void
   1500 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
   1501                                     SysRes res,
   1502                                     UWord arg0, UWord arg1, UWord arg2 )
   1503 {
   1504    Addr name_p     = arg1;
   1505    Addr namelen_p  = arg2;
   1506    vg_assert(!sr_isError(res)); /* guaranteed by caller */
   1507    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
   1508                                  "socketcall.getpeername(namelen_out)" );
   1509 }
   1510 
   1511 /* ------ */
   1512 
   1513 void
   1514 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid,
   1515                                UWord arg0, UWord arg1 )
   1516 {
   1517    /* int sendmsg(int s, const struct msghdr *msg, int flags); */
   1518    struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
   1519    msghdr_foreachfield ( tid, msg, pre_mem_read_sendmsg );
   1520 }
   1521 
   1522 /* ------ */
   1523 
   1524 void
   1525 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid,
   1526                                UWord arg0, UWord arg1 )
   1527 {
   1528    /* int recvmsg(int s, struct msghdr *msg, int flags); */
   1529    struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
   1530    msghdr_foreachfield ( tid, msg, pre_mem_write_recvmsg );
   1531 }
   1532 
   1533 void
   1534 ML_(generic_POST_sys_recvmsg) ( ThreadId tid,
   1535                                 UWord arg0, UWord arg1 )
   1536 {
   1537    struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
   1538    msghdr_foreachfield( tid, msg, post_mem_write_recvmsg );
   1539    check_cmsg_for_fds( tid, msg );
   1540 }
   1541 
   1542 
   1543 /* ---------------------------------------------------------------------
   1544    Deal with a bunch of IPC related syscalls
   1545    ------------------------------------------------------------------ */
   1546 
   1547 /* ------ */
   1548 
   1549 void
   1550 ML_(generic_PRE_sys_semop) ( ThreadId tid,
   1551                              UWord arg0, UWord arg1, UWord arg2 )
   1552 {
   1553    /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
   1554    PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
   1555 }
   1556 
   1557 /* ------ */
   1558 
   1559 void
   1560 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
   1561                                   UWord arg0, UWord arg1,
   1562                                   UWord arg2, UWord arg3 )
   1563 {
   1564    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
   1565                      struct timespec *timeout); */
   1566    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
   1567    if (arg3 != 0)
   1568       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
   1569 }
   1570 
   1571 /* ------ */
   1572 
   1573 static
   1574 UInt get_sem_count( Int semid )
   1575 {
   1576    struct vki_semid_ds buf;
   1577    union vki_semun arg;
   1578    SysRes res;
   1579 
   1580    /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
   1581       (experimental) otherwise complains that the use in the return
   1582       statement below is uninitialised. */
   1583    buf.sem_nsems = 0;
   1584 
   1585    arg.buf = &buf;
   1586 
   1587 #  ifdef __NR_semctl
   1588    res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
   1589 #  else
   1590    res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
   1591                           VKI_IPC_STAT, (UWord)&arg);
   1592 #  endif
   1593    if (sr_isError(res))
   1594       return 0;
   1595 
   1596    return buf.sem_nsems;
   1597 }
   1598 
   1599 void
   1600 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
   1601                               UWord arg0, UWord arg1,
   1602                               UWord arg2, UWord arg3 )
   1603 {
   1604    /* int semctl(int semid, int semnum, int cmd, ...); */
   1605    union vki_semun arg = *(union vki_semun *)&arg3;
   1606    UInt nsems;
   1607    switch (arg2 /* cmd */) {
   1608 #if defined(VKI_IPC_INFO)
   1609    case VKI_IPC_INFO:
   1610    case VKI_SEM_INFO:
   1611    case VKI_IPC_INFO|VKI_IPC_64:
   1612    case VKI_SEM_INFO|VKI_IPC_64:
   1613       PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
   1614                      (Addr)arg.buf, sizeof(struct vki_seminfo) );
   1615       break;
   1616 #endif
   1617 
   1618    case VKI_IPC_STAT:
   1619 #if defined(VKI_SEM_STAT)
   1620    case VKI_SEM_STAT:
   1621 #endif
   1622       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
   1623                      (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1624       break;
   1625 
   1626 #if defined(VKI_IPC_64)
   1627    case VKI_IPC_STAT|VKI_IPC_64:
   1628 #if defined(VKI_SEM_STAT)
   1629    case VKI_SEM_STAT|VKI_IPC_64:
   1630 #endif
   1631       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
   1632                      (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1633       break;
   1634 #endif
   1635 
   1636    case VKI_IPC_SET:
   1637       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
   1638                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1639       break;
   1640 
   1641 #if defined(VKI_IPC_64)
   1642    case VKI_IPC_SET|VKI_IPC_64:
   1643       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
   1644                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1645       break;
   1646 #endif
   1647 
   1648    case VKI_GETALL:
   1649 #if defined(VKI_IPC_64)
   1650    case VKI_GETALL|VKI_IPC_64:
   1651 #endif
   1652       nsems = get_sem_count( arg0 );
   1653       PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
   1654                      (Addr)arg.array, sizeof(unsigned short) * nsems );
   1655       break;
   1656 
   1657    case VKI_SETALL:
   1658 #if defined(VKI_IPC_64)
   1659    case VKI_SETALL|VKI_IPC_64:
   1660 #endif
   1661       nsems = get_sem_count( arg0 );
   1662       PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
   1663                     (Addr)arg.array, sizeof(unsigned short) * nsems );
   1664       break;
   1665    }
   1666 }
   1667 
   1668 void
   1669 ML_(generic_POST_sys_semctl) ( ThreadId tid,
   1670                                UWord res,
   1671                                UWord arg0, UWord arg1,
   1672                                UWord arg2, UWord arg3 )
   1673 {
   1674    union vki_semun arg = *(union vki_semun *)&arg3;
   1675    UInt nsems;
   1676    switch (arg2 /* cmd */) {
   1677 #if defined(VKI_IPC_INFO)
   1678    case VKI_IPC_INFO:
   1679    case VKI_SEM_INFO:
   1680    case VKI_IPC_INFO|VKI_IPC_64:
   1681    case VKI_SEM_INFO|VKI_IPC_64:
   1682       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
   1683       break;
   1684 #endif
   1685 
   1686    case VKI_IPC_STAT:
   1687 #if defined(VKI_SEM_STAT)
   1688    case VKI_SEM_STAT:
   1689 #endif
   1690       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
   1691       break;
   1692 
   1693 #if defined(VKI_IPC_64)
   1694    case VKI_IPC_STAT|VKI_IPC_64:
   1695    case VKI_SEM_STAT|VKI_IPC_64:
   1696       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
   1697       break;
   1698 #endif
   1699 
   1700    case VKI_GETALL:
   1701 #if defined(VKI_IPC_64)
   1702    case VKI_GETALL|VKI_IPC_64:
   1703 #endif
   1704       nsems = get_sem_count( arg0 );
   1705       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
   1706       break;
   1707    }
   1708 }
   1709 
   1710 /* ------ */
   1711 
   1712 /* ------ */
   1713 
   1714 static
   1715 UInt get_shm_size ( Int shmid )
   1716 {
   1717 #ifdef __NR_shmctl
   1718 #  ifdef VKI_IPC_64
   1719    struct vki_shmid64_ds buf;
   1720 #    ifdef VGP_amd64_linux
   1721      /* See bug 222545 comment 7 */
   1722      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
   1723                                      VKI_IPC_STAT, (UWord)&buf);
   1724 #    else
   1725      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
   1726                                      VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
   1727 #    endif
   1728 #  else /* !def VKI_IPC_64 */
   1729    struct vki_shmid_ds buf;
   1730    SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
   1731 #  endif /* def VKI_IPC_64 */
   1732 #else
   1733    struct vki_shmid_ds buf;
   1734    SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
   1735                                  VKI_IPC_STAT, 0, (UWord)&buf);
   1736 #endif
   1737    if (sr_isError(__res))
   1738       return 0;
   1739 
   1740    return buf.shm_segsz;
   1741 }
   1742 
   1743 UWord
   1744 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
   1745                              UWord arg0, UWord arg1, UWord arg2 )
   1746 {
   1747    /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
   1748    UInt  segmentSize = get_shm_size ( arg0 );
   1749    UWord tmp;
   1750    Bool  ok;
   1751    if (arg1 == 0) {
   1752       /* arm-linux only: work around the fact that
   1753          VG_(am_get_advisory_client_simple) produces something that is
   1754          VKI_PAGE_SIZE aligned, whereas what we want is something
   1755          VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
   1756          increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
   1757          then round the result up to the next VKI_SHMLBA boundary.
   1758          See bug 222545 comment 15.  So far, arm-linux is the only
   1759          platform where this is known to be necessary. */
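               /* Worked example (hypothetical numbers, illustration only):
                  with VKI_PAGE_SIZE == 0x1000 and VKI_SHMLBA == 0x4000, a
                  0x6000-byte segment is enlarged to 0x9000 before asking
                  aspacem, and the advised address is then rounded up with
                  VG_ROUNDUP(tmp, VKI_SHMLBA) so the attach is SHMLBA
                  aligned. */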
   1760       vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
   1761       if (VKI_SHMLBA > VKI_PAGE_SIZE) {
   1762          segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
   1763       }
   1764       tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
   1765       if (ok) {
   1766          if (VKI_SHMLBA > VKI_PAGE_SIZE) {
   1767             arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
   1768          } else {
   1769             arg1 = tmp;
   1770          }
   1771       }
   1772    }
   1773    else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
   1774       arg1 = 0;
   1775    return arg1;
   1776 }
   1777 
   1778 void
   1779 ML_(generic_POST_sys_shmat) ( ThreadId tid,
   1780                               UWord res,
   1781                               UWord arg0, UWord arg1, UWord arg2 )
   1782 {
   1783    UInt segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
   1784    if ( segmentSize > 0 ) {
   1785       UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
   1786       Bool d;
   1787 
   1788       if (arg2 & VKI_SHM_RDONLY)
   1789          prot &= ~VKI_PROT_WRITE;
   1790       /* It isn't exactly correct to pass 0 for the fd and offset
   1791          here.  The kernel seems to think the corresponding section
   1792          does have dev/ino numbers:
   1793 
   1794          04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
   1795 
   1796          However there is no obvious way to find them.  In order to
   1797          cope with the discrepancy, aspacem's sync checker omits the
   1798          dev/ino correspondence check in cases where V does not know
   1799          the dev/ino. */
   1800       d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
   1801 
   1802       /* we don't distinguish whether it's read-only or
   1803        * read-write -- it doesn't matter really. */
   1804       VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
   1805                               0/*di_handle*/ );
   1806       if (d)
   1807          VG_(discard_translations)( (Addr64)res,
   1808                                     (ULong)VG_PGROUNDUP(segmentSize),
   1809                                     "ML_(generic_POST_sys_shmat)" );
   1810    }
   1811 }
   1812 
   1813 /* ------ */
   1814 
   1815 Bool
   1816 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
   1817 {
   1818    /* int shmdt(const void *shmaddr); */
   1819    return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
   1820 }
   1821 
   1822 void
   1823 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
   1824 {
   1825    NSegment const* s = VG_(am_find_nsegment)(arg0);
   1826 
   1827    if (s != NULL) {
   1828       Addr  s_start = s->start;
   1829       SizeT s_len   = s->end+1 - s->start;
   1830       Bool  d;
   1831 
   1832       vg_assert(s->kind == SkShmC);
   1833       vg_assert(s->start == arg0);
   1834 
   1835       d = VG_(am_notify_munmap)(s_start, s_len);
   1836       s = NULL; /* s is now invalid */
   1837       VG_TRACK( die_mem_munmap, s_start, s_len );
   1838       if (d)
   1839          VG_(discard_translations)( (Addr64)s_start,
   1840                                     (ULong)s_len,
   1841                                     "ML_(generic_POST_sys_shmdt)" );
   1842    }
   1843 }
   1844 /* ------ */
   1845 
   1846 void
   1847 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
   1848                               UWord arg0, UWord arg1, UWord arg2 )
   1849 {
   1850    /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
   1851    switch (arg1 /* cmd */) {
   1852 #if defined(VKI_IPC_INFO)
   1853    case VKI_IPC_INFO:
   1854       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
   1855                      arg2, sizeof(struct vki_shminfo) );
   1856       break;
   1857 #if defined(VKI_IPC_64)
   1858    case VKI_IPC_INFO|VKI_IPC_64:
   1859       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
   1860                      arg2, sizeof(struct vki_shminfo64) );
   1861       break;
   1862 #endif
   1863 #endif
   1864 
   1865 #if defined(VKI_SHM_INFO)
   1866    case VKI_SHM_INFO:
   1867 #if defined(VKI_IPC_64)
   1868    case VKI_SHM_INFO|VKI_IPC_64:
   1869 #endif
   1870       PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
   1871                      arg2, sizeof(struct vki_shm_info) );
   1872       break;
   1873 #endif
   1874 
   1875    case VKI_IPC_STAT:
   1876 #if defined(VKI_SHM_STAT)
   1877    case VKI_SHM_STAT:
   1878 #endif
   1879       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
   1880                      arg2, sizeof(struct vki_shmid_ds) );
   1881       break;
   1882 
   1883 #if defined(VKI_IPC_64)
   1884    case VKI_IPC_STAT|VKI_IPC_64:
   1885    case VKI_SHM_STAT|VKI_IPC_64:
    1886       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
   1887                      arg2, sizeof(struct vki_shmid64_ds) );
   1888       break;
   1889 #endif
   1890 
   1891    case VKI_IPC_SET:
    1892       PRE_MEM_READ( "shmctl(IPC_SET, buf)",
   1893                     arg2, sizeof(struct vki_shmid_ds) );
   1894       break;
   1895 
   1896 #if defined(VKI_IPC_64)
   1897    case VKI_IPC_SET|VKI_IPC_64:
    1898       PRE_MEM_READ( "shmctl(IPC_SET, buf)",
   1899                     arg2, sizeof(struct vki_shmid64_ds) );
   1900       break;
   1901 #endif
   1902    }
   1903 }
   1904 
   1905 void
   1906 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
   1907                                UWord res,
   1908                                UWord arg0, UWord arg1, UWord arg2 )
   1909 {
   1910    switch (arg1 /* cmd */) {
   1911 #if defined(VKI_IPC_INFO)
   1912    case VKI_IPC_INFO:
   1913       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
   1914       break;
   1915    case VKI_IPC_INFO|VKI_IPC_64:
   1916       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
   1917       break;
   1918 #endif
   1919 
   1920 #if defined(VKI_SHM_INFO)
   1921    case VKI_SHM_INFO:
   1922    case VKI_SHM_INFO|VKI_IPC_64:
   1923       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
   1924       break;
   1925 #endif
   1926 
   1927    case VKI_IPC_STAT:
   1928 #if defined(VKI_SHM_STAT)
   1929    case VKI_SHM_STAT:
   1930 #endif
   1931       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
   1932       break;
   1933 
   1934 #if defined(VKI_IPC_64)
   1935    case VKI_IPC_STAT|VKI_IPC_64:
   1936    case VKI_SHM_STAT|VKI_IPC_64:
   1937       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
   1938       break;
   1939 #endif
   1940 
   1941 
   1942    }
   1943 }
   1944 
   1945 
   1946 /* ---------------------------------------------------------------------
   1947    Generic handler for mmap
   1948    ------------------------------------------------------------------ */
   1949 
   1950 /*
    1951  * Although mmap is specified by POSIX and the arguments are generally
    1952  * consistent across platforms, the precise details of the low-level
    1953  * argument-passing conventions differ. For example:
   1954  *
   1955  * - On x86-linux there is mmap (aka old_mmap) which takes the
   1956  *   arguments in a memory block and the offset in bytes; and
   1957  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
   1958  *   way and the offset in pages.
   1959  *
   1960  * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
   1961  *   arguments in the normal way and the offset in bytes; and
   1962  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
   1963  *   way and the offset in pages.
   1964  *
   1965  * - On amd64-linux everything is simple and there is just the one
   1966  *   call, mmap (aka sys_mmap)  which takes the arguments in the
   1967  *   normal way and the offset in bytes.
   1968  *
   1969  * To cope with all this we provide a generic handler function here
   1970  * and then each platform implements one or more system call handlers
   1971  * which call this generic routine after extracting and normalising
   1972  * the arguments.
   1973  */
   1974 
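
         /* Illustrative sketch (not part of this file): a hypothetical
            mmap2-style wrapper whose sixth argument is an offset in pages
            would normalise it to bytes and then delegate to the generic
            handler below, roughly:

               PRE(sys_mmap2)
               {
                  SysRes r;
                  // ARG6 arrives in pages; the generic handler wants bytes.
                  r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3,
                                                 ARG4, ARG5,
                                                 (Off64T)ARG6 * (Off64T)VKI_PAGE_SIZE );
                  SET_STATUS_from_SysRes(r);
               }

            The real per-platform wrappers differ in detail; this only shows
            the extract-normalise-delegate shape described above. */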
   1975 SysRes
   1976 ML_(generic_PRE_sys_mmap) ( ThreadId tid,
   1977                             UWord arg1, UWord arg2, UWord arg3,
   1978                             UWord arg4, UWord arg5, Off64T arg6 )
   1979 {
   1980    Addr       advised;
   1981    SysRes     sres;
   1982    MapRequest mreq;
   1983    Bool       mreq_ok;
   1984 
   1985 #if defined(VGO_darwin)
    1986    // Nb: we can't use this on Darwin; it has races:
   1987    // * needs to RETRY if advisory succeeds but map fails
   1988    //   (could have been some other thread in a nonblocking call)
   1989    // * needs to not use fixed-position mmap() on Darwin
   1990    //   (mmap will cheerfully smash whatever's already there, which might
   1991    //   be a new mapping from some other thread in a nonblocking call)
   1992    VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
   1993 #endif
   1994 
   1995    if (arg2 == 0) {
   1996       /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
   1997          shall be established. */
   1998       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   1999    }
   2000 
   2001    if (!VG_IS_PAGE_ALIGNED(arg1)) {
   2002       /* zap any misaligned addresses. */
   2003       /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
   2004          to fail.   Here, we catch them all. */
   2005       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2006    }
   2007 
   2008    if (!VG_IS_PAGE_ALIGNED(arg6)) {
   2009       /* zap any misaligned offsets. */
   2010       /* SuSV3 says: The off argument is constrained to be aligned and
   2011          sized according to the value returned by sysconf() when
   2012          passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
   2013       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2014    }
   2015 
   2016    /* Figure out what kind of allocation constraints there are
   2017       (fixed/hint/any), and ask aspacem what we should do. */
   2018    mreq.start = arg1;
   2019    mreq.len   = arg2;
   2020    if (arg4 & VKI_MAP_FIXED) {
   2021       mreq.rkind = MFixed;
   2022    } else
   2023    if (arg1 != 0) {
   2024       mreq.rkind = MHint;
   2025    } else {
   2026       mreq.rkind = MAny;
   2027    }
   2028 
   2029    /* Enquire ... */
   2030    advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   2031    if (!mreq_ok) {
   2032       /* Our request was bounced, so we'd better fail. */
   2033       return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2034    }
   2035 
   2036    /* Otherwise we're OK (so far).  Install aspacem's choice of
   2037       address, and let the mmap go through.  */
   2038    sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
   2039                                     arg4 | VKI_MAP_FIXED,
   2040                                     arg5, arg6);
   2041 
   2042    /* A refinement: it may be that the kernel refused aspacem's choice
   2043       of address.  If we were originally asked for a hinted mapping,
   2044       there is still a last chance: try again at any address.
   2045       Hence: */
   2046    if (mreq.rkind == MHint && sr_isError(sres)) {
   2047       mreq.start = 0;
   2048       mreq.len   = arg2;
   2049       mreq.rkind = MAny;
   2050       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   2051       if (!mreq_ok) {
   2052          /* Our request was bounced, so we'd better fail. */
   2053          return VG_(mk_SysRes_Error)( VKI_EINVAL );
   2054       }
   2055       /* and try again with the kernel */
   2056       sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
   2057                                        arg4 | VKI_MAP_FIXED,
   2058                                        arg5, arg6);
   2059    }
   2060 
   2061    if (!sr_isError(sres)) {
   2062       ULong di_handle;
   2063       /* Notify aspacem. */
   2064       notify_core_of_mmap(
   2065          (Addr)sr_Res(sres), /* addr kernel actually assigned */
   2066          arg2, /* length */
   2067          arg3, /* prot */
   2068          arg4, /* the original flags value */
   2069          arg5, /* fd */
   2070          arg6  /* offset */
   2071       );
   2072       /* Load symbols? */
   2073       di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
   2074                                        False/*allow_SkFileV*/ );
   2075       /* Notify the tool. */
   2076       notify_tool_of_mmap(
   2077          (Addr)sr_Res(sres), /* addr kernel actually assigned */
   2078          arg2, /* length */
   2079          arg3, /* prot */
   2080          di_handle /* so the tool can refer to the read debuginfo later,
   2081                       if it wants. */
   2082       );
   2083    }
   2084 
   2085    /* Stay sane */
   2086    if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
   2087       vg_assert(sr_Res(sres) == arg1);
   2088 
   2089    return sres;
   2090 }
   2091 
   2092 
   2093 /* ---------------------------------------------------------------------
   2094    The Main Entertainment ... syscall wrappers
   2095    ------------------------------------------------------------------ */
   2096 
   2097 /* Note: the PRE() and POST() wrappers are for the actual functions
   2098    implementing the system calls in the OS kernel.  These mostly have
   2099    names like sys_write();  a few have names like old_mmap().  See the
   2100    comment for ML_(syscall_table)[] for important info about the __NR_foo
   2101    constants and their relationship to the sys_foo() functions.
   2102 
   2103    Some notes about names used for syscalls and args:
   2104    - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
   2105      ambiguity.
   2106 
   2107    - For error messages, we generally use a somewhat generic name
   2108      for the syscall (eg. "write" rather than "sys_write").  This should be
   2109      good enough for the average user to understand what is happening,
   2110      without confusing them with names like "sys_write".
   2111 
   2112    - Also, for error messages the arg names are mostly taken from the man
   2113      pages (even though many of those man pages are really for glibc
   2114      functions of the same name), rather than from the OS kernel source,
   2115      for the same reason -- a user presented with a "bogus foo(bar)" arg
   2116      will most likely look at the "foo" man page to see which is the "bar"
   2117      arg.
   2118 
   2119    Note that we use our own vki_* types.  The one exception is in
   2120    PRE_REG_READn calls, where pointer types haven't been changed, because
   2121    they don't need to be -- eg. for "foo*" to be used, the type foo need not
   2122    be visible.
   2123 
   2124    XXX: some of these are arch-specific, and should be factored out.
   2125 */
   2126 
   2127 #define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
   2128 #define POST(name)     DEFN_POST_TEMPLATE(generic, name)
   2129 
   2130 // Macros to support 64-bit syscall args split into two 32 bit values
   2131 #if defined(VG_LITTLEENDIAN)
   2132 #define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
   2133 #define MERGE64_FIRST(name) name##_low
   2134 #define MERGE64_SECOND(name) name##_high
   2135 #elif defined(VG_BIGENDIAN)
   2136 #define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
   2137 #define MERGE64_FIRST(name) name##_high
   2138 #define MERGE64_SECOND(name) name##_low
   2139 #else
   2140 #error Unknown endianness
   2141 #endif
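
         // Example (illustration only): on a 32-bit little-endian target a
         // 64-bit file offset arriving in two registers is rebuilt as
         //
         //    Off64T offset = MERGE64(ARG4, ARG5);
         //    // = ((ULong)ARG4) | (((ULong)ARG5) << 32), so ARG4 is the low half
         //
         // and the two halves are declared to PRE_REG_READ5 as
         // MERGE64_FIRST(offset) / MERGE64_SECOND(offset); see sys_pwrite64
         // and sys_pread64 below.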
   2142 
   2143 PRE(sys_exit)
   2144 {
   2145    ThreadState* tst;
   2146    /* simple; just make this thread exit */
   2147    PRINT("exit( %ld )", ARG1);
   2148    PRE_REG_READ1(void, "exit", int, status);
   2149    tst = VG_(get_ThreadState)(tid);
   2150    /* Set the thread's status to be exiting, then claim that the
   2151       syscall succeeded. */
   2152    tst->exitreason = VgSrc_ExitThread;
   2153    tst->os_state.exitcode = ARG1;
   2154    SET_STATUS_Success(0);
   2155 }
   2156 
   2157 PRE(sys_ni_syscall)
   2158 {
   2159    PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
   2160       VG_SYSNUM_STRING(SYSNO));
   2161    PRE_REG_READ0(long, "ni_syscall");
   2162    SET_STATUS_Failure( VKI_ENOSYS );
   2163 }
   2164 
   2165 PRE(sys_iopl)
   2166 {
   2167    PRINT("sys_iopl ( %ld )", ARG1);
   2168    PRE_REG_READ1(long, "iopl", unsigned long, level);
   2169 }
   2170 
   2171 PRE(sys_fsync)
   2172 {
   2173    *flags |= SfMayBlock;
   2174    PRINT("sys_fsync ( %ld )", ARG1);
   2175    PRE_REG_READ1(long, "fsync", unsigned int, fd);
   2176 }
   2177 
   2178 PRE(sys_fdatasync)
   2179 {
   2180    *flags |= SfMayBlock;
   2181    PRINT("sys_fdatasync ( %ld )", ARG1);
   2182    PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
   2183 }
   2184 
   2185 PRE(sys_msync)
   2186 {
   2187    *flags |= SfMayBlock;
   2188    PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   2189    PRE_REG_READ3(long, "msync",
   2190                  unsigned long, start, vki_size_t, length, int, flags);
   2191    PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
   2192 }
   2193 
   2194 // Nb: getpmsg() and putpmsg() are special additional syscalls used in early
   2195 // versions of LiS (Linux Streams).  They are not part of the kernel.
    2196 // Therefore, we have to provide this type ourselves, rather than getting it
   2197 // from the kernel sources.
   2198 struct vki_pmsg_strbuf {
   2199    int     maxlen;         /* no. of bytes in buffer */
   2200    int     len;            /* no. of bytes returned */
   2201    vki_caddr_t buf;        /* pointer to data */
   2202 };
   2203 PRE(sys_getpmsg)
   2204 {
   2205    /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
   2206    struct vki_pmsg_strbuf *ctrl;
   2207    struct vki_pmsg_strbuf *data;
   2208    *flags |= SfMayBlock;
   2209    PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   2210    PRE_REG_READ5(int, "getpmsg",
   2211                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
   2212                  int *, bandp, int *, flagsp);
   2213    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2214    data = (struct vki_pmsg_strbuf *)ARG3;
   2215    if (ctrl && ctrl->maxlen > 0)
   2216       PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
   2217    if (data && data->maxlen > 0)
   2218       PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
   2219    if (ARG4)
   2220       PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
   2221    if (ARG5)
   2222       PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
   2223 }
   2224 POST(sys_getpmsg)
   2225 {
   2226    struct vki_pmsg_strbuf *ctrl;
   2227    struct vki_pmsg_strbuf *data;
   2228    vg_assert(SUCCESS);
   2229    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2230    data = (struct vki_pmsg_strbuf *)ARG3;
   2231    if (RES == 0 && ctrl && ctrl->len > 0) {
   2232       POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
   2233    }
   2234    if (RES == 0 && data && data->len > 0) {
   2235       POST_MEM_WRITE( (Addr)data->buf, data->len);
   2236    }
   2237 }
   2238 
   2239 PRE(sys_putpmsg)
   2240 {
   2241    /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
   2242    struct vki_pmsg_strbuf *ctrl;
   2243    struct vki_pmsg_strbuf *data;
   2244    *flags |= SfMayBlock;
   2245    PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
   2246    PRE_REG_READ5(int, "putpmsg",
   2247                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
   2248                  int, band, int, flags);
   2249    ctrl = (struct vki_pmsg_strbuf *)ARG2;
   2250    data = (struct vki_pmsg_strbuf *)ARG3;
   2251    if (ctrl && ctrl->len > 0)
   2252       PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
   2253    if (data && data->len > 0)
   2254       PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
   2255 }
   2256 
   2257 PRE(sys_getitimer)
   2258 {
   2259    struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2260    PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
   2261    PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
   2262 
   2263    PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
   2264    PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
   2265 }
   2266 
   2267 POST(sys_getitimer)
   2268 {
   2269    if (ARG2 != (Addr)NULL) {
   2270       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2271       POST_timeval_WRITE( &(value->it_interval) );
   2272       POST_timeval_WRITE( &(value->it_value) );
   2273    }
   2274 }
   2275 
   2276 PRE(sys_setitimer)
   2277 {
   2278    PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
   2279    PRE_REG_READ3(long, "setitimer",
   2280                  int, which,
   2281                  struct itimerval *, value, struct itimerval *, ovalue);
   2282    if (ARG2 != (Addr)NULL) {
   2283       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   2284       PRE_timeval_READ( "setitimer(&value->it_interval)",
   2285                          &(value->it_interval));
   2286       PRE_timeval_READ( "setitimer(&value->it_value)",
   2287                          &(value->it_value));
   2288    }
   2289    if (ARG3 != (Addr)NULL) {
   2290       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
   2291       PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
   2292                          &(ovalue->it_interval));
   2293       PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
   2294                          &(ovalue->it_value));
   2295    }
   2296 }
   2297 
   2298 POST(sys_setitimer)
   2299 {
   2300    if (ARG3 != (Addr)NULL) {
   2301       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
   2302       POST_timeval_WRITE( &(ovalue->it_interval) );
   2303       POST_timeval_WRITE( &(ovalue->it_value) );
   2304    }
   2305 }
   2306 
   2307 PRE(sys_chroot)
   2308 {
   2309    PRINT("sys_chroot ( %#lx )", ARG1);
   2310    PRE_REG_READ1(long, "chroot", const char *, path);
   2311    PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
   2312 }
   2313 
   2314 PRE(sys_madvise)
   2315 {
   2316    *flags |= SfMayBlock;
   2317    PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   2318    PRE_REG_READ3(long, "madvise",
   2319                  unsigned long, start, vki_size_t, length, int, advice);
   2320 }
   2321 
   2322 #if HAVE_MREMAP
   2323 PRE(sys_mremap)
   2324 {
   2325    // Nb: this is different to the glibc version described in the man pages,
   2326    // which lacks the fifth 'new_address' argument.
   2327    if (ARG4 & VKI_MREMAP_FIXED) {
   2328       PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
   2329             ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
   2330       PRE_REG_READ5(unsigned long, "mremap",
   2331                     unsigned long, old_addr, unsigned long, old_size,
   2332                     unsigned long, new_size, unsigned long, flags,
   2333                     unsigned long, new_addr);
   2334    } else {
   2335       PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
   2336             ARG1, (ULong)ARG2, ARG3, ARG4);
   2337       PRE_REG_READ4(unsigned long, "mremap",
   2338                     unsigned long, old_addr, unsigned long, old_size,
   2339                     unsigned long, new_size, unsigned long, flags);
   2340    }
   2341    SET_STATUS_from_SysRes(
   2342       do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
   2343    );
   2344 }
   2345 #endif /* HAVE_MREMAP */
   2346 
   2347 PRE(sys_nice)
   2348 {
   2349    PRINT("sys_nice ( %ld )", ARG1);
   2350    PRE_REG_READ1(long, "nice", int, inc);
   2351 }
   2352 
   2353 PRE(sys_mlock)
   2354 {
   2355    *flags |= SfMayBlock;
   2356    PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   2357    PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
   2358 }
   2359 
   2360 PRE(sys_munlock)
   2361 {
   2362    *flags |= SfMayBlock;
   2363    PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   2364    PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
   2365 }
   2366 
   2367 PRE(sys_mlockall)
   2368 {
   2369    *flags |= SfMayBlock;
   2370    PRINT("sys_mlockall ( %lx )", ARG1);
   2371    PRE_REG_READ1(long, "mlockall", int, flags);
   2372 }
   2373 
   2374 PRE(sys_setpriority)
   2375 {
   2376    PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
   2377    PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
   2378 }
   2379 
   2380 PRE(sys_getpriority)
   2381 {
   2382    PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
   2383    PRE_REG_READ2(long, "getpriority", int, which, int, who);
   2384 }
   2385 
   2386 PRE(sys_pwrite64)
   2387 {
   2388    *flags |= SfMayBlock;
   2389 #if VG_WORDSIZE == 4
   2390    PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
   2391          ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   2392    PRE_REG_READ5(ssize_t, "pwrite64",
   2393                  unsigned int, fd, const char *, buf, vki_size_t, count,
   2394                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
   2395 #elif VG_WORDSIZE == 8
   2396    PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
   2397          ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   2398    PRE_REG_READ4(ssize_t, "pwrite64",
   2399                  unsigned int, fd, const char *, buf, vki_size_t, count,
   2400                  Word, offset);
   2401 #else
   2402 #  error Unexpected word size
   2403 #endif
   2404    PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
   2405 }
   2406 
   2407 PRE(sys_sync)
   2408 {
   2409    *flags |= SfMayBlock;
   2410    PRINT("sys_sync ( )");
   2411    PRE_REG_READ0(long, "sync");
   2412 }
   2413 
   2414 PRE(sys_fstatfs)
   2415 {
   2416    PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
   2417    PRE_REG_READ2(long, "fstatfs",
   2418                  unsigned int, fd, struct statfs *, buf);
   2419    PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
   2420 }
   2421 
   2422 POST(sys_fstatfs)
   2423 {
   2424    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
   2425 }
   2426 
   2427 PRE(sys_fstatfs64)
   2428 {
   2429    PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
   2430    PRE_REG_READ3(long, "fstatfs64",
   2431                  unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
   2432    PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
   2433 }
   2434 POST(sys_fstatfs64)
   2435 {
   2436    POST_MEM_WRITE( ARG3, ARG2 );
   2437 }
   2438 
   2439 PRE(sys_getsid)
   2440 {
   2441    PRINT("sys_getsid ( %ld )", ARG1);
   2442    PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
   2443 }
   2444 
   2445 PRE(sys_pread64)
   2446 {
   2447    *flags |= SfMayBlock;
   2448 #if VG_WORDSIZE == 4
   2449    PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
   2450          ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   2451    PRE_REG_READ5(ssize_t, "pread64",
   2452                  unsigned int, fd, char *, buf, vki_size_t, count,
   2453                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
   2454 #elif VG_WORDSIZE == 8
   2455    PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
   2456          ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   2457    PRE_REG_READ4(ssize_t, "pread64",
   2458                  unsigned int, fd, char *, buf, vki_size_t, count,
   2459                  Word, offset);
   2460 #else
   2461 #  error Unexpected word size
   2462 #endif
   2463    PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
   2464 }
   2465 POST(sys_pread64)
   2466 {
   2467    vg_assert(SUCCESS);
   2468    if (RES > 0) {
   2469       POST_MEM_WRITE( ARG2, RES );
   2470    }
   2471 }
   2472 
   2473 PRE(sys_mknod)
   2474 {
   2475    PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
   2476    PRE_REG_READ3(long, "mknod",
   2477                  const char *, pathname, int, mode, unsigned, dev);
   2478    PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
   2479 }
   2480 
   2481 PRE(sys_flock)
   2482 {
   2483    *flags |= SfMayBlock;
   2484    PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
   2485    PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
   2486 }
   2487 
   2488 // Pre_read a char** argument.
   2489 static void pre_argv_envp(Addr a, ThreadId tid, Char* s1, Char* s2)
   2490 {
   2491    while (True) {
   2492       Addr a_deref;
   2493       Addr* a_p = (Addr*)a;
   2494       PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
   2495       a_deref = *a_p;
   2496       if (0 == a_deref)
   2497          break;
   2498       PRE_MEM_RASCIIZ( s2, a_deref );
   2499       a += sizeof(char*);
   2500    }
   2501 }
   2502 
   2503 static Bool i_am_the_only_thread ( void )
   2504 {
   2505    Int c = VG_(count_living_threads)();
   2506    vg_assert(c >= 1); /* stay sane */
   2507    return c == 1;
   2508 }
   2509 
   2510 /* Wait until all other threads disappear. */
   2511 void VG_(reap_threads)(ThreadId self)
   2512 {
   2513    while (!i_am_the_only_thread()) {
   2514       /* Let other thread(s) run */
   2515       VG_(vg_yield)();
   2516       VG_(poll_signals)(self);
   2517    }
   2518    vg_assert(i_am_the_only_thread());
   2519 }
   2520 
   2521 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
   2522 // but it seems to work nonetheless...
   2523 PRE(sys_execve)
   2524 {
   2525    Char*        path = NULL;       /* path to executable */
   2526    Char**       envp = NULL;
   2527    Char**       argv = NULL;
   2528    Char**       arg2copy;
   2529    Char*        launcher_basename = NULL;
   2530    ThreadState* tst;
   2531    Int          i, j, tot_args;
   2532    SysRes       res;
   2533    Bool         setuid_allowed, trace_this_child;
   2534 
   2535    PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
   2536    PRE_REG_READ3(vki_off_t, "execve",
   2537                  char *, filename, char **, argv, char **, envp);
   2538    PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
   2539    if (ARG2 != 0)
   2540       pre_argv_envp( ARG2, tid, "execve(argv)", "execve(argv[i])" );
   2541    if (ARG3 != 0)
   2542       pre_argv_envp( ARG3, tid, "execve(envp)", "execve(envp[i])" );
   2543 
   2544    vg_assert(VG_(is_valid_tid)(tid));
   2545    tst = VG_(get_ThreadState)(tid);
   2546 
   2547    /* Erk.  If the exec fails, then the following will have made a
   2548       mess of things which makes it hard for us to continue.  The
   2549       right thing to do is piece everything together again in
   2550       POST(execve), but that's close to impossible.  Instead, we make
   2551       an effort to check that the execve will work before actually
   2552       doing it. */
   2553 
   2554    /* Check that the name at least begins in client-accessible storage. */
   2555    if (ARG1 == 0 /* obviously bogus */
   2556        || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
   2557       SET_STATUS_Failure( VKI_EFAULT );
   2558       return;
   2559    }
   2560 
   2561    // Decide whether or not we want to follow along
   2562    trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1 );
   2563 
   2564    // Do the important checks:  it is a file, is executable, permissions are
   2565    // ok, etc.  We allow setuid executables to run only in the case when
    2566    // we are not simulating them, that is, when they are to be run natively.
   2567    setuid_allowed = trace_this_child  ? False  : True;
   2568    res = VG_(pre_exec_check)((const Char*)ARG1, NULL, setuid_allowed);
   2569    if (sr_isError(res)) {
   2570       SET_STATUS_Failure( sr_Err(res) );
   2571       return;
   2572    }
   2573 
   2574    /* If we're tracing the child, and the launcher name looks bogus
   2575       (possibly because launcher.c couldn't figure it out, see
   2576       comments therein) then we have no option but to fail. */
   2577    if (trace_this_child
   2578        && (VG_(name_of_launcher) == NULL
   2579            || VG_(name_of_launcher)[0] != '/')) {
   2580       SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
   2581       return;
   2582    }
   2583 
   2584    /* After this point, we can't recover if the execve fails. */
   2585    VG_(debugLog)(1, "syswrap", "Exec of %s\n", (Char*)ARG1);
   2586 
   2587    /* Resistance is futile.  Nuke all other threads.  POSIX mandates
   2588       this. (Really, nuke them all, since the new process will make
   2589       its own new thread.) */
   2590    VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
   2591    VG_(reap_threads)(tid);
   2592 
   2593    // Set up the child's exe path.
   2594    //
   2595    if (trace_this_child) {
   2596 
   2597       // We want to exec the launcher.  Get its pre-remembered path.
   2598       path = VG_(name_of_launcher);
   2599       // VG_(name_of_launcher) should have been acquired by m_main at
   2600       // startup.
   2601       vg_assert(path);
   2602 
   2603       launcher_basename = VG_(strrchr)(path, '/');
   2604       if (launcher_basename == NULL || launcher_basename[1] == 0) {
   2605          launcher_basename = path;  // hmm, tres dubious
   2606       } else {
   2607          launcher_basename++;
   2608       }
   2609 
   2610    } else {
   2611       path = (Char*)ARG1;
   2612       if (VG_(clo_xml)) {
   2613         VG_(printf_xml)("\n<execv/>\n\n</valgrindoutput>\n\n");
   2614       } else {
   2615         VG_(umsg)("execv called - the tool will now quit\n");
   2616       }
   2617    }
   2618 
   2619    // Set up the child's environment.
   2620    //
   2621    // Remove the valgrind-specific stuff from the environment so the
   2622    // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
   2623    // This is done unconditionally, since if we are tracing the child,
   2624    // the child valgrind will set up the appropriate client environment.
   2625    // Nb: we make a copy of the environment before trying to mangle it
   2626    // as it might be in read-only memory (this was bug #101881).
   2627    //
   2628    // Then, if tracing the child, set VALGRIND_LIB for it.
   2629    //
   2630    if (ARG3 == 0) {
   2631       envp = NULL;
   2632    } else {
   2633       envp = VG_(env_clone)( (Char**)ARG3 );
   2634       if (envp == NULL) goto hosed;
   2635       VG_(env_remove_valgrind_env_stuff)( envp );
   2636    }
   2637 
   2638    if (trace_this_child) {
   2639       // Set VALGRIND_LIB in ARG3 (the environment)
   2640       VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
   2641    }
   2642 
   2643    // Set up the child's args.  If not tracing it, they are
   2644    // simply ARG2.  Otherwise, they are
   2645    //
   2646    // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
   2647    //
   2648    // except that the first VG_(args_for_valgrind_noexecpass) args
   2649    // are omitted.
   2650    //
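            // For example (hypothetical values): with launcher_basename
            // "valgrind", VG_(args_for_valgrind) = { "--tool=memcheck" },
            // noexecpass == 0, and the client calling
            // execve("/bin/ls", {"ls","-l",NULL}, envp), the child's argv
            // becomes { "valgrind", "--tool=memcheck", "/bin/ls", "-l", NULL }.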
   2651    if (!trace_this_child) {
   2652       argv = (Char**)ARG2;
   2653    } else {
   2654       vg_assert( VG_(args_for_valgrind) );
   2655       vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
   2656       vg_assert( VG_(args_for_valgrind_noexecpass)
   2657                    <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
   2658       /* how many args in total will there be? */
   2659       // launcher basename
   2660       tot_args = 1;
   2661       // V's args
   2662       tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
   2663       tot_args -= VG_(args_for_valgrind_noexecpass);
   2664       // name of client exe
   2665       tot_args++;
   2666       // args for client exe, skipping [0]
   2667       arg2copy = (Char**)ARG2;
   2668       if (arg2copy && arg2copy[0]) {
   2669          for (i = 1; arg2copy[i]; i++)
   2670             tot_args++;
   2671       }
   2672       // allocate
   2673       argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
   2674                           (tot_args+1) * sizeof(HChar*) );
   2675       if (argv == 0) goto hosed;
   2676       // copy
   2677       j = 0;
   2678       argv[j++] = launcher_basename;
   2679       for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
   2680          if (i < VG_(args_for_valgrind_noexecpass))
   2681             continue;
   2682          argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
   2683       }
   2684       argv[j++] = (Char*)ARG1;
   2685       if (arg2copy && arg2copy[0])
   2686          for (i = 1; arg2copy[i]; i++)
   2687             argv[j++] = arg2copy[i];
   2688       argv[j++] = NULL;
   2689       // check
   2690       vg_assert(j == tot_args+1);
   2691    }
   2692 
   2693    /* restore the DATA rlimit for the child */
   2694    VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
   2695 
   2696    /*
   2697       Set the signal state up for exec.
   2698 
   2699       We need to set the real signal state to make sure the exec'd
   2700       process gets SIG_IGN properly.
   2701 
   2702       Also set our real sigmask to match the client's sigmask so that
   2703       the exec'd child will get the right mask.  First we need to
    2704       clear out any pending signals so they don't get delivered,
   2705       which would confuse things.
   2706 
   2707       XXX This is a bug - the signals should remain pending, and be
   2708       delivered to the new process after exec.  There's also a
   2709       race-condition, since if someone delivers us a signal between
   2710       the sigprocmask and the execve, we'll still get the signal. Oh
   2711       well.
   2712    */
   2713    {
   2714       vki_sigset_t allsigs;
   2715       vki_siginfo_t info;
   2716 
   2717       /* What this loop does: it queries SCSS (the signal state that
   2718          the client _thinks_ the kernel is in) by calling
   2719          VG_(do_sys_sigaction), and modifies the real kernel signal
   2720          state accordingly. */
   2721       for (i = 1; i < VG_(max_signal); i++) {
   2722          vki_sigaction_fromK_t sa_f;
   2723          vki_sigaction_toK_t   sa_t;
   2724          VG_(do_sys_sigaction)(i, NULL, &sa_f);
   2725          VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
   2726          if (sa_t.ksa_handler == VKI_SIG_IGN)
   2727             VG_(sigaction)(i, &sa_t, NULL);
   2728          else {
   2729             sa_t.ksa_handler = VKI_SIG_DFL;
   2730             VG_(sigaction)(i, &sa_t, NULL);
   2731          }
   2732       }
   2733 
   2734       VG_(sigfillset)(&allsigs);
   2735       while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
   2736          ;
   2737 
   2738       VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
   2739    }
   2740 
   2741    if (0) {
   2742       Char **cpp;
   2743       VG_(printf)("exec: %s\n", path);
   2744       for (cpp = argv; cpp && *cpp; cpp++)
   2745          VG_(printf)("argv: %s\n", *cpp);
   2746       if (0)
   2747          for (cpp = envp; cpp && *cpp; cpp++)
   2748             VG_(printf)("env: %s\n", *cpp);
   2749    }
   2750 
   2751    SET_STATUS_from_SysRes(
   2752       VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
   2753    );
   2754 
   2755    /* If we got here, then the execve failed.  We've already made way
   2756       too much of a mess to continue, so we have to abort. */
   2757   hosed:
   2758    vg_assert(FAILURE);
   2759    VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %ld\n",
   2760                 ARG1, (char*)ARG1, ARG2, ARG3, ERR);
   2761    VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
   2762                             "execve() failing, so I'm dying.\n");
   2763    VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
   2764                             "or work out how to recover.\n");
   2765    VG_(exit)(101);
   2766 }
   2767 
   2768 PRE(sys_access)
   2769 {
   2770    PRINT("sys_access ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   2771    PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
   2772    PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
   2773 }
   2774 
   2775 PRE(sys_alarm)
   2776 {
   2777    PRINT("sys_alarm ( %ld )", ARG1);
   2778    PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
   2779 }
   2780 
   2781 PRE(sys_brk)
   2782 {
   2783    Addr brk_limit = VG_(brk_limit);
   2784    Addr brk_new;
   2785 
   2786    /* libc   says: int   brk(void *end_data_segment);
   2787       kernel says: void* brk(void* end_data_segment);  (more or less)
   2788 
   2789       libc returns 0 on success, and -1 (and sets errno) on failure.
   2790       Nb: if you ask to shrink the dataseg end below what it
   2791       currently is, that always succeeds, even if the dataseg end
   2792       doesn't actually change (eg. brk(0)).  Unless it seg faults.
   2793 
   2794       Kernel returns the new dataseg end.  If the brk() failed, this
   2795       will be unchanged from the old one.  That's why calling (kernel)
   2796       brk(0) gives the current dataseg end (libc brk() just returns
   2797       zero in that case).
   2798 
   2799       Both will seg fault if you shrink it back into a text segment.
   2800    */
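            /* Rough sketch (assumed behaviour, purely illustrative) of how a
               libc-style wrapper maps the kernel contract onto the libc one;
               kernel_brk() below is a hypothetical name for the raw syscall:

                  if (kernel_brk(new_end) != new_end)  // break unchanged: failed
                     return -1;                        // libc: -1, sets errno
                  return 0;                            // break moved: success
            */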
   2801    PRINT("sys_brk ( %#lx )", ARG1);
   2802    PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
   2803 
   2804    brk_new = do_brk(ARG1);
   2805    SET_STATUS_Success( brk_new );
   2806 
   2807    if (brk_new == ARG1) {
   2808       /* brk() succeeded */
   2809       if (brk_new < brk_limit) {
   2810          /* successfully shrunk the data segment. */
   2811          VG_TRACK( die_mem_brk, (Addr)ARG1,
    2812                    brk_limit-ARG1 );
   2813       } else
   2814       if (brk_new > brk_limit) {
   2815          /* successfully grew the data segment */
   2816          VG_TRACK( new_mem_brk, brk_limit,
   2817                    ARG1-brk_limit, tid );
   2818       }
   2819    } else {
   2820       /* brk() failed */
   2821       vg_assert(brk_limit == brk_new);
   2822    }
   2823 }
   2824 
   2825 PRE(sys_chdir)
   2826 {
   2827    PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   2828    PRE_REG_READ1(long, "chdir", const char *, path);
   2829    PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
   2830 }
   2831 
   2832 PRE(sys_chmod)
   2833 {
   2834    PRINT("sys_chmod ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   2835    PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
   2836    PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
   2837 }
   2838 
   2839 PRE(sys_chown)
   2840 {
   2841    PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   2842    PRE_REG_READ3(long, "chown",
   2843                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
   2844    PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
   2845 }
   2846 
   2847 PRE(sys_lchown)
   2848 {
   2849    PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   2850    PRE_REG_READ3(long, "lchown",
   2851                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
   2852    PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
   2853 }
   2854 
   2855 PRE(sys_close)
   2856 {
   2857    PRINT("sys_close ( %ld )", ARG1);
   2858    PRE_REG_READ1(long, "close", unsigned int, fd);
   2859 
   2860    /* Detect and negate attempts by the client to close Valgrind's log fd */
   2861    if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
   2862         /* If doing -d style logging (which is to fd=2), don't
   2863            allow that to be closed either. */
   2864         || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
   2865       SET_STATUS_Failure( VKI_EBADF );
   2866 }
   2867 
   2868 POST(sys_close)
   2869 {
   2870    if (VG_(clo_track_fds)) record_fd_close(ARG1);
   2871 }
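        /* Illustrative client-side view of the policy in PRE(sys_close)
           above (editorial example, not Valgrind code): a close() of a
           descriptor reserved for the tool is refused with EBADF instead of
           being passed to the kernel.  'reserved_fd' is a hypothetical name
           for such a descriptor.

              #include <errno.h>
              #include <unistd.h>

              if (close(reserved_fd) == -1 && errno == EBADF) {
                 // The close was refused; under Valgrind this is how
                 // attempts to close the tool's log fd (or fd 2 when -d
                 // logging is active) are reported back to the client.
              }
        */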
   2872 
   2873 PRE(sys_dup)
   2874 {
   2875    PRINT("sys_dup ( %ld )", ARG1);
   2876    PRE_REG_READ1(long, "dup", unsigned int, oldfd);
   2877 }
   2878 
   2879 POST(sys_dup)
   2880 {
   2881    vg_assert(SUCCESS);
   2882    if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
   2883       VG_(close)(RES);
   2884       SET_STATUS_Failure( VKI_EMFILE );
   2885    } else {
   2886       if (VG_(clo_track_fds))
   2887          ML_(record_fd_open_named)(tid, RES);
   2888    }
   2889 }
   2890 
   2891 PRE(sys_dup2)
   2892 {
   2893    PRINT("sys_dup2 ( %ld, %ld )", ARG1,ARG2);
   2894    PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
   2895    if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
   2896       SET_STATUS_Failure( VKI_EBADF );
   2897 }
   2898 
   2899 POST(sys_dup2)
   2900 {
   2901    vg_assert(SUCCESS);
   2902    if (VG_(clo_track_fds))
   2903       ML_(record_fd_open_named)(tid, RES);
   2904 }
   2905 
   2906 PRE(sys_fchdir)
   2907 {
   2908    PRINT("sys_fchdir ( %ld )", ARG1);
   2909    PRE_REG_READ1(long, "fchdir", unsigned int, fd);
   2910 }
   2911 
   2912 PRE(sys_fchown)
   2913 {
   2914    PRINT("sys_fchown ( %ld, %ld, %ld )", ARG1,ARG2,ARG3);
   2915    PRE_REG_READ3(long, "fchown",
   2916                  unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
   2917 }
   2918 
   2919 PRE(sys_fchmod)
   2920 {
   2921    PRINT("sys_fchmod ( %ld, %ld )", ARG1,ARG2);
   2922    PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
   2923 }
   2924 
   2925 PRE(sys_newfstat)
   2926 {
   2927    PRINT("sys_newfstat ( %ld, %#lx )", ARG1,ARG2);
   2928    PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
   2929    PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
   2930 }
   2931 
   2932 POST(sys_newfstat)
   2933 {
   2934    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
   2935 }
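        /* Editorial note on the PRE_MEM_WRITE / POST_MEM_WRITE pairing used
           by sys_newfstat above and throughout this file (a sketch of the
           general pattern, not a new wrapper): PRE checks that the client
           buffer is addressable before the kernel writes to it, and POST
           then tells the tool that the filled-in bytes are now defined.

              // Hypothetical skeleton for a syscall that fills a fixed-size
              // client structure at ARG2 ('vki_some_struct' is made up):
              //
              //   PRE:   PRE_MEM_WRITE ( "somecall(buf)", ARG2,
              //                          sizeof(struct vki_some_struct) );
              //   POST:  POST_MEM_WRITE( ARG2,
              //                          sizeof(struct vki_some_struct) );
        */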
   2936 
   2937 static vki_sigset_t fork_saved_mask;
   2938 
   2939 // In Linux, sys_fork() takes different arguments on different architectures,
   2940 // but we ignore whatever it gets, so the wrapper below can stay arch-neutral.
   2941 PRE(sys_fork)
   2942 {
   2943    Bool is_child;
   2944    Int child_pid;
   2945    vki_sigset_t mask;
   2946 
   2947    PRINT("sys_fork ( )");
   2948    PRE_REG_READ0(long, "fork");
   2949 
   2950    /* Block all signals during fork, so that we can fix things up in
   2951       the child without being interrupted. */
   2952    VG_(sigfillset)(&mask);
   2953    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
   2954 
   2955    SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
   2956 
   2957    if (!SUCCESS) return;
   2958 
   2959 #if defined(VGO_linux) || defined(VGO_aix5)
   2960    // RES is 0 for child, non-0 (the child's PID) for parent.
   2961    is_child = ( RES == 0 ? True : False );
   2962    child_pid = ( is_child ? -1 : RES );
   2963 #elif defined(VGO_darwin)
   2964    // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
   2965    is_child = RESHI;
   2966    child_pid = RES;
   2967 #else
   2968 #  error Unknown OS
   2969 #endif
   2970 
   2971    VG_(do_atfork_pre)(tid);
   2972 
   2973    if (is_child) {
   2974       VG_(do_atfork_child)(tid);
   2975 
   2976       /* restore signal mask */
   2977       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   2978 
   2979       /* If --child-silent-after-fork=yes was specified, set the
   2980          output file descriptors to 'impossible' values.  This is
   2981          noticed by send_bytes_to_logging_sink in m_libcprint.c, which
   2982          duly stops writing any further output. */
   2983       if (VG_(clo_child_silent_after_fork)) {
   2984          if (!VG_(log_output_sink).is_socket)
   2985             VG_(log_output_sink).fd = -1;
   2986          if (!VG_(xml_output_sink).is_socket)
   2987             VG_(xml_output_sink).fd = -1;
   2988       }
   2989 
   2990    } else {
   2991       VG_(do_atfork_parent)(tid);
   2992 
   2993       PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);
   2994 
   2995       /* restore signal mask */
   2996       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   2997    }
   2998 }
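        /* Illustrative sketch (simplified and hypothetical) of how a
           logging sink can honour the fd = -1 convention set by sys_fork
           above when --child-silent-after-fork=yes is in effect.  The real
           check lives in send_bytes_to_logging_sink in m_libcprint.c; this
           is not a copy of it, and 'sketch_send_bytes' is a made-up name.

              static void sketch_send_bytes ( OutputSink* sink,
                                              const HChar* buf, Int nbuf )
              {
                 if (sink->fd < 0)
                    return;   // silenced child: drop the output entirely
                 if (sink->is_socket)
                    VG_(write_socket)( sink->fd, (void*)buf, nbuf );
                 else
                    VG_(write)( sink->fd, buf, nbuf );
              }
        */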
   2999 
   3000 PRE(sys_ftruncate)
   3001 {
   3002    *flags |= SfMayBlock;
   3003    PRINT("sys_ftruncate ( %ld, %ld )", ARG1,ARG2);
   3004    PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
   3005 }
   3006 
   3007 PRE(sys_truncate)
   3008 {
   3009    *flags |= SfMayBlock;
   3010    PRINT("sys_truncate ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   3011    PRE_REG_READ2(long, "truncate",
   3012                  const char *, path, unsigned long, length);
   3013    PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
   3014 }
   3015 
   3016 PRE(sys_ftruncate64)
   3017 {
   3018    *flags |= SfMayBlock;
   3019 #if VG_WORDSIZE == 4
   3020    PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, MERGE64(ARG2,ARG3));
   3021    PRE_REG_READ3(long, "ftruncate64",
   3022                  unsigned int, fd,
   3023                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
   3024 #else
   3025    PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, (Long)ARG2);
   3026    PRE_REG_READ2(long, "ftruncate64",
   3027                  unsigned int,fd, UWord,length);
   3028 #endif
   3029 }
   3030 
   3031 PRE(sys_truncate64)
   3032 {
   3033    *flags |= SfMayBlock;
   3034 #if VG_WORDSIZE == 4
   3035    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
   3036    PRE_REG_READ3(long, "truncate64",
   3037                  const char *, path,
   3038                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
   3039 #else
   3040    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
   3041    PRE_REG_READ2(long, "truncate64",
   3042                  const char *,path, UWord,length);
   3043 #endif
   3044    PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
   3045 }
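        /* Illustrative note (hypothetical, simplified): on 32-bit targets
           the 64-bit 'length' argument of ftruncate64/truncate64 arrives in
           two word-sized registers, and MERGE64 reassembles it; the real
           macro and the per-platform low/high ordering live elsewhere in
           the syswrap sources.  The sketch below shows the little-endian
           style pairing only, with a made-up macro name.

              // EXAMPLE_MERGE64 is not the real macro.
              #define EXAMPLE_MERGE64(lo, hi) \
                 ( ((ULong)(UInt)(lo)) | (((ULong)(UInt)(hi)) << 32) )

              // e.g. a length of 0x100000000 arrives as lo = 0, hi = 1 and
              // is rebuilt as EXAMPLE_MERGE64(0, 1) == 0x100000000ULL.
        */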
   3046 
   3047 PRE(sys_getdents)
   3048 {
   3049    *flags |= SfMayBlock;
   3050    PRINT("sys_getdents ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   3051    PRE_REG_READ3(long, "getdents",
   3052                  unsigned int, fd, struct linux_dirent *, dirp,
   3053                  unsigned int, count);
   3054    PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
   3055 }
   3056 
   3057 POST(sys_getdents)
   3058 {
   3059    vg_assert(SUCCESS);
   3060    if (RES > 0)
   3061       POST_MEM_WRITE( ARG2, RES );
   3062 }
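        /* Illustrative client-side sketch (editorial example, not Valgrind
           code) of why PRE marks the whole 'count'-byte buffer as writable
           while POST marks only RES bytes as written: the kernel fills at
           most 'count' bytes and returns the number it actually used.
           'dir_fd' is a hypothetical open directory descriptor.

              #define _GNU_SOURCE
              #include <unistd.h>
              #include <sys/syscall.h>

              char buf[4096];
              long nread = syscall(SYS_getdents64, dir_fd, buf, sizeof buf);
              // Only the first nread bytes of buf are initialised by the
              // kernel; using bytes beyond them is exactly what Memcheck
              // would (rightly) flag as uninitialised data.
        */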
   3063 
   3064 PRE(sys_getdents64)
   3065 {
   3066    *flags |= SfMayBlock;
   3067    PRINT("sys_getdents64 ( %ld, %#lx, %ld )",ARG1,ARG2,ARG3);
   3068    PRE_REG_READ3(long, "getdents64",
   3069                  unsigned int, fd, struct linux_dirent64 *, dirp,
   3070                  unsigned int, count);