Home | History | Annotate | Download | only in m_syswrap
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Handle system calls.                          syswrap-main.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2010 Julian Seward
     11       jseward (at) acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "libvex_guest_offsets.h"
     32 #include "libvex_trc_values.h"
     33 #include "pub_core_basics.h"
     34 #include "pub_core_aspacemgr.h"
     35 #include "pub_core_vki.h"
     36 #include "pub_core_vkiscnums.h"
     37 #include "pub_core_threadstate.h"
     38 #include "pub_core_libcbase.h"
     39 #include "pub_core_libcassert.h"
     40 #include "pub_core_libcprint.h"
     41 #include "pub_core_libcproc.h"      // For VG_(getpid)()
     42 #include "pub_core_libcsignal.h"
     43 #include "pub_core_scheduler.h"     // For VG_({acquire,release}_BigLock),
     44                                     //   and VG_(vg_yield)
     45 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     46 #include "pub_core_tooliface.h"
     47 #include "pub_core_options.h"
     48 #include "pub_core_signals.h"       // For VG_SIGVGKILL, VG_(poll_signals)
     49 #include "pub_core_syscall.h"
     50 #include "pub_core_machine.h"
     51 #include "pub_core_syswrap.h"
     52 
     53 #include "priv_types_n_macros.h"
     54 #include "priv_syswrap-main.h"
     55 
     56 #if defined(VGO_darwin)
     57 #include "priv_syswrap-darwin.h"
     58 #endif
     59 
     60 /* Useful info which needs to be recorded somewhere:
     61    Use of registers in syscalls is:
     62 
     63           NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
     64    LINUX:
     65    x86    eax ebx  ecx  edx  esi  edi  ebp  n/a  n/a  eax       (== NUM)
     66    amd64  rax rdi  rsi  rdx  r10  r8   r9   n/a  n/a  rax       (== NUM)
     67    ppc32  r0  r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
     68    ppc64  r0  r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
     69    arm    r7  r0   r1   r2   r3   r4   r5   n/a  n/a  r0        (== ARG1)
     70 
     71    AIX:
     72    ppc32  r2  r3   r4   r5   r6   r7   r8   r9   r10  r3(res),r4(err)
     73    ppc64  r2  r3   r4   r5   r6   r7   r8   r9   r10  r3(res),r4(err)
     74 
     75    DARWIN:
     76    x86    eax +4   +8   +12  +16  +20  +24  +28  +32  edx:eax, eflags.c
     77    amd64  rax rdi  rsi  rdx  rcx  r8   r9   +8   +16  rdx:rax, rflags.c
     78 
     79    For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto
     80    amd64-darwin.  Apparently 0(%esp) is some kind of return address
     81    (perhaps for syscalls done with "sysenter"?)  I don't think it is
     82    relevant for syscalls done with "int $0x80/1/2".
     83 */
     84 
     85 /* This is the top level of the system-call handler module.  All
     86    system calls are channelled through here, doing two things:
     87 
     88    * notify the tool of the events (mem/reg reads, writes) happening
     89 
     90    * perform the syscall, usually by passing it along to the kernel
     91      unmodified.
     92 
     93    A magical piece of assembly code, do_syscall_for_client_WRK, in
     94    syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
     95    kernel, whilst having the simulator retain control.
     96 */
     97 
     98 /* The main function is VG_(client_syscall).  The simulation calls it
     99    whenever a client thread wants to do a syscall.  The following is a
    100    sketch of what it does.
    101 
    102    * Ensures the root thread's stack is suitably mapped.  Tedious and
    103      arcane.  See big big comment in VG_(client_syscall).
    104 
    105    * First, it rounds up the syscall number and args (which is a
    106      platform dependent activity) and puts them in a struct ("args")
    107      and also a copy in "orig_args".
    108 
    109      The pre/post wrappers refer to these structs and so no longer
    110      need magic macros to access any specific registers.  This struct
    111      is stored in thread-specific storage.
    112 
    113 
    114    * The pre-wrapper is called, passing it a pointer to struct
    115      "args".
    116 
    117 
    118    * The pre-wrapper examines the args and pokes the tool
    119      appropriately.  It may modify the args; this is why "orig_args"
    120      is also stored.
    121 
    122      The pre-wrapper may choose to 'do' the syscall itself, and
    123      concludes one of three outcomes:
    124 
    125        Success(N)    -- syscall is already complete, with success;
    126                         result is N
    127 
    128        Fail(N)       -- syscall is already complete, with failure;
    129                         error code is N
    130 
    131        HandToKernel  -- (the usual case): this needs to be given to
    132                         the kernel to be done, using the values in
    133                         the possibly-modified "args" struct.
    134 
    135      In addition, the pre-wrapper may set some flags:
    136 
    137        MayBlock   -- only applicable when outcome==HandToKernel
    138 
    139        PostOnFail -- only applicable when outcome==HandToKernel or Fail
    140 
    141 
    142    * If the pre-outcome is HandToKernel, the syscall is duly handed
    143      off to the kernel (perhaps involving some thread switchery, but
    144      that's not important).  This reduces the possible set of outcomes
    145      to either Success(N) or Fail(N).
    146 
    147 
    148    * The outcome (Success(N) or Fail(N)) is written back to the guest
    149      register(s).  This is platform specific:
    150 
    151      x86:    Success(N) ==>  eax = N
    152              Fail(N)    ==>  eax = -N
    153 
    154      ditto amd64
    155 
    156      ppc32:  Success(N) ==>  r3 = N, CR0.SO = 0
    157              Fail(N) ==>     r3 = N, CR0.SO = 1
    158 
    159      Darwin:
    160      x86:    Success(N) ==>  edx:eax = N, cc = 0
    161              Fail(N)    ==>  edx:eax = N, cc = 1
    162 
    163    * The post wrapper is called if:
    164 
    165      - it exists, and
    166      - outcome==Success or (outcome==Fail and PostOnFail is set)
    167 
    168      The post wrapper is passed the adulterated syscall args (struct
    169      "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
    170 
    171    There are several other complications, primarily to do with
    172    syscalls getting interrupted, explained in comments in the code.
    173 */
    174 
    175 /* CAVEATS for writing wrappers.  It is important to follow these!
    176 
    177    The macros defined in priv_types_n_macros.h are designed to help
    178    decouple the wrapper logic from the actual representation of
    179    syscall args/results, since these wrappers are designed to work on
    180    multiple platforms.
    181 
    182    Sometimes a PRE wrapper will complete the syscall itself, without
    183    handing it to the kernel.  It will use one of SET_STATUS_Success,
    184    SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
    185    value.  It is critical to appreciate that use of the macro does not
    186    immediately cause the underlying guest state to be updated -- that
    187    is done by the driver logic in this file, when the wrapper returns.
    188 
    189    As a result, PRE wrappers of the following form will malfunction:
    190 
    191    PRE(fooble)
    192    {
    193       ... do stuff ...
    194       SET_STATUS_Somehow(...)
    195 
    196       // do something that assumes guest state is up to date
    197    }
    198 
    199    In particular, direct or indirect calls to VG_(poll_signals) after
    200    setting STATUS can cause the guest state to be read (in order to
    201    build signal frames).  Do not do this.  If you want a signal poll
    202    after the syscall goes through, do "*flags |= SfPollAfter" and the
    203    driver logic will do it for you.
    204 
    205    -----------
    206 
    207    Another critical requirement following introduction of new address
    208    space manager (JRS, 20050923):
    209 
    210    In a situation where the mappedness of memory has changed, aspacem
    211    should be notified BEFORE the tool.  Hence the following is
    212    correct:
    213 
    214       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
    215       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
    216       if (d)
    217          VG_(discard_translations)(s->start, s->end+1 - s->start);
    218 
    219    whilst this is wrong:
    220 
    221       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
    222       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
    223       if (d)
    224          VG_(discard_translations)(s->start, s->end+1 - s->start);
    225 
    226    The reason is that the tool may itself ask aspacem for more shadow
    227    memory as a result of the VG_TRACK call.  In such a situation it is
    228    critical that aspacem's segment array is up to date -- hence the
    229    need to notify aspacem first.
    230 
    231    -----------
    232 
    233    Also .. take care to call VG_(discard_translations) whenever
    234    memory with execute permissions is unmapped.
    235 */
    236 
    237 
    238 /* ---------------------------------------------------------------------
    239    Do potentially blocking syscall for the client, and mess with
    240    signal masks at the same time.
    241    ------------------------------------------------------------------ */
    242 
    243 /* Perform a syscall on behalf of a client thread, using a specific
    244    signal mask.  On completion, the signal mask is set to restore_mask
    245    (which presumably blocks almost everything).  If a signal happens
    246    during the syscall, the handler should call
    247    VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
    248    thread's context to do the right thing.
    249 
    250    The _WRK function is handwritten assembly, implemented per-platform
    251    in coregrind/m_syswrap/syscall-$PLAT.S.  It has some very magic
    252    properties.  See comments at the top of
    253    VG_(fixup_guest_state_after_syscall_interrupted) below for details.
    254 
    255    This function (these functions) are required to return zero in case
    256    of success (even if the syscall itself failed), and nonzero if the
    257    sigprocmask-swizzling calls failed.  We don't actually care about
    258    the failure values from sigprocmask, although most of the assembly
    259    implementations do attempt to return that, using the convention
    260    0 for success, or 0x8000 | error-code for failure.
    261 */
#if defined(VGO_linux)
/* Linux: a single assembly helper handles all syscalls. */
extern
UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
                                      void* guest_state,
                                      const vki_sigset_t *syscall_mask,
                                      const vki_sigset_t *restore_mask,
                                      Word sigsetSzB );
#elif defined(VGO_aix5)
/* AIX5: the helper additionally needs the host's sigprocmask syscall
   number passed in, since it cannot hardwire it. */
extern
UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
                                      void* guest_state,
                                      const vki_sigset_t *syscall_mask,
                                      const vki_sigset_t *restore_mask,
                                      Word sigsetSzB, /* unused */
                                      Word __nr_sigprocmask );
#elif defined(VGO_darwin)
/* Darwin: three separate helpers, one per syscall class (Unix/BSD,
   Mach traps, machine-dependent).  The caller picks the right one by
   inspecting the class bits of the (Valgrind-internal) sysno. */
extern
UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
extern
UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
extern
UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
#else
#  error "Unknown OS"
#endif
    299 
    300 
/* Run syscall 'syscallno' on behalf of client thread 'tst', with the
   signal mask set to 'syscall_mask' for the duration of the call.
   Dispatches to the handwritten per-platform assembly helper declared
   above; on Darwin the helper is selected by the syscall's class.
   Asserts that the helper's sigprocmask-swizzling succeeded. */
static
void do_syscall_for_client ( Int syscallno,
                             ThreadState* tst,
                             const vki_sigset_t* syscall_mask )
{
   /* Passed to the helper as 'restore_mask'.  NOTE(review): it is
      uninitialised here, so the assembly helper presumably fills it
      with the pre-call mask and restores from it afterwards — confirm
      against syscall-$PLAT.S. */
   vki_sigset_t saved;
   UWord err;
#  if defined(VGO_linux)
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, sizeof(vki_sigset_t)
         );
#  elif defined(VGO_aix5)
   /* AIX5 variant needs the sigprocmask syscall number passed in. */
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, 0/*unused:sigsetSzB*/,
            __NR_rt_sigprocmask
         );
#  elif defined(VGO_darwin)
   /* Choose the helper matching the syscall's class encoding. */
   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         err = ML_(do_syscall_for_client_unix_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         err = ML_(do_syscall_for_client_mach_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         err = ML_(do_syscall_for_client_mdep_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      default:
         vg_assert(0);
         /*NOTREACHED*/
         break;
   }
#  else
#    error "Unknown OS"
#  endif
   /* err != 0 means the helper's sigprocmask calls failed, using the
      0x8000 | errcode convention documented above. */
   vg_assert2(
      err == 0,
      "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
      (Int)(err & 0xFFF)
   );
}
    353 
    354 
    355 /* ---------------------------------------------------------------------
    356    Impedance matchers and misc helpers
    357    ------------------------------------------------------------------ */
    358 
    359 static
    360 Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
    361 {
    362    return a1->sysno == a2->sysno
    363           && a1->arg1 == a2->arg1
    364           && a1->arg2 == a2->arg2
    365           && a1->arg3 == a2->arg3
    366           && a1->arg4 == a2->arg4
    367           && a1->arg5 == a2->arg5
    368           && a1->arg6 == a2->arg6
    369           && a1->arg7 == a2->arg7
    370           && a1->arg8 == a2->arg8;
    371 }
    372 
    373 static
    374 Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
    375 {
    376    /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
    377    if (s1->what == s2->what && sr_EQ( s1->sres, s2->sres ))
    378       return True;
    379 #  if defined(VGO_darwin)
    380    /* Darwin-specific debugging guff */
    381    vg_assert(s1->what == s2->what);
    382    VG_(printf)("eq_SyscallStatus:\n");
    383    VG_(printf)("  {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
    384    VG_(printf)("  {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
    385    vg_assert(0);
    386 #  endif
    387    return False;
    388 }
    389 
    390 /* Convert between SysRes and SyscallStatus, to the extent possible. */
    391 
    392 static
    393 SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
    394 {
    395    SyscallStatus status;
    396    status.what = SsComplete;
    397    status.sres = res;
    398    return status;
    399 }
    400 
    401 
    402 /* Impedance matchers.  These convert syscall arg or result data from
    403    the platform-specific in-guest-state format to the canonical
    404    formats, and back. */
    405 
/* Copy the syscall number and up to 8 args out of the
   platform-specific guest register/stack state into the canonical
   SyscallArgs struct.  'trc' (the translation return code) is only
   consulted on x86-darwin, where it identifies which trap instruction
   got us here and hence which syscall class this is.  See the big
   register-usage table near the top of this file for the mappings. */
static
void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
                                    /*IN*/ VexGuestArchState* gst_vanilla,
                                    /*IN*/ UInt trc )
{
#if defined(VGP_x86_linux)
   /* Linux/x86: number in %eax, args in %ebx..%ebp (6 max). */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sysno = gst->guest_EAX;
   canonical->arg1  = gst->guest_EBX;
   canonical->arg2  = gst->guest_ECX;
   canonical->arg3  = gst->guest_EDX;
   canonical->arg4  = gst->guest_ESI;
   canonical->arg5  = gst->guest_EDI;
   canonical->arg6  = gst->guest_EBP;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_amd64_linux)
   /* Linux/amd64: number in %rax; note arg4 is %r10, not %rcx. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sysno = gst->guest_RAX;
   canonical->arg1  = gst->guest_RDI;
   canonical->arg2  = gst->guest_RSI;
   canonical->arg3  = gst->guest_RDX;
   canonical->arg4  = gst->guest_R10;
   canonical->arg5  = gst->guest_R8;
   canonical->arg6  = gst->guest_R9;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_ppc32_linux)
   /* Linux/ppc32: number in r0, args in r3..r8. */
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR0;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_ppc64_linux)
   /* Linux/ppc64: same register assignment as ppc32. */
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR0;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_arm_linux)
   /* Linux/arm (EABI): number in r7, args in r0..r5. */
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sysno = gst->guest_R7;
   canonical->arg1  = gst->guest_R0;
   canonical->arg2  = gst->guest_R1;
   canonical->arg3  = gst->guest_R2;
   canonical->arg4  = gst->guest_R3;
   canonical->arg5  = gst->guest_R4;
   canonical->arg6  = gst->guest_R5;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_ppc32_aix5)
   /* AIX5/ppc32: number in r2, up to 8 args in r3..r10. */
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR2;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = gst->guest_GPR9;
   canonical->arg8  = gst->guest_GPR10;

#elif defined(VGP_ppc64_aix5)
   /* AIX5/ppc64: same register assignment as AIX5/ppc32. */
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR2;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = gst->guest_GPR9;
   canonical->arg8  = gst->guest_GPR10;

#elif defined(VGP_x86_darwin)
   /* Darwin/x86: number in %eax, args on the client stack above the
      return address. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;
   // GrP fixme hope syscalls aren't called with really shallow stacks...
   canonical->sysno = gst->guest_EAX;
   if (canonical->sysno != 0) {
      // stack[0] is return address
      canonical->arg1  = stack[1];
      canonical->arg2  = stack[2];
      canonical->arg3  = stack[3];
      canonical->arg4  = stack[4];
      canonical->arg5  = stack[5];
      canonical->arg6  = stack[6];
      canonical->arg7  = stack[7];
      canonical->arg8  = stack[8];
   } else {
      /* sysno 0 means the client used the syscall() wrapper: the real
         number is the first stack arg and everything shifts by one. */
      // GrP fixme hack handle syscall()
      // GrP fixme what about __syscall() ?
      // stack[0] is return address
      // DDD: the tool can't see that the params have been shifted!  Can
      //      lead to incorrect checking, I think, because the PRRAn/PSARn
      //      macros will mention the pre-shifted args.
      canonical->sysno = stack[1];
      vg_assert(canonical->sysno != 0);
      canonical->arg1  = stack[2];
      canonical->arg2  = stack[3];
      canonical->arg3  = stack[4];
      canonical->arg4  = stack[5];
      canonical->arg5  = stack[6];
      canonical->arg6  = stack[7];
      canonical->arg7  = stack[8];
      canonical->arg8  = stack[9];

      PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
            VG_(getpid)(), /*tid,*/
            VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
   }

   // Here we determine what kind of syscall it was by looking at the
   // interrupt kind, and then encode the syscall number using the 64-bit
   // encoding for Valgrind's internal use.
   //
   // DDD: Would it be better to stash the JMP kind into the Darwin
   // thread state rather than passing in the trc?
   switch (trc) {
   case VEX_TRC_JMP_SYS_INT128:
      // int $0x80 = Unix, 64-bit result
      vg_assert(canonical->sysno >= 0);
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
      break;
   case VEX_TRC_JMP_SYS_SYSENTER:
      // syscall = Unix, 32-bit result
      // OR        Mach, 32-bit result
      if (canonical->sysno >= 0) {
         // GrP fixme hack:  0xffff == I386_SYSCALL_NUMBER_MASK
         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
                                                             & 0xffff);
      } else {
         /* Negative numbers via sysenter are Mach traps. */
         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
      }
      break;
   case VEX_TRC_JMP_SYS_INT129:
      // int $0x81 = Mach, 32-bit result
      vg_assert(canonical->sysno < 0);
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
      break;
   case VEX_TRC_JMP_SYS_INT130:
      // int $0x82 = mdep, 32-bit result
      vg_assert(canonical->sysno >= 0);
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
      break;
   default:
      vg_assert(0);
      break;
   }

#elif defined(VGP_amd64_darwin)
   /* Darwin/amd64: first 6 args in registers, args 7-8 spill to the
      stack above the return address. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);

   // GrP fixme hope syscalls aren't called with really shallow stacks...
   canonical->sysno = gst->guest_RAX;
   if (canonical->sysno != __NR_syscall) {
      // stack[0] is return address
      canonical->arg1  = gst->guest_RDI;
      canonical->arg2  = gst->guest_RSI;
      canonical->arg3  = gst->guest_RDX;
      canonical->arg4  = gst->guest_R10;  // not rcx with syscall insn
      canonical->arg5  = gst->guest_R8;
      canonical->arg6  = gst->guest_R9;
      canonical->arg7  = stack[1];
      canonical->arg8  = stack[2];
   } else {
      /* syscall() wrapper: real number is in %rdi, args shift by one. */
      // GrP fixme hack handle syscall()
      // GrP fixme what about __syscall() ?
      // stack[0] is return address
      // DDD: the tool can't see that the params have been shifted!  Can
      //      lead to incorrect checking, I think, because the PRRAn/PSARn
      //      macros will mention the pre-shifted args.
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
      vg_assert(canonical->sysno != __NR_syscall);
      canonical->arg1  = gst->guest_RSI;
      canonical->arg2  = gst->guest_RDX;
      canonical->arg3  = gst->guest_R10;  // not rcx with syscall insn
      canonical->arg4  = gst->guest_R8;
      canonical->arg5  = gst->guest_R9;
      canonical->arg6  = stack[1];
      canonical->arg7  = stack[2];
      canonical->arg8  = stack[3];

      PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
            VG_(getpid)(), /*tid,*/
            VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
   }

   // no canonical->sysno adjustment needed

#else
#  error "getSyscallArgsFromGuestState: unknown arch"
#endif
}
    618 
/* Inverse of getSyscallArgsFromGuestState: write the canonical
   syscall number and args back into the platform-specific guest
   register/stack state, so the (possibly wrapper-modified) args are
   what the kernel actually sees. */
static
void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
                                    /*OUT*/VexGuestArchState* gst_vanilla )
{
#if defined(VGP_x86_linux)
   /* Linux/x86: number in %eax, args in %ebx..%ebp. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   gst->guest_EAX = canonical->sysno;
   gst->guest_EBX = canonical->arg1;
   gst->guest_ECX = canonical->arg2;
   gst->guest_EDX = canonical->arg3;
   gst->guest_ESI = canonical->arg4;
   gst->guest_EDI = canonical->arg5;
   gst->guest_EBP = canonical->arg6;

#elif defined(VGP_amd64_linux)
   /* Linux/amd64: number in %rax; arg4 goes in %r10 (syscall insn). */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   gst->guest_RAX = canonical->sysno;
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_R10 = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;

#elif defined(VGP_ppc32_linux)
   /* Linux/ppc32: number in r0, args in r3..r8. */
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_ppc64_linux)
   /* Linux/ppc64: same register assignment as ppc32. */
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_arm_linux)
   /* Linux/arm (EABI): number in r7, args in r0..r5. */
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   gst->guest_R7 = canonical->sysno;
   gst->guest_R0 = canonical->arg1;
   gst->guest_R1 = canonical->arg2;
   gst->guest_R2 = canonical->arg3;
   gst->guest_R3 = canonical->arg4;
   gst->guest_R4 = canonical->arg5;
   gst->guest_R5 = canonical->arg6;

#elif defined(VGP_ppc32_aix5)
   /* AIX5/ppc32: number in r2, up to 8 args in r3..r10. */
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   gst->guest_GPR2  = canonical->sysno;
   gst->guest_GPR3  = canonical->arg1;
   gst->guest_GPR4  = canonical->arg2;
   gst->guest_GPR5  = canonical->arg3;
   gst->guest_GPR6  = canonical->arg4;
   gst->guest_GPR7  = canonical->arg5;
   gst->guest_GPR8  = canonical->arg6;
   gst->guest_GPR9  = canonical->arg7;
   gst->guest_GPR10 = canonical->arg8;

#elif defined(VGP_ppc64_aix5)
   /* AIX5/ppc64: same register assignment as AIX5/ppc32. */
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   gst->guest_GPR2  = canonical->sysno;
   gst->guest_GPR3  = canonical->arg1;
   gst->guest_GPR4  = canonical->arg2;
   gst->guest_GPR5  = canonical->arg3;
   gst->guest_GPR6  = canonical->arg4;
   gst->guest_GPR7  = canonical->arg5;
   gst->guest_GPR8  = canonical->arg6;
   gst->guest_GPR9  = canonical->arg7;
   gst->guest_GPR10 = canonical->arg8;

#elif defined(VGP_x86_darwin)
   /* Darwin/x86: number in %eax (converted back to the kernel's
      encoding); args written to the client stack above the return
      address. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);

   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
   // stack[0] is return address
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_darwin)
   /* Darwin/amd64: first 6 args in registers (arg4 in %rcx here, in
      contrast to the %r10 used on the read-out side), args 7-8 on
      the stack above the return address. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;

   // stack[0] is return address
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_RCX = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1]       = canonical->arg7;
   stack[2]       = canonical->arg8;

#else
#  error "putSyscallArgsIntoGuestState: unknown arch"
#endif
}
    735 
/* Read the platform-specific syscall return convention out of the
   guest state 'gst_vanilla' and convert it into a canonical
   SyscallStatus.  On every platform the resulting status is marked
   SsComplete, i.e. the syscall outcome is final. */
static
void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
                                      /*IN*/ VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   /* Linux/x86: the result is returned in EAX. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_linux)
   /* Linux/amd64: the result is returned in RAX. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_linux)
   /* Linux/ppc32: result in GPR3; CR0.SO (bit 28 of the CR image)
      is the error flag. */
   VexGuestPPC32State* gst   = (VexGuestPPC32State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC32_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64_linux)
   /* Linux/ppc64: same convention as ppc32 (GPR3 + CR0.SO). */
   VexGuestPPC64State* gst   = (VexGuestPPC64State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC64_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_arm_linux)
   /* Linux/arm: the result is returned in R0. */
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_aix5)
   /* AIX5: result in GPR3, error code in GPR4. */
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_ppc32_aix5)( gst->guest_GPR3,
                                                gst->guest_GPR4 );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64_aix5)
   /* AIX5/64: same pair (GPR3 result, GPR4 error) as 32-bit. */
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_ppc64_aix5)( gst->guest_GPR3,
                                                gst->guest_GPR4 );
   canonical->what = SsComplete;

#  elif defined(VGP_x86_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   /* Darwin has three syscall classes, each with its own return
      convention; the class was stashed in guest_SC_CLASS at syscall
      entry (see VG_(client_syscall)). */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);  /* eflags.C */
   UInt err = 0;
   UInt wLO = 0;
   UInt wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // int $0x80 = Unix, 64-bit result
         /* The carry flag distinguishes success from failure. */
         err = carry;
         wLO = gst->guest_EAX;
         wHI = gst->guest_EDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // int $0x81 = Mach, 32-bit result
         wLO = gst->guest_EAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // int $0x82 = mdep, 32-bit result
         wLO = gst->guest_EAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_x86_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   /* Same three-class scheme as x86-darwin, with 64-bit registers. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);  /* rflags.C */
   ULong err = 0;
   ULong wLO = 0;
   ULong wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // syscall = Unix, 128-bit result
         err = carry;
         wLO = gst->guest_RAX;
         wHI = gst->guest_RDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // syscall = Mach, 64-bit result
         wLO = gst->guest_RAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // syscall = mdep, 64-bit result
         wLO = gst->guest_RAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_amd64_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  else
#    error "getSyscallStatusFromGuestState: unknown arch"
#  endif
}
    849 
/* Inverse of getSyscallStatusFromGuestState: write a completed
   SyscallStatus back into the guest state, using the platform's
   syscall return convention, and notify the tool (via VG_TRACK
   post_reg_write) of every guest register written.  'canonical' must
   be SsComplete. */
static
void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
                                      /*IN*/ SyscallStatus*     canonical,
                                      /*OUT*/VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-x86 scheme.  Oh well. */
      gst->guest_EAX = - (Int)sr_Err(canonical->sres);
   } else {
      gst->guest_EAX = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_x86_EAX, sizeof(UWord) );

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-amd64 scheme.  Oh well. */
      gst->guest_RAX = - (Long)sr_Err(canonical->sres);
   } else {
      gst->guest_RAX = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_amd64_RAX, sizeof(UWord) );

#  elif defined(VGP_ppc32_linux)
   /* ppc32: result in GPR3, error signalled via CR0.SO (bit 28). */
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* set CR0.SO */
      LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
      gst->guest_GPR3 = sr_Err(canonical->sres);
   } else {
      /* clear CR0.SO */
      LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
      gst->guest_GPR3 = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc32_GPR3, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc32_CR0_0, sizeof(UChar) );

#  elif defined(VGP_ppc64_linux)
   /* ppc64: same convention as ppc32. */
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* set CR0.SO */
      LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
      gst->guest_GPR3 = sr_Err(canonical->sres);
   } else {
      /* clear CR0.SO */
      LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
      gst->guest_GPR3 = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc64_GPR3, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc64_CR0_0, sizeof(UChar) );

#  elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-arm scheme.  Oh well. */
      gst->guest_R0 = - (Int)sr_Err(canonical->sres);
   } else {
      gst->guest_R0 = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_arm_R0, sizeof(UWord) );

#  elif defined(VGP_ppc32_aix5)
   /* AIX5: result in GPR3, error code in GPR4. */
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   gst->guest_GPR3 = canonical->sres.res;
   gst->guest_GPR4 = canonical->sres.err;
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc32_GPR3, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc32_GPR4, sizeof(UWord) );

#  elif defined(VGP_ppc64_aix5)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   gst->guest_GPR3 = canonical->sres.res;
   gst->guest_GPR4 = canonical->sres.err;
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc64_GPR3, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc64_GPR4, sizeof(UWord) );

#elif defined(VGP_x86_darwin)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   SysRes sres = canonical->sres;
   vg_assert(canonical->what == SsComplete);
   /* Unfortunately here we have to break abstraction and look
      directly inside 'res', in order to decide what to do. */
   switch (sres._mode) {
      case SysRes_MACH: // int $0x81 = Mach, 32-bit result
      case SysRes_MDEP: // int $0x82 = mdep, 32-bit result
         gst->guest_EAX = sres._wLO;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_x86_EAX, sizeof(UInt) );
         break;
      case SysRes_UNIX_OK:  // int $0x80 = Unix, 64-bit result
      case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error
         gst->guest_EAX = sres._wLO;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_x86_EAX, sizeof(UInt) );
         gst->guest_EDX = sres._wHI;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_x86_EDX, sizeof(UInt) );
         /* Carry flag set <=> the Unix-class syscall failed. */
         LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
                                      gst );
         // GrP fixme sets defined for entire eflags, not just bit c
         // DDD: this breaks exp-ptrcheck.
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) );
         break;
      default:
         vg_assert(0);
         break;
   }

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   SysRes sres = canonical->sres;
   vg_assert(canonical->what == SsComplete);
   /* Unfortunately here we have to break abstraction and look
      directly inside 'res', in order to decide what to do. */
   switch (sres._mode) {
      case SysRes_MACH: // syscall = Mach, 64-bit result
      case SysRes_MDEP: // syscall = mdep, 64-bit result
         gst->guest_RAX = sres._wLO;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_amd64_RAX, sizeof(ULong) );
         break;
      case SysRes_UNIX_OK:  // syscall = Unix, 128-bit result
      case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error
         gst->guest_RAX = sres._wLO;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_amd64_RAX, sizeof(ULong) );
         gst->guest_RDX = sres._wHI;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_amd64_RDX, sizeof(ULong) );
         /* Carry flag set <=> the Unix-class syscall failed. */
         LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
                                        gst );
         // GrP fixme sets defined for entire rflags, not just bit c
         // DDD: this breaks exp-ptrcheck.
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) );
         break;
      default:
         vg_assert(0);
         break;
   }

#  else
#    error "putSyscallStatusIntoGuestState: unknown arch"
#  endif
}
   1023 
   1024 
   1025 /* Tell me the offsets in the guest state of the syscall params, so
   1026    that the scalar argument checkers don't have to have this info
   1027    hardwired. */
   1028 
   1029 static
   1030 void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
   1031 {
   1032 #if defined(VGP_x86_linux)
   1033    layout->o_sysno  = OFFSET_x86_EAX;
   1034    layout->o_arg1   = OFFSET_x86_EBX;
   1035    layout->o_arg2   = OFFSET_x86_ECX;
   1036    layout->o_arg3   = OFFSET_x86_EDX;
   1037    layout->o_arg4   = OFFSET_x86_ESI;
   1038    layout->o_arg5   = OFFSET_x86_EDI;
   1039    layout->o_arg6   = OFFSET_x86_EBP;
   1040    layout->uu_arg7  = -1; /* impossible value */
   1041    layout->uu_arg8  = -1; /* impossible value */
   1042 
   1043 #elif defined(VGP_amd64_linux)
   1044    layout->o_sysno  = OFFSET_amd64_RAX;
   1045    layout->o_arg1   = OFFSET_amd64_RDI;
   1046    layout->o_arg2   = OFFSET_amd64_RSI;
   1047    layout->o_arg3   = OFFSET_amd64_RDX;
   1048    layout->o_arg4   = OFFSET_amd64_R10;
   1049    layout->o_arg5   = OFFSET_amd64_R8;
   1050    layout->o_arg6   = OFFSET_amd64_R9;
   1051    layout->uu_arg7  = -1; /* impossible value */
   1052    layout->uu_arg8  = -1; /* impossible value */
   1053 
   1054 #elif defined(VGP_ppc32_linux)
   1055    layout->o_sysno  = OFFSET_ppc32_GPR0;
   1056    layout->o_arg1   = OFFSET_ppc32_GPR3;
   1057    layout->o_arg2   = OFFSET_ppc32_GPR4;
   1058    layout->o_arg3   = OFFSET_ppc32_GPR5;
   1059    layout->o_arg4   = OFFSET_ppc32_GPR6;
   1060    layout->o_arg5   = OFFSET_ppc32_GPR7;
   1061    layout->o_arg6   = OFFSET_ppc32_GPR8;
   1062    layout->uu_arg7  = -1; /* impossible value */
   1063    layout->uu_arg8  = -1; /* impossible value */
   1064 
   1065 #elif defined(VGP_ppc64_linux)
   1066    layout->o_sysno  = OFFSET_ppc64_GPR0;
   1067    layout->o_arg1   = OFFSET_ppc64_GPR3;
   1068    layout->o_arg2   = OFFSET_ppc64_GPR4;
   1069    layout->o_arg3   = OFFSET_ppc64_GPR5;
   1070    layout->o_arg4   = OFFSET_ppc64_GPR6;
   1071    layout->o_arg5   = OFFSET_ppc64_GPR7;
   1072    layout->o_arg6   = OFFSET_ppc64_GPR8;
   1073    layout->uu_arg7  = -1; /* impossible value */
   1074    layout->uu_arg8  = -1; /* impossible value */
   1075 
   1076 #elif defined(VGP_arm_linux)
   1077    layout->o_sysno  = OFFSET_arm_R7;
   1078    layout->o_arg1   = OFFSET_arm_R0;
   1079    layout->o_arg2   = OFFSET_arm_R1;
   1080    layout->o_arg3   = OFFSET_arm_R2;
   1081    layout->o_arg4   = OFFSET_arm_R3;
   1082    layout->o_arg5   = OFFSET_arm_R4;
   1083    layout->o_arg6   = OFFSET_arm_R5;
   1084    layout->uu_arg7  = -1; /* impossible value */
   1085    layout->uu_arg8  = -1; /* impossible value */
   1086 
   1087 #elif defined(VGP_ppc32_aix5)
   1088    layout->o_sysno  = OFFSET_ppc32_GPR2;
   1089    layout->o_arg1   = OFFSET_ppc32_GPR3;
   1090    layout->o_arg2   = OFFSET_ppc32_GPR4;
   1091    layout->o_arg3   = OFFSET_ppc32_GPR5;
   1092    layout->o_arg4   = OFFSET_ppc32_GPR6;
   1093    layout->o_arg5   = OFFSET_ppc32_GPR7;
   1094    layout->o_arg6   = OFFSET_ppc32_GPR8;
   1095    layout->o_arg7   = OFFSET_ppc32_GPR9;
   1096    layout->o_arg8   = OFFSET_ppc32_GPR10;
   1097 
   1098 #elif defined(VGP_ppc64_aix5)
   1099    layout->o_sysno  = OFFSET_ppc64_GPR2;
   1100    layout->o_arg1   = OFFSET_ppc64_GPR3;
   1101    layout->o_arg2   = OFFSET_ppc64_GPR4;
   1102    layout->o_arg3   = OFFSET_ppc64_GPR5;
   1103    layout->o_arg4   = OFFSET_ppc64_GPR6;
   1104    layout->o_arg5   = OFFSET_ppc64_GPR7;
   1105    layout->o_arg6   = OFFSET_ppc64_GPR8;
   1106    layout->o_arg7   = OFFSET_ppc64_GPR9;
   1107    layout->o_arg8   = OFFSET_ppc64_GPR10;
   1108 
   1109 #elif defined(VGP_x86_darwin)
   1110    layout->o_sysno  = OFFSET_x86_EAX;
   1111    // syscall parameters are on stack in C convention
   1112    layout->s_arg1   = sizeof(UWord) * 1;
   1113    layout->s_arg2   = sizeof(UWord) * 2;
   1114    layout->s_arg3   = sizeof(UWord) * 3;
   1115    layout->s_arg4   = sizeof(UWord) * 4;
   1116    layout->s_arg5   = sizeof(UWord) * 5;
   1117    layout->s_arg6   = sizeof(UWord) * 6;
   1118    layout->s_arg7   = sizeof(UWord) * 7;
   1119    layout->s_arg8   = sizeof(UWord) * 8;
   1120 
   1121 #elif defined(VGP_amd64_darwin)
   1122    layout->o_sysno  = OFFSET_amd64_RAX;
   1123    layout->o_arg1   = OFFSET_amd64_RDI;
   1124    layout->o_arg2   = OFFSET_amd64_RSI;
   1125    layout->o_arg3   = OFFSET_amd64_RDX;
   1126    layout->o_arg4   = OFFSET_amd64_RCX;
   1127    layout->o_arg5   = OFFSET_amd64_R8;
   1128    layout->o_arg6   = OFFSET_amd64_R9;
   1129    layout->s_arg7   = sizeof(UWord) * 1;
   1130    layout->s_arg8   = sizeof(UWord) * 2;
   1131 
   1132 #else
   1133 #  error "getSyscallLayout: unknown arch"
   1134 #endif
   1135 }
   1136 
   1137 
   1138 /* ---------------------------------------------------------------------
   1139    The main driver logic
   1140    ------------------------------------------------------------------ */
   1141 
   1142 /* Finding the handlers for a given syscall, or faking up one
   1143    when no handler is found. */
   1144 
/* Dummy pre-handler, installed (via 'bad_sys') for syscalls which
   have no real handler on this platform.  Warns the user, optionally
   prints a stack trace, and fails the syscall with ENOSYS.  'layout',
   'args' and 'flags' are part of the standard pre-handler signature
   but are not used here. */
static
void bad_before ( ThreadId              tid,
                  SyscallArgLayout*     layout,
                  /*MOD*/SyscallArgs*   args,
                  /*OUT*/SyscallStatus* status,
                  /*OUT*/UWord*         flags )
{
   VG_(dmsg)("WARNING: unhandled syscall: %s\n",
      VG_SYSNUM_STRING_EXTRA(args->sysno));
   if (VG_(clo_verbosity) > 1) {
      VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   }
   VG_(dmsg)("You may be able to write your own handler.\n");
   VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
   VG_(dmsg)("Nevertheless we consider this a bug.  Please report\n");
   VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");

   /* Fail the call with ENOSYS, as the kernel would for an unknown
      syscall number. */
   SET_STATUS_Failure(VKI_ENOSYS);
}
   1164 
/* Catch-all table entry for unknown syscalls: pre-handler only
   (bad_before, which fails with ENOSYS); no post-handler. */
static SyscallTableEntry bad_sys =
   { bad_before, NULL };
   1167 
   1168 static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
   1169 {
   1170    const SyscallTableEntry* sys = NULL;
   1171 
   1172 #  if defined(VGO_linux)
   1173    sys = ML_(get_linux_syscall_entry)( syscallno );
   1174 
   1175 #  elif defined(VGP_ppc32_aix5)
   1176    sys = ML_(get_ppc32_aix5_syscall_entry) ( syscallno );
   1177 
   1178 #  elif defined(VGP_ppc64_aix5)
   1179    sys = ML_(get_ppc64_aix5_syscall_entry) ( syscallno );
   1180 
   1181 #  elif defined(VGO_darwin)
   1182    Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);
   1183 
   1184    switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
   1185    case VG_DARWIN_SYSCALL_CLASS_UNIX:
   1186       if (idx >= 0 && idx < ML_(syscall_table_size) &&
   1187           ML_(syscall_table)[idx].before != NULL)
   1188          sys = &ML_(syscall_table)[idx];
   1189          break;
   1190    case VG_DARWIN_SYSCALL_CLASS_MACH:
   1191       if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
   1192           ML_(mach_trap_table)[idx].before != NULL)
   1193          sys = &ML_(mach_trap_table)[idx];
   1194          break;
   1195    case VG_DARWIN_SYSCALL_CLASS_MDEP:
   1196       if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
   1197           ML_(mdep_trap_table)[idx].before != NULL)
   1198          sys = &ML_(mdep_trap_table)[idx];
   1199          break;
   1200    default:
   1201       vg_assert(0);
   1202       break;
   1203    }
   1204 
   1205 #  else
   1206 #    error Unknown OS
   1207 #  endif
   1208 
   1209    return sys == NULL  ? &bad_sys  : sys;
   1210 }
   1211 
   1212 
   1213 /* Add and remove signals from mask so that we end up telling the
   1214    kernel the state we actually want rather than what the client
   1215    wants. */
/* Strip from 'mask' the signals the client is not permitted to
   block, so the kernel is told the mask we actually want. */
static void sanitize_client_sigmask(vki_sigset_t *mask)
{
   VG_(sigdelset)(mask, VKI_SIGKILL);   /* client may not block */
   VG_(sigdelset)(mask, VKI_SIGSTOP);   /* client may not block */
   VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
}
   1222 
/* Per-thread record of an in-progress syscall. */
typedef
   struct {
      SyscallArgs   orig_args; /* args as read from the guest state */
      SyscallArgs   args;      /* copy of orig_args, possibly modified
                                  by the pre-handler */
      SyscallStatus status;    /* progress/result of the syscall */
      UWord         flags;     /* Sf* flags set by the pre-handler */
   }
   SyscallInfo;

/* One record per thread, indexed by ThreadId. */
SyscallInfo syscallInfo[VG_N_THREADS];
   1233 
   1234 
   1235 /* The scheduler needs to be able to zero out these records after a
   1236    fork, hence this is exported from m_syswrap. */
void VG_(clear_syscallInfo) ( Int tid )
{
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
   /* Explicitly mark the slot idle (SsIdle may not be the zero
      value left by memset). */
   syscallInfo[tid].status.what = SsIdle;
}
   1243 
   1244 static void ensure_initialised ( void )
   1245 {
   1246    Int i;
   1247    static Bool init_done = False;
   1248    if (init_done)
   1249       return;
   1250    init_done = True;
   1251    for (i = 0; i < VG_N_THREADS; i++) {
   1252       VG_(clear_syscallInfo)( i );
   1253    }
   1254 }
   1255 
   1256 /* --- This is the main function of this file. --- */
   1257 
   1258 void VG_(client_syscall) ( ThreadId tid, UInt trc )
   1259 {
   1260    Word                     sysno;
   1261    ThreadState*             tst;
   1262    const SyscallTableEntry* ent;
   1263    SyscallArgLayout         layout;
   1264    SyscallInfo*             sci;
   1265 
   1266    ensure_initialised();
   1267 
   1268    vg_assert(VG_(is_valid_tid)(tid));
   1269    vg_assert(tid >= 1 && tid < VG_N_THREADS);
   1270    vg_assert(VG_(is_running_thread)(tid));
   1271 
   1272    tst = VG_(get_ThreadState)(tid);
   1273 
   1274    /* BEGIN ensure root thread's stack is suitably mapped */
   1275    /* In some rare circumstances, we may do the syscall without the
   1276       bottom page of the stack being mapped, because the stack pointer
   1277       was moved down just a few instructions before the syscall
   1278       instruction, and there have been no memory references since
   1279       then, that would cause a call to VG_(extend_stack) to have
   1280       happened.
   1281 
   1282       In native execution that's OK: the kernel automagically extends
   1283       the stack's mapped area down to cover the stack pointer (or sp -
   1284       redzone, really).  In simulated normal execution that's OK too,
   1285       since any signals we get from accessing below the mapped area of
   1286       the (guest's) stack lead us to VG_(extend_stack), where we
   1287       simulate the kernel's stack extension logic.  But that leaves
   1288       the problem of entering a syscall with the SP unmapped.  Because
   1289       the kernel doesn't know that the segment immediately above SP is
   1290       supposed to be a grow-down segment, it causes the syscall to
   1291       fail, and thereby causes a divergence between native behaviour
   1292       (syscall succeeds) and simulated behaviour (syscall fails).
   1293 
   1294       This is quite a rare failure mode.  It has only been seen
   1295       affecting calls to sys_readlink on amd64-linux, and even then it
   1296       requires a certain code sequence around the syscall to trigger
   1297       it.  Here is one:
   1298 
   1299       extern int my_readlink ( const char* path );
   1300       asm(
   1301       ".text\n"
   1302       ".globl my_readlink\n"
   1303       "my_readlink:\n"
   1304       "\tsubq    $0x1008,%rsp\n"
   1305       "\tmovq    %rdi,%rdi\n"              // path is in rdi
   1306       "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
   1307       "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
   1308       "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
   1309       "\tsyscall\n"
   1310       "\taddq    $0x1008,%rsp\n"
   1311       "\tret\n"
   1312       ".previous\n"
   1313       );
   1314 
   1315       For more details, see bug #156404
   1316       (https://bugs.kde.org/show_bug.cgi?id=156404).
   1317 
   1318       The fix is actually very simple.  We simply need to call
   1319       VG_(extend_stack) for this thread, handing it the lowest
   1320       possible valid address for stack (sp - redzone), to ensure the
   1321       pages all the way down to that address, are mapped.  Because
   1322       this is a potentially expensive and frequent operation, we
   1323       filter in two ways:
   1324 
   1325       First, only the main thread (tid=1) has a growdown stack.  So
   1326       ignore all others.  It is conceivable, although highly unlikely,
   1327       that the main thread exits, and later another thread is
   1328       allocated tid=1, but that's harmless, I believe;
   1329       VG_(extend_stack) will do nothing when applied to a non-root
   1330       thread.
   1331 
   1332       Secondly, first call VG_(am_find_nsegment) directly, to see if
   1333       the page holding (sp - redzone) is mapped correctly.  If so, do
   1334       nothing.  This is almost always the case.  VG_(extend_stack)
   1335       calls VG_(am_find_nsegment) twice, so this optimisation -- and
   1336       that's all it is -- more or less halves the number of calls to
   1337       VG_(am_find_nsegment) required.
   1338 
   1339       TODO: the test "seg->kind == SkAnonC" is really inadequate,
   1340       because although it tests whether the segment is mapped
   1341       _somehow_, it doesn't check that it has the right permissions
   1342       (r,w, maybe x) ?  We could test that here, but it will also be
   1343       necessary to fix the corresponding test in VG_(extend_stack).
   1344 
   1345       All this guff is of course Linux-specific.  Hence the ifdef.
   1346    */
   1347 #  if defined(VGO_linux)
   1348    if (tid == 1/*ROOT THREAD*/) {
   1349       Addr     stackMin   = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
   1350       NSegment const* seg = VG_(am_find_nsegment)(stackMin);
   1351       if (seg && seg->kind == SkAnonC) {
   1352          /* stackMin is already mapped.  Nothing to do. */
   1353       } else {
   1354          (void)VG_(extend_stack)( stackMin,
   1355                                   tst->client_stack_szB );
   1356       }
   1357    }
   1358 #  endif
   1359    /* END ensure root thread's stack is suitably mapped */
   1360 
   1361    /* First off, get the syscall args and number.  This is a
   1362       platform-dependent action. */
   1363 
   1364    sci = & syscallInfo[tid];
   1365    vg_assert(sci->status.what == SsIdle);
   1366 
   1367    getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );
   1368 
   1369    /* Copy .orig_args to .args.  The pre-handler may modify .args, but
   1370       we want to keep the originals too, just in case. */
   1371    sci->args = sci->orig_args;
   1372 
   1373    /* Save the syscall number in the thread state in case the syscall
   1374       is interrupted by a signal. */
   1375    sysno = sci->orig_args.sysno;
   1376 
   1377 #  if defined(VGO_darwin)
   1378    /* Record syscall class.  But why?  Because the syscall might be
   1379       interrupted by a signal, and in the signal handler (which will
   1380       be m_signals.async_signalhandler) we will need to build a SysRes
   1381       reflecting the syscall return result.  In order to do that we
   1382       need to know the syscall class.  Hence stash it in the guest
   1383       state of this thread.  This madness is not needed on Linux or
   1384       AIX5, because those OSs only have a single syscall return
   1385       convention and so there is no ambiguity involved in converting
   1386       the post-signal machine state into a SysRes. */
   1387    tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
   1388 #  endif
   1389 
   1390    /* The default what-to-do-next thing is hand the syscall to the
   1391       kernel, so we pre-set that here.  Set .sres to something
   1392       harmless looking (is irrelevant because .what is not
   1393       SsComplete.) */
   1394    sci->status.what = SsHandToKernel;
   1395    sci->status.sres = VG_(mk_SysRes_Error)(0);
   1396    sci->flags       = 0;
   1397 
   1398    /* Fetch the syscall's handlers.  If no handlers exist for this
   1399       syscall, we are given dummy handlers which force an immediate
   1400       return with ENOSYS. */
   1401    ent = get_syscall_entry(sysno);
   1402 
   1403    /* Fetch the layout information, which tells us where in the guest
   1404       state the syscall args reside.  This is a platform-dependent
   1405       action.  This info is needed so that the scalar syscall argument
   1406       checks (PRE_REG_READ calls) know which bits of the guest state
   1407       they need to inspect. */
   1408    getSyscallArgLayout( &layout );
   1409 
   1410    /* Make sure the tmp signal mask matches the real signal mask;
   1411       sigsuspend may change this. */
   1412    vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));
   1413 
   1414    /* Right, we're finally ready to Party.  Call the pre-handler and
   1415       see what we get back.  At this point:
   1416 
   1417         sci->status.what  is Unset (we don't know yet).
   1418         sci->orig_args    contains the original args.
   1419         sci->args         is the same as sci->orig_args.
   1420         sci->flags        is zero.
   1421    */
   1422 
   1423    PRINT("SYSCALL[%d,%d](%s) ",
   1424       VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));
   1425 
   1426    /* Do any pre-syscall actions */
   1427    if (VG_(needs).syscall_wrapper) {
   1428       UWord tmpv[8];
   1429       tmpv[0] = sci->orig_args.arg1;
   1430       tmpv[1] = sci->orig_args.arg2;
   1431       tmpv[2] = sci->orig_args.arg3;
   1432       tmpv[3] = sci->orig_args.arg4;
   1433       tmpv[4] = sci->orig_args.arg5;
   1434       tmpv[5] = sci->orig_args.arg6;
   1435       tmpv[6] = sci->orig_args.arg7;
   1436       tmpv[7] = sci->orig_args.arg8;
   1437       VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
   1438                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
   1439    }
   1440 
   1441    vg_assert(ent);
   1442    vg_assert(ent->before);
   1443    (ent->before)( tid,
   1444                   &layout,
   1445                   &sci->args, &sci->status, &sci->flags );
   1446 
   1447    /* The pre-handler may have modified:
   1448          sci->args
   1449          sci->status
   1450          sci->flags
   1451       All else remains unchanged.
   1452       Although the args may be modified, pre handlers are not allowed
   1453       to change the syscall number.
   1454    */
   1455    /* Now we proceed according to what the pre-handler decided. */
   1456    vg_assert(sci->status.what == SsHandToKernel
   1457              || sci->status.what == SsComplete);
   1458    vg_assert(sci->args.sysno == sci->orig_args.sysno);
   1459 
   1460    if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
   1461       /* The pre-handler completed the syscall itself, declaring
   1462          success. */
   1463       if (sci->flags & SfNoWriteResult) {
   1464          PRINT(" --> [pre-success] NoWriteResult");
   1465       } else {
   1466          PRINT(" --> [pre-success] Success(0x%llx:0x%llx)",
   1467                (ULong)sr_ResHI(sci->status.sres),
   1468                (ULong)sr_Res(sci->status.sres));
   1469       }
   1470       /* In this case the allowable flags are to ask for a signal-poll
   1471          and/or a yield after the call.  Changing the args isn't
   1472          allowed. */
   1473       vg_assert(0 == (sci->flags
   1474                       & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
   1475       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   1476    }
   1477 
   1478    else
   1479    if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
   1480       /* The pre-handler decided to fail syscall itself. */
   1481       PRINT(" --> [pre-fail] Failure(0x%llx)", (ULong)sr_Err(sci->status.sres));
   1482       /* In this case, the pre-handler is also allowed to ask for the
   1483          post-handler to be run anyway.  Changing the args is not
   1484          allowed. */
   1485       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
   1486       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   1487    }
   1488 
   1489    else
   1490    if (sci->status.what != SsHandToKernel) {
   1491       /* huh?! */
   1492       vg_assert(0);
   1493    }
   1494 
   1495    else /* (sci->status.what == HandToKernel) */ {
   1496       /* Ok, this is the usual case -- and the complicated one.  There
   1497          are two subcases: sync and async.  async is the general case
   1498          and is to be used when there is any possibility that the
   1499          syscall might block [a fact that the pre-handler must tell us
   1500          via the sci->flags field.]  Because the tidying-away /
   1501          context-switch overhead of the async case could be large, if
   1502          we are sure that the syscall will not block, we fast-track it
   1503          by doing it directly in this thread, which is a lot
   1504          simpler. */
   1505 
   1506       /* Check that the given flags are allowable: MayBlock, PollAfter
   1507          and PostOnFail are ok. */
   1508       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
   1509 
   1510       if (sci->flags & SfMayBlock) {
   1511 
   1512          /* Syscall may block, so run it asynchronously */
   1513          vki_sigset_t mask;
   1514 
   1515          PRINT(" --> [async] ... \n");
   1516 
   1517          mask = tst->sig_mask;
   1518          sanitize_client_sigmask(&mask);
   1519 
   1520          /* Gack.  More impedance matching.  Copy the possibly
   1521             modified syscall args back into the guest state. */
   1522          /* JRS 2009-Mar-16: if the syscall args are possibly modified,
   1523             then this assertion is senseless:
   1524               vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   1525             The case that exposed it was sys_posix_spawn on Darwin,
   1526             which heavily modifies its arguments but then lets the call
   1527             go through anyway, with SfToBlock set, hence we end up here. */
   1528          putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
   1529 
   1530          /* Drop the bigLock */
   1531          VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
   1532          /* Urr.  We're now in a race against other threads trying to
   1533             acquire the bigLock.  I guess that doesn't matter provided
   1534             that do_syscall_for_client only touches thread-local
   1535             state. */
   1536 
   1537          /* Do the call, which operates directly on the guest state,
   1538             not on our abstracted copies of the args/result. */
   1539          do_syscall_for_client(sysno, tst, &mask);
   1540 
   1541          /* do_syscall_for_client may not return if the syscall was
   1542             interrupted by a signal.  In that case, flow of control is
   1543             first to m_signals.async_sighandler, which calls
   1544             VG_(fixup_guest_state_after_syscall_interrupted), which
   1545             fixes up the guest state, and possibly calls
   1546             VG_(post_syscall).  Once that's done, control drops back
   1547             to the scheduler.  */
   1548 
   1549          /* Darwin: do_syscall_for_client may not return if the
   1550             syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
   1551             responded by starting the thread at wqthread_hijack(reuse=1)
   1552             (to run another workqueue item). In that case, wqthread_hijack
   1553             calls ML_(wqthread_continue), which is similar to
   1554             VG_(fixup_guest_state_after_syscall_interrupted). */
   1555 
   1556          /* Reacquire the lock */
   1557          VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");
   1558 
   1559          /* Even more impedance matching.  Extract the syscall status
   1560             from the guest state. */
   1561          getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
   1562          vg_assert(sci->status.what == SsComplete);
   1563 
   1564          /* Be decorative, if required. */
   1565          if (VG_(clo_trace_syscalls)) {
   1566             Bool failed = sr_isError(sci->status.sres);
   1567             if (failed) {
   1568                PRINT("SYSCALL[%d,%d](%s) ... [async] --> Failure(0x%llx)",
   1569                      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
   1570                      (ULong)sr_Err(sci->status.sres));
   1571             } else {
   1572                PRINT("SYSCALL[%d,%d](%s) ... [async] --> "
   1573                      "Success(0x%llx:0x%llx)",
   1574                      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
   1575                      (ULong)sr_ResHI(sci->status.sres),
   1576                      (ULong)sr_Res(sci->status.sres) );
   1577             }
   1578          }
   1579 
   1580       } else {
   1581 
   1582          /* run the syscall directly */
   1583          /* The pre-handler may have modified the syscall args, but
   1584             since we're passing values in ->args directly to the
   1585             kernel, there's no point in flushing them back to the
   1586             guest state.  Indeed doing so could be construed as
   1587             incorrect. */
   1588          SysRes sres
   1589             = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
   1590                                      sci->args.arg3, sci->args.arg4,
   1591                                      sci->args.arg5, sci->args.arg6,
   1592                                      sci->args.arg7, sci->args.arg8 );
   1593          sci->status = convert_SysRes_to_SyscallStatus(sres);
   1594 
   1595          /* Be decorative, if required. */
   1596          if (VG_(clo_trace_syscalls)) {
   1597             Bool failed = sr_isError(sci->status.sres);
   1598             if (failed) {
   1599                PRINT("[sync] --> Failure(0x%llx)",
   1600                      (ULong)sr_Err(sci->status.sres) );
   1601             } else {
   1602                PRINT("[sync] --> Success(0x%llx:0x%llx)",
   1603                      (ULong)sr_ResHI(sci->status.sres),
   1604                      (ULong)sr_Res(sci->status.sres) );
   1605             }
   1606          }
   1607       }
   1608    }
   1609 
   1610    vg_assert(sci->status.what == SsComplete);
   1611 
   1612    vg_assert(VG_(is_running_thread)(tid));
   1613 
   1614    /* Dump the syscall result back in the guest state.  This is
   1615       a platform-specific action. */
   1616    if (!(sci->flags & SfNoWriteResult))
   1617       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
   1618 
   1619    /* Situation now:
   1620       - the guest state is now correctly modified following the syscall
   1621       - modified args, original args and syscall status are still
   1622         available in the syscallInfo[] entry for this syscall.
   1623 
   1624       Now go on to do the post-syscall actions (read on down ..)
   1625    */
   1626    PRINT(" ");
   1627    VG_(post_syscall)(tid);
   1628    PRINT("\n");
   1629 }
   1630 
   1631 
   1632 /* Perform post syscall actions.  The expected state on entry is
   1633    precisely as at the end of VG_(client_syscall), that is:
   1634 
   1635    - guest state up to date following the syscall
   1636    - modified args, original args and syscall status are still
   1637      available in the syscallInfo[] entry for this syscall.
   1638    - syscall status matches what's in the guest state.
   1639 
   1640    There are two ways to get here: the normal way -- being called by
   1641    VG_(client_syscall), and the unusual way, from
   1642    VG_(fixup_guest_state_after_syscall_interrupted).
   1643    Darwin: there's a third way, ML_(wqthread_continue).
   1644 */
    1645 void VG_(post_syscall) (ThreadId tid)
    1646 {
    1647    SyscallInfo*             sci;
    1648    const SyscallTableEntry* ent;
    1649    SyscallStatus            test_status;  /* scratch: compared against guest state below */
    1650    ThreadState*             tst;
    1651    Word sysno;                /* syscall number; identical in orig and modified args */
    1652 
    1653    /* Preliminaries: tid must be valid and must be the running thread. */
    1654    vg_assert(VG_(is_valid_tid)(tid));
    1655    vg_assert(tid >= 1 && tid < VG_N_THREADS);
    1656    vg_assert(VG_(is_running_thread)(tid));
    1657 
    1658    tst = VG_(get_ThreadState)(tid);
    1659    sci = & syscallInfo[tid];
    1660 
    1661    /* m_signals.sigvgkill_handler might call here even when not in
    1662       a syscall. */
    1663    if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
    1664       sci->status.what = SsIdle;
    1665       return;
    1666    }
    1667 
    1668    /* Validate current syscallInfo entry.  In particular we require
    1669       that the current .status matches what's actually in the guest
    1670       state.  At least in the normal case where we have actually
    1671       previously written the result into the guest state. */
    1672    vg_assert(sci->status.what == SsComplete);
    1673 
    1674    getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
    1675    if (!(sci->flags & SfNoWriteResult))
    1676       vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
    1677    /* Failure of the above assertion on Darwin can indicate a problem
    1678       in the syscall wrappers that pre-fail or pre-succeed the
    1679       syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
    1680       when they really should call SET_STATUS_from_SysRes.  The former
    1681       create a UNIX-class syscall result on Darwin, which may not be
    1682       correct for the syscall; if that's the case then this assertion
    1683       fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
    1684       non-Darwin platforms this assertion should never fail, and this
    1685       comment is completely irrelevant. */
    1686    /* Ok, looks sane */
    1687 
    1688    /* Get the system call number.  Because the pre-handler isn't
    1689       allowed to mess with it, it should be the same for both the
    1690       original and potentially-modified args. */
    1691    vg_assert(sci->args.sysno == sci->orig_args.sysno);
    1692    sysno = sci->args.sysno;
    1693    ent = get_syscall_entry(sysno);
    1694 
    1695    /* pre: status == Complete (asserted above) */
    1696    /* Consider either success or failure.  Now run the post handler if:
    1697       - it exists, and
    1698       - Success or (Failure and PostOnFail is set)
    1699    */
    1700    if (ent->after
    1701        && ((!sr_isError(sci->status.sres))
    1702            || (sr_isError(sci->status.sres)
    1703                && (sci->flags & SfPostOnFail) ))) {
    1704 
    1705       (ent->after)( tid, &sci->args, &sci->status );
    1706    }
    1707 
    1708    /* Because the post handler might have changed the status (eg, the
    1709       post-handler for sys_open can change the result from success to
    1710       failure if the kernel supplied a fd that it doesn't like), once
    1711       again dump the syscall result back in the guest state.*/
    1712    if (!(sci->flags & SfNoWriteResult))
    1713       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
    1714 
    1715    /* Do any post-syscall actions required by the tool.  Note: the tool
    1716       sees the ORIGINAL args, not any pre-handler modifications. */
    1716    if (VG_(needs).syscall_wrapper) {
    1717       UWord tmpv[8];
    1718       tmpv[0] = sci->orig_args.arg1;
    1719       tmpv[1] = sci->orig_args.arg2;
    1720       tmpv[2] = sci->orig_args.arg3;
    1721       tmpv[3] = sci->orig_args.arg4;
    1722       tmpv[4] = sci->orig_args.arg5;
    1723       tmpv[5] = sci->orig_args.arg6;
    1724       tmpv[6] = sci->orig_args.arg7;
    1725       tmpv[7] = sci->orig_args.arg8;
    1726       VG_TDICT_CALL(tool_post_syscall, tid,
    1727                     sysno,
    1728                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
    1729                     sci->status.sres);
    1730    }
    1731 
    1732    /* The syscall is done. */
    1733    vg_assert(sci->status.what == SsComplete);
    1734    sci->status.what = SsIdle;
    1735 
    1736    /* The pre/post wrappers may have concluded that pending signals
    1737       might have been created, and will have set SfPollAfter to
    1738       request a poll for them once the syscall is done. */
    1739    if (sci->flags & SfPollAfter)
    1740       VG_(poll_signals)(tid);
    1741 
    1742    /* Similarly, the wrappers might have asked for a yield
    1743       afterwards. */
    1744    if (sci->flags & SfYieldAfter)
    1745       VG_(vg_yield)();
    1746 }
   1747 
   1748 
   1749 /* ---------------------------------------------------------------------
   1750    Dealing with syscalls which get interrupted by a signal:
   1751    VG_(fixup_guest_state_after_syscall_interrupted)
   1752    ------------------------------------------------------------------ */
   1753 
   1754 /* Syscalls done on behalf of the client are finally handed off to the
   1755    kernel in VG_(client_syscall) above, either by calling
   1756    do_syscall_for_client (the async case), or by calling
   1757    VG_(do_syscall6) (the sync case).
   1758 
   1759    If the syscall is not interrupted by a signal (it may block and
   1760    later unblock, but that's irrelevant here) then those functions
   1761    eventually return and so control is passed to VG_(post_syscall).
   1762    NB: not sure if the sync case can actually get interrupted, as it
   1763    operates with all signals masked.
   1764 
   1765    However, the syscall may get interrupted by an async-signal.  In
   1766    that case do_syscall_for_client/VG_(do_syscall6) do not
   1767    return.  Instead we wind up in m_signals.async_sighandler.  We need
   1768    to fix up the guest state to make it look like the syscall was
    1769    interrupted for the guest.  So async_sighandler calls here, and this
   1770    does the fixup.  Note that from here we wind up calling
   1771    VG_(post_syscall) too.
   1772 */
   1773 
   1774 
   1775 /* These are addresses within ML_(do_syscall_for_client_WRK).  See
   1776    syscall-$PLAT.S for details.
   1777 */
   1778 #if defined(VGO_linux) || defined(VGO_aix5)
   1779   extern const Addr ML_(blksys_setup);
   1780   extern const Addr ML_(blksys_restart);
   1781   extern const Addr ML_(blksys_complete);
   1782   extern const Addr ML_(blksys_committed);
   1783   extern const Addr ML_(blksys_finished);
   1784 #elif defined(VGO_darwin)
    1785   /* Darwin requires extra ugliness */
   1786   extern const Addr ML_(blksys_setup_MACH);
   1787   extern const Addr ML_(blksys_restart_MACH);
   1788   extern const Addr ML_(blksys_complete_MACH);
   1789   extern const Addr ML_(blksys_committed_MACH);
   1790   extern const Addr ML_(blksys_finished_MACH);
   1791   extern const Addr ML_(blksys_setup_MDEP);
   1792   extern const Addr ML_(blksys_restart_MDEP);
   1793   extern const Addr ML_(blksys_complete_MDEP);
   1794   extern const Addr ML_(blksys_committed_MDEP);
   1795   extern const Addr ML_(blksys_finished_MDEP);
   1796   extern const Addr ML_(blksys_setup_UNIX);
   1797   extern const Addr ML_(blksys_restart_UNIX);
   1798   extern const Addr ML_(blksys_complete_UNIX);
   1799   extern const Addr ML_(blksys_committed_UNIX);
   1800   extern const Addr ML_(blksys_finished_UNIX);
   1801 #else
   1802 # error "Unknown OS"
   1803 #endif
   1804 
   1805 
   1806 /* Back up guest state to restart a system call. */
   1807 
    1808 void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
    1809 {
    1810 #if defined(VGP_x86_linux)
    1811    arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)
    1812 
    1813    /* Make sure our caller is actually sane, and we're really backing
    1814       back over a syscall.
    1815 
    1816       int $0x80 == CD 80
    1817    */
    1818    {
    1819       UChar *p = (UChar *)arch->vex.guest_EIP;
    1820 
    1821       if (p[0] != 0xcd || p[1] != 0x80)
    1822          VG_(message)(Vg_DebugMsg,
    1823                       "?! restarting over syscall at %#x %02x %02x\n",
    1824                       arch->vex.guest_EIP, p[0], p[1]);
    1825 
    1826       vg_assert(p[0] == 0xcd && p[1] == 0x80);
    1827    }
    1828 
    1829 #elif defined(VGP_amd64_linux)
    1830    arch->vex.guest_RIP -= 2;             // sizeof(syscall)
    1831 
    1832    /* Make sure our caller is actually sane, and we're really backing
    1833       back over a syscall.
    1834 
    1835       syscall == 0F 05
    1836    */
    1837    {
    1838       UChar *p = (UChar *)arch->vex.guest_RIP;
    1839 
    1840       if (p[0] != 0x0F || p[1] != 0x05)
    1841          VG_(message)(Vg_DebugMsg,
    1842                       "?! restarting over syscall at %#llx %02x %02x\n",
    1843                       arch->vex.guest_RIP, p[0], p[1]);
    1844 
    1845       vg_assert(p[0] == 0x0F && p[1] == 0x05);
    1846    }
    1847 
    1848 #elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
    1849    arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)
    1850 
    1851    /* Make sure our caller is actually sane, and we're really backing
    1852       back over a syscall.
    1853 
    1854       sc == 44 00 00 02
    1855    */
    1856    {
    1857       UChar *p = (UChar *)arch->vex.guest_CIA;
    1858 
    1859       if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
    1860          VG_(message)(Vg_DebugMsg,
    1861                       "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
    1862                       arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);
    1863 
    1864       vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
    1865    }
    1866 
    1867 #elif defined(VGP_arm_linux)
    1868    if (arch->vex.guest_R15T & 1) {
    1869       // Thumb mode.  SVC is encoded as
    1870       //   1101 1111 imm8
    1871       // where imm8 is the SVC number, and we only accept 0.
    1872       arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
    1873       UChar* p     = (UChar*)(arch->vex.guest_R15T - 1);  // -1 clears the Thumb bit (bit 0, asserted set above)
    1874       Bool   valid = p[0] == 0 && p[1] == 0xDF;
    1875       if (!valid) {
    1876          VG_(message)(Vg_DebugMsg,
    1877                       "?! restarting over (Thumb) syscall that is not syscall "
    1878                       "at %#llx %02x %02x\n",
    1879                       arch->vex.guest_R15T - 1ULL, p[0], p[1]);
    1880       }
    1881       vg_assert(valid);
    1882       // FIXME: NOTE, this really isn't right.  We need to back up
    1883       // ITSTATE to what it was before the SVC instruction, but we
    1884       // don't know what it was.  At least assert that it is now
    1885       // zero, because if it is nonzero then it must also have
    1886       // been nonzero for the SVC itself, which means it was
    1887       // conditional.  Urk.
    1888       vg_assert(arch->vex.guest_ITSTATE == 0);
    1889    } else {
    1890       // ARM mode.  SVC is encoded as
    1891       //   cond 1111 imm24
    1892       // where imm24 is the SVC number, and we only accept 0.
    1893       arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
    1894       UChar* p     = (UChar*)arch->vex.guest_R15T;
    1895       Bool   valid = p[0] == 0 && p[1] == 0 && p[2] == 0
    1896                      && (p[3] & 0xF) == 0xF;
    1897       if (!valid) {
    1898          VG_(message)(Vg_DebugMsg,
    1899                       "?! restarting over (ARM) syscall that is not syscall "
    1900                       "at %#llx %02x %02x %02x %02x\n",
    1901                       arch->vex.guest_R15T + 0ULL, p[0], p[1], p[2], p[3]);
    1902       }
    1903       vg_assert(valid);
    1904    }
    1905 
    1906 #elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
    1907    /* Hmm.  This is problematic, because on AIX the kernel resumes
    1908       after a syscall at LR, not at the insn following SC.  Hence
    1909       there is no obvious way to figure out where the SC is.  Current
    1910       solution is to have a pseudo-register in the guest state,
    1911       CIA_AT_SC, which holds the address of the most recent SC
    1912       executed.  Backing up to that syscall then simply involves
    1913       copying that value back into CIA (the program counter). */
    1914    arch->vex.guest_CIA = arch->vex.guest_CIA_AT_SC;
    1915 
    1916    /* Make sure our caller is actually sane, and we're really backing
    1917       back over a syscall.
    1918 
    1919       sc == 44 00 00 02
    1920    */
    1921    {
    1922       UChar *p = (UChar *)arch->vex.guest_CIA;
    1923 
    1924       if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
    1925          VG_(message)(Vg_DebugMsg,
    1926                       "?! restarting over syscall at %#lx %02x %02x %02x %02x\n",
    1927                       (UWord)arch->vex.guest_CIA, p[0], p[1], p[2], p[3]);
    1928 
    1929       vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
    1930    }
    1931 
    1932 #elif defined(VGP_x86_darwin)
    1933    arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;
    1934 
    1935    /* Make sure our caller is actually sane, and we're really backing
    1936       back over a syscall.
    1937 
    1938       int $0x80 == CD 80
    1939       int $0x81 == CD 81
    1940       int $0x82 == CD 82
    1941       sysenter  == 0F 34
    1942    */
    1943    {
    1944        UChar *p = (UChar *)arch->vex.guest_EIP;
    1945        Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
    1946                   || (p[0] == 0xCD && p[1] == 0x81)
    1947                   || (p[0] == 0xCD && p[1] == 0x82)
    1948                   || (p[0] == 0x0F && p[1] == 0x34);
    1949        if (!ok)
    1950            VG_(message)(Vg_DebugMsg,
    1951                         "?! restarting over syscall at %#x %02x %02x\n",
    1952                         arch->vex.guest_EIP, p[0], p[1]);
    1953        vg_assert(ok);
    1954    }
    1955 
    1956 #elif defined(VGP_amd64_darwin)
    1957    // DDD: #warning GrP fixme amd64 restart unimplemented
    1958    vg_assert(0);
    1959 
    1960 #else
    1961 #  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
    1962 #endif
    1963 }
   1964 
   1965 /*
   1966    Fix up the guest state when a syscall is interrupted by a signal
   1967    and so has been forced to return 'sysret'.
   1968 
   1969    To do this, we determine the precise state of the syscall by
   1970    looking at the (real) IP at the time the signal happened.  The
   1971    syscall sequence looks like:
   1972 
   1973      1. unblock signals
   1974      2. perform syscall
   1975      3. save result to guest state (EAX, RAX, R3+CR0.SO)
   1976      4. re-block signals
   1977 
   1978    If a signal
   1979    happens at      Then     Why?
   1980    [1-2)           restart  nothing has happened (restart syscall)
   1981    [2]             restart  syscall hasn't started, or kernel wants to restart
   1982    [2-3)           save     syscall complete, but results not saved
    1983    [3-4)           n/a      syscall complete, results saved
   1984 
   1985    Sometimes we never want to restart an interrupted syscall (because
   1986    sigaction says not to), so we only restart if "restart" is True.
   1987 
   1988    This will also call VG_(post_syscall) if the syscall has actually
   1989    completed (either because it was interrupted, or because it
   1990    actually finished).  It will not call VG_(post_syscall) if the
   1991    syscall is set up for restart, which means that the pre-wrapper may
   1992    get called multiple times.
   1993 */
   1994 
   1995 void
   1996 VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
   1997                                                   Addr     ip,
   1998                                                   SysRes   sres,
   1999                                                   Bool     restart)
   2000 {
   2001    /* Note that we don't know the syscall number here, since (1) in
   2002       general there's no reliable way to get hold of it short of
   2003       stashing it in the guest state before the syscall, and (2) in
   2004       any case we don't need to know it for the actions done by this
   2005       routine.
   2006 
   2007       Furthermore, 'sres' is only used in the case where the syscall
   2008       is complete, but the result has not been committed to the guest
   2009       state yet.  In any other situation it will be meaningless and
   2010       therefore ignored. */
   2011 
   2012    ThreadState*     tst;
   2013    SyscallStatus    canonical;
   2014    ThreadArchState* th_regs;
   2015    SyscallInfo*     sci;
   2016 
   2017    /* Compute some Booleans indicating which range we're in. */
   2018    Bool outside_range,
   2019         in_setup_to_restart,      // [1,2) in the .S files
   2020         at_restart,               // [2]   in the .S files
   2021         in_complete_to_committed, // [3,4) in the .S files
   2022         in_committed_to_finished; // [4,5) in the .S files
   2023 
   2024 #  if defined(VGO_linux) || defined(VGO_aix5)
   2025    outside_range
   2026       = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
   2027    in_setup_to_restart
   2028       = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
   2029    at_restart
   2030       = ip == ML_(blksys_restart);
   2031    in_complete_to_committed
   2032       = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
   2033    in_committed_to_finished
   2034       = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
   2035 #  elif defined(VGO_darwin)
   2036    outside_range
   2037       =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
   2038       && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
   2039       && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
   2040    in_setup_to_restart
   2041       =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
   2042       || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
   2043       || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
   2044    at_restart
   2045       =  (ip == ML_(blksys_restart_MACH))
   2046       || (ip == ML_(blksys_restart_MDEP))
   2047       || (ip == ML_(blksys_restart_UNIX));
   2048    in_complete_to_committed
   2049       =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
   2050       || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
   2051       || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
   2052    in_committed_to_finished
   2053       =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
   2054       || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
   2055       || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
   2056    /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
   2057 #  else
   2058 #    error "Unknown OS"
   2059 #  endif
   2060 
   2061    if (VG_(clo_trace_signals))
   2062       VG_(message)( Vg_DebugMsg,
   2063                     "interrupted_syscall: tid=%d, ip=0x%llx, "
   2064                     "restart=%s, sres.isErr=%s, sres.val=%lld\n",
   2065                     (Int)tid,
   2066                     (ULong)ip,
   2067                     restart ? "True" : "False",
   2068                     sr_isError(sres) ? "True" : "False",
   2069                     (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );
   2070 
   2071    vg_assert(VG_(is_valid_tid)(tid));
   2072    vg_assert(tid >= 1 && tid < VG_N_THREADS);
   2073    vg_assert(VG_(is_running_thread)(tid));
   2074 
   2075    tst     = VG_(get_ThreadState)(tid);
   2076    th_regs = &tst->arch;
   2077    sci     = & syscallInfo[tid];
   2078 
   2079    /* Figure out what the state of the syscall was by examining the
   2080       (real) IP at the time of the signal, and act accordingly. */
   2081    if (outside_range) {
   2082       if (VG_(clo_trace_signals))
   2083          VG_(message)( Vg_DebugMsg,
   2084                        "  not in syscall at all: hmm, very suspicious\n" );
   2085       /* Looks like we weren't in a syscall at all.  Hmm. */
   2086       vg_assert(sci->status.what != SsIdle);
   2087       return;
   2088    }
   2089 
   2090    /* We should not be here unless this thread had first started up
   2091       the machinery for a syscall by calling VG_(client_syscall).
   2092       Hence: */
   2093    vg_assert(sci->status.what != SsIdle);
   2094 
   2095    /* now, do one of four fixup actions, depending on where the IP has
   2096       got to. */
   2097 
   2098    if (in_setup_to_restart) {
   2099       /* syscall hasn't even started; go around again */
   2100       if (VG_(clo_trace_signals))
   2101          VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
   2102       vg_assert(sci->status.what == SsHandToKernel);
   2103       ML_(fixup_guest_state_to_restart_syscall)(th_regs);
   2104    }
   2105 
   2106    else
   2107    if (at_restart) {
   2108       /* We're either about to run the syscall, or it was interrupted
   2109          and the kernel restarted it.  Restart if asked, otherwise
   2110          EINTR it. */
   2111       if (restart) {
   2112          if (VG_(clo_trace_signals))
   2113             VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
   2114          ML_(fixup_guest_state_to_restart_syscall)(th_regs);
   2115       } else {
   2116          if (VG_(clo_trace_signals))
   2117             VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
   2118          canonical = convert_SysRes_to_SyscallStatus(
   2119                         VG_(mk_SysRes_Error)( VKI_EINTR )
   2120                      );
   2121          if (!(sci->flags & SfNoWriteResult))
   2122             putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
   2123          sci->status = canonical;
   2124          VG_(post_syscall)(tid);
   2125       }
   2126    }
   2127 
   2128    else
   2129    if (in_complete_to_committed) {
   2130       /* Syscall complete, but result hasn't been written back yet.
   2131          Write the SysRes we were supplied with back to the guest
   2132          state. */
   2133       if (VG_(clo_trace_signals))
   2134          VG_(message)( Vg_DebugMsg,
   2135                        "  completed, but uncommitted: committing\n");
   2136       canonical = convert_SysRes_to_SyscallStatus( sres );
   2137       if (!(sci->flags & SfNoWriteResult))
   2138          putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
   2139       sci->status = canonical;
   2140       VG_(post_syscall)(tid);
   2141    }
   2142 
   2143    else
   2144    if (in_committed_to_finished) {
   2145       /* Result committed, but the signal mask has not been restored;
   2146          we expect our caller (the signal handler) will have fixed
   2147          this up. */
   2148       if (VG_(clo_trace_signals))
   2149          VG_(message)( Vg_DebugMsg,
   2150                        "  completed and committed: nothing to do\n");
   2151       getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
   2152       vg_assert(sci->status.what == SsComplete);
   2153       VG_(post_syscall)(tid);
   2154    }
   2155 
   2156    else
   2157       VG_(core_panic)("?? strange syscall interrupt state?");
   2158 
   2159    /* In all cases, the syscall is now finished (even if we called
   2160       ML_(fixup_guest_state_to_restart_syscall), since that just
   2161       re-positions the guest's IP for another go at it).  So we need
   2162       to record that fact. */
   2163    sci->status.what = SsIdle;
   2164 }
   2165 
   2166 
   2167 #if defined(VGO_darwin)
   2168 // Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
   2169 // This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
   2170 // This longjmps back to the scheduler.
void ML_(wqthread_continue_NORETURN)(ThreadId tid)
{
   ThreadState*     tst;
   SyscallInfo*     sci;

   /* Re-acquire the scheduler lock on behalf of this thread; it was
      dropped when the thread handed control to the kernel. */
   VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");

   PRINT("SYSCALL[%d,%d](%s) workq_ops() starting new workqueue item\n",
         VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));

   /* Sanity-check the thread id before touching per-thread state. */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst     = VG_(get_ThreadState)(tid);
   sci     = & syscallInfo[tid];
   /* The thread must still be mid-syscall (workq_ops), and the
      wqthread jump buffer must be armed; post_syscall may invalidate
      it, hence the ordering of these two asserts. */
   vg_assert(sci->status.what != SsIdle);
   vg_assert(tst->os_state.wq_jmpbuf_valid);  // check this BEFORE post_syscall

   // Pretend the syscall completed normally, but don't touch the thread state.
   // SfNoWriteResult stops the fake success value being written back to the
   // guest registers.
   sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
   sci->flags |= SfNoWriteResult;
   VG_(post_syscall)(tid);

   /* Mark the syscall machinery idle again for this thread. */
   sci->status.what = SsIdle;

   /* Jump back into the scheduler loop; this function never returns
      to its caller. */
   vg_assert(tst->sched_jmpbuf_valid);
   __builtin_longjmp(tst->sched_jmpbuf, True);

   /* NOTREACHED */
   vg_assert(0);
}
   2203 #endif
   2204 
   2205 
   2206 /* ---------------------------------------------------------------------
   2207    A place to store the where-to-call-when-really-done pointer
   2208    ------------------------------------------------------------------ */
   2209 
// When the final thread is done, where shall I call to shut down the
// system cleanly?  Set once at startup (in m_main) and never changed
// after that.  It is basically a pointer to the exit continuation —
// per the _NORETURN suffix, the function it points to is not expected
// to return.  This is all just a nasty hack to avoid calling directly
// from m_syswrap to m_main at exit, since that would cause m_main to
// become part of a module cycle, which is silly.
void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
       (ThreadId,VgSchedReturnCode)
   = NULL;
   2219 
   2220 /*--------------------------------------------------------------------*/
   2221 /*--- end                                                          ---*/
   2222 /*--------------------------------------------------------------------*/
   2223