Home | History | Annotate | Download | only in m_syswrap
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Handle system calls.                          syswrap-main.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2011 Julian Seward
     11       jseward (at) acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "libvex_guest_offsets.h"
     32 #include "libvex_trc_values.h"
     33 #include "pub_core_basics.h"
     34 #include "pub_core_aspacemgr.h"
     35 #include "pub_core_vki.h"
     36 #include "pub_core_vkiscnums.h"
     37 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
     38 #include "pub_core_threadstate.h"
     39 #include "pub_core_libcbase.h"
     40 #include "pub_core_libcassert.h"
     41 #include "pub_core_libcprint.h"
     42 #include "pub_core_libcproc.h"      // For VG_(getpid)()
     43 #include "pub_core_libcsignal.h"
     44 #include "pub_core_scheduler.h"     // For VG_({acquire,release}_BigLock),
     45                                     //   and VG_(vg_yield)
     46 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     47 #include "pub_core_tooliface.h"
     48 #include "pub_core_options.h"
     49 #include "pub_core_signals.h"       // For VG_SIGVGKILL, VG_(poll_signals)
     50 #include "pub_core_syscall.h"
     51 #include "pub_core_machine.h"
     52 #include "pub_core_syswrap.h"
     53 
     54 #include "priv_types_n_macros.h"
     55 #include "priv_syswrap-main.h"
     56 
     57 #if defined(VGO_darwin)
     58 #include "priv_syswrap-darwin.h"
     59 #endif
     60 
     61 /* Useful info which needs to be recorded somewhere:
     62    Use of registers in syscalls is:
     63 
     64           NUM   ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
     65    LINUX:
     66    x86    eax   ebx  ecx  edx  esi  edi  ebp  n/a  n/a  eax       (== NUM)
     67    amd64  rax   rdi  rsi  rdx  r10  r8   r9   n/a  n/a  rax       (== NUM)
     68    ppc32  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
     69    ppc64  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
     70    arm    r7    r0   r1   r2   r3   r4   r5   n/a  n/a  r0        (== ARG1)
     71 
     72    On s390x the svc instruction is used for system calls. The system call
     73    number is encoded in the instruction (8 bit immediate field). Since Linux
     74    2.6 it is also allowed to use svc 0 with the system call number in r1.
     75    This was introduced for system calls >255, but works for all. It is
     76    also possible to see the svc 0 together with an EXecute instruction, that
     77    fills in the immediate field.
     78    s390x r1/SVC r2   r3   r4   r5   r6   r7   n/a  n/a  r2        (== ARG1)
     79 
     80    DARWIN:
     81    x86    eax +4   +8   +12  +16  +20  +24  +28  +32  edx:eax, eflags.c
     82    amd64  rax rdi  rsi  rdx  rcx  r8   r9   +8   +16  rdx:rax, rflags.c
     83 
     84    For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto
     85    amd64-darwin.  Apparently 0(%esp) is some kind of return address
     86    (perhaps for syscalls done with "sysenter"?)  I don't think it is
     87    relevant for syscalls done with "int $0x80/1/2".
     88 */
     89 
     90 /* This is the top level of the system-call handler module.  All
     91    system calls are channelled through here, doing two things:
     92 
     93    * notify the tool of the events (mem/reg reads, writes) happening
     94 
     95    * perform the syscall, usually by passing it along to the kernel
     96      unmodified.
     97 
     98    A magical piece of assembly code, do_syscall_for_client_WRK, in
     99    syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
    100    kernel, whilst having the simulator retain control.
    101 */
    102 
    103 /* The main function is VG_(client_syscall).  The simulation calls it
    104    whenever a client thread wants to do a syscall.  The following is a
    105    sketch of what it does.
    106 
    107    * Ensures the root thread's stack is suitably mapped.  Tedious and
    108      arcane.  See big big comment in VG_(client_syscall).
    109 
    110    * First, it rounds up the syscall number and args (which is a
    111      platform dependent activity) and puts them in a struct ("args")
    112      and also a copy in "orig_args".
    113 
    114      The pre/post wrappers refer to these structs and so no longer
    115      need magic macros to access any specific registers.  This struct
    116      is stored in thread-specific storage.
    117 
    118 
    119    * The pre-wrapper is called, passing it a pointer to struct
    120      "args".
    121 
    122 
    123    * The pre-wrapper examines the args and pokes the tool
    124      appropriately.  It may modify the args; this is why "orig_args"
    125      is also stored.
    126 
    127      The pre-wrapper may choose to 'do' the syscall itself, and
    128      concludes one of three outcomes:
    129 
    130        Success(N)    -- syscall is already complete, with success;
    131                         result is N
    132 
    133        Fail(N)       -- syscall is already complete, with failure;
    134                         error code is N
    135 
    136        HandToKernel  -- (the usual case): this needs to be given to
    137                         the kernel to be done, using the values in
    138                         the possibly-modified "args" struct.
    139 
    140      In addition, the pre-wrapper may set some flags:
    141 
    142        MayBlock   -- only applicable when outcome==HandToKernel
    143 
    144        PostOnFail -- only applicable when outcome==HandToKernel or Fail
    145 
    146 
    147    * If the pre-outcome is HandToKernel, the syscall is duly handed
    148      off to the kernel (perhaps involving some thread switchery, but
    149      that's not important).  This reduces the possible set of outcomes
    150      to either Success(N) or Fail(N).
    151 
    152 
    153    * The outcome (Success(N) or Fail(N)) is written back to the guest
    154      register(s).  This is platform specific:
    155 
    156      x86:    Success(N) ==>  eax = N
    157              Fail(N)    ==>  eax = -N
    158 
    159      ditto amd64
    160 
    161      ppc32:  Success(N) ==>  r3 = N, CR0.SO = 0
    162              Fail(N) ==>     r3 = N, CR0.SO = 1
    163 
    164      Darwin:
    165      x86:    Success(N) ==>  edx:eax = N, cc = 0
    166              Fail(N)    ==>  edx:eax = N, cc = 1
    167 
    168      s390x:  Success(N) ==>  r2 = N
    169              Fail(N)    ==>  r2 = -N
    170 
    171    * The post wrapper is called if:
    172 
    173      - it exists, and
    174      - outcome==Success or (outcome==Fail and PostOnFail is set)
    175 
    176      The post wrapper is passed the adulterated syscall args (struct
    177      "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
    178 
    179    There are several other complications, primarily to do with
    180    syscalls getting interrupted, explained in comments in the code.
    181 */
    182 
    183 /* CAVEATS for writing wrappers.  It is important to follow these!
    184 
    185    The macros defined in priv_types_n_macros.h are designed to help
    186    decouple the wrapper logic from the actual representation of
    187    syscall args/results, since these wrappers are designed to work on
    188    multiple platforms.
    189 
    190    Sometimes a PRE wrapper will complete the syscall itself, without
    191    handing it to the kernel.  It will use one of SET_STATUS_Success,
    192    SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
    193    value.  It is critical to appreciate that use of the macro does not
    194    immediately cause the underlying guest state to be updated -- that
    195    is done by the driver logic in this file, when the wrapper returns.
    196 
    197    As a result, PRE wrappers of the following form will malfunction:
    198 
    199    PRE(fooble)
    200    {
    201       ... do stuff ...
    202       SET_STATUS_Somehow(...)
    203 
    204       // do something that assumes guest state is up to date
    205    }
    206 
    207    In particular, direct or indirect calls to VG_(poll_signals) after
    208    setting STATUS can cause the guest state to be read (in order to
    209    build signal frames).  Do not do this.  If you want a signal poll
    210    after the syscall goes through, do "*flags |= SfPollAfter" and the
    211    driver logic will do it for you.
    212 
    213    -----------
    214 
    215    Another critical requirement following introduction of new address
    216    space manager (JRS, 20050923):
    217 
    218    In a situation where the mappedness of memory has changed, aspacem
    219    should be notified BEFORE the tool.  Hence the following is
    220    correct:
    221 
    222       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
    223       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
    224       if (d)
    225          VG_(discard_translations)(s->start, s->end+1 - s->start);
    226 
    227    whilst this is wrong:
    228 
    229       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
    230       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
    231       if (d)
    232          VG_(discard_translations)(s->start, s->end+1 - s->start);
    233 
    234    The reason is that the tool may itself ask aspacem for more shadow
    235    memory as a result of the VG_TRACK call.  In such a situation it is
    236    critical that aspacem's segment array is up to date -- hence the
    237    need to notify aspacem first.
    238 
    239    -----------
    240 
    241    Also .. take care to call VG_(discard_translations) whenever
    242    memory with execute permissions is unmapped.
    243 */
    244 
    245 
    246 /* ---------------------------------------------------------------------
    247    Do potentially blocking syscall for the client, and mess with
    248    signal masks at the same time.
    249    ------------------------------------------------------------------ */
    250 
    251 /* Perform a syscall on behalf of a client thread, using a specific
    252    signal mask.  On completion, the signal mask is set to restore_mask
    253    (which presumably blocks almost everything).  If a signal happens
    254    during the syscall, the handler should call
    255    VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
    256    thread's context to do the right thing.
    257 
    258    The _WRK function is handwritten assembly, implemented per-platform
    259    in coregrind/m_syswrap/syscall-$PLAT.S.  It has some very magic
    260    properties.  See comments at the top of
    261    VG_(fixup_guest_state_after_syscall_interrupted) below for details.
    262 
    263    This function (these functions) are required to return zero in case
    264    of success (even if the syscall itself failed), and nonzero if the
    265    sigprocmask-swizzling calls failed.  We don't actually care about
    266    the failure values from sigprocmask, although most of the assembly
    267    implementations do attempt to return that, using the convention
    268    0 for success, or 0x8000 | error-code for failure.
    269 */
    270 #if defined(VGO_linux)
    271 extern
    272 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
    273                                       void* guest_state,
    274                                       const vki_sigset_t *syscall_mask,
    275                                       const vki_sigset_t *restore_mask,
    276                                       Word sigsetSzB );
    277 #elif defined(VGO_darwin)
    278 extern
    279 UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
    280                                            void* guest_state,
    281                                            const vki_sigset_t *syscall_mask,
    282                                            const vki_sigset_t *restore_mask,
    283                                            Word sigsetSzB ); /* unused */
    284 extern
    285 UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
    286                                            void* guest_state,
    287                                            const vki_sigset_t *syscall_mask,
    288                                            const vki_sigset_t *restore_mask,
    289                                            Word sigsetSzB ); /* unused */
    290 extern
    291 UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
    292                                            void* guest_state,
    293                                            const vki_sigset_t *syscall_mask,
    294                                            const vki_sigset_t *restore_mask,
    295                                            Word sigsetSzB ); /* unused */
    296 #else
    297 #  error "Unknown OS"
    298 #endif
    299 
    300 
/* Perform 'syscallno' on behalf of the client thread 'tst', with
   'syscall_mask' installed as the signal mask for the duration of the
   (possibly blocking) syscall.  This just selects and calls the
   per-OS handwritten assembly helper declared above; see the big
   comment preceding those declarations for the helpers' contract.
   The helpers return zero on success (even if the syscall itself
   failed) and nonzero only if the sigprocmask swizzling failed,
   which is treated as fatal here. */
static
void do_syscall_for_client ( Int syscallno,
                             ThreadState* tst,
                             const vki_sigset_t* syscall_mask )
{
   vki_sigset_t saved;  /* previous mask; written by the helper */
   UWord err;
#  if defined(VGO_linux)
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, sizeof(vki_sigset_t)
         );
#  elif defined(VGO_darwin)
   /* On Darwin the 64-bit internal syscall number encodes its class
      (Unix / Mach / mdep); strip the class bits with
      VG_DARWIN_SYSNO_FOR_KERNEL and route to the matching helper. */
   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         err = ML_(do_syscall_for_client_unix_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         err = ML_(do_syscall_for_client_mach_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         err = ML_(do_syscall_for_client_mdep_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      default:
         vg_assert(0);
         /*NOTREACHED*/
         break;
   }
#  else
#    error "Unknown OS"
#  endif
   /* Nonzero means the helper's sigprocmask calls failed; the low
      bits carry the error code (convention: 0x8000 | errcode). */
   vg_assert2(
      err == 0,
      "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
      (Int)(err & 0xFFF)
   );
}
    347 
    348 
    349 /* ---------------------------------------------------------------------
    350    Impedance matchers and misc helpers
    351    ------------------------------------------------------------------ */
    352 
    353 static
    354 Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
    355 {
    356    return a1->sysno == a2->sysno
    357           && a1->arg1 == a2->arg1
    358           && a1->arg2 == a2->arg2
    359           && a1->arg3 == a2->arg3
    360           && a1->arg4 == a2->arg4
    361           && a1->arg5 == a2->arg5
    362           && a1->arg6 == a2->arg6
    363           && a1->arg7 == a2->arg7
    364           && a1->arg8 == a2->arg8;
    365 }
    366 
    367 static
    368 Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
    369 {
    370    /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
    371    if (s1->what == s2->what && sr_EQ( s1->sres, s2->sres ))
    372       return True;
    373 #  if defined(VGO_darwin)
    374    /* Darwin-specific debugging guff */
    375    vg_assert(s1->what == s2->what);
    376    VG_(printf)("eq_SyscallStatus:\n");
    377    VG_(printf)("  {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
    378    VG_(printf)("  {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
    379    vg_assert(0);
    380 #  endif
    381    return False;
    382 }
    383 
    384 /* Convert between SysRes and SyscallStatus, to the extent possible. */
    385 
    386 static
    387 SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
    388 {
    389    SyscallStatus status;
    390    status.what = SsComplete;
    391    status.sres = res;
    392    return status;
    393 }
    394 
    395 
    396 /* Impedance matchers.  These convert syscall arg or result data from
    397    the platform-specific in-guest-state format to the canonical
    398    formats, and back. */
    399 
/* Read the syscall number and up to eight arguments out of the guest
   state, from the platform-ABI locations documented in the register
   table at the top of this file, into the canonical SyscallArgs
   struct.  'trc' (the VEX trap-reason code) is only consulted on
   x86-darwin, where the trap kind (int $0x80/$0x81/$0x82 vs sysenter)
   determines the syscall class; other platforms ignore it.  Args
   beyond what the platform's ABI supplies are zeroed. */
static
void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
                                    /*IN*/ VexGuestArchState* gst_vanilla,
                                    /*IN*/ UInt trc )
{
#if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sysno = gst->guest_EAX;
   canonical->arg1  = gst->guest_EBX;
   canonical->arg2  = gst->guest_ECX;
   canonical->arg3  = gst->guest_EDX;
   canonical->arg4  = gst->guest_ESI;
   canonical->arg5  = gst->guest_EDI;
   canonical->arg6  = gst->guest_EBP;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sysno = gst->guest_RAX;
   canonical->arg1  = gst->guest_RDI;
   canonical->arg2  = gst->guest_RSI;
   canonical->arg3  = gst->guest_RDX;
   canonical->arg4  = gst->guest_R10;
   canonical->arg5  = gst->guest_R8;
   canonical->arg6  = gst->guest_R9;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR0;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR0;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sysno = gst->guest_R7;
   canonical->arg1  = gst->guest_R0;
   canonical->arg2  = gst->guest_R1;
   canonical->arg3  = gst->guest_R2;
   canonical->arg4  = gst->guest_R3;
   canonical->arg5  = gst->guest_R4;
   canonical->arg6  = gst->guest_R5;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_x86_darwin)
   /* x86-darwin passes Unix syscall args on the stack rather than in
      registers; %esp points at a return address with args above it. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;
   // GrP fixme hope syscalls aren't called with really shallow stacks...
   canonical->sysno = gst->guest_EAX;
   if (canonical->sysno != 0) {
      // stack[0] is return address
      canonical->arg1  = stack[1];
      canonical->arg2  = stack[2];
      canonical->arg3  = stack[3];
      canonical->arg4  = stack[4];
      canonical->arg5  = stack[5];
      canonical->arg6  = stack[6];
      canonical->arg7  = stack[7];
      canonical->arg8  = stack[8];
   } else {
      /* sysno == 0 is the syscall() indirection: the real number is
         the first stack arg and all other args shift up by one. */
      // GrP fixme hack handle syscall()
      // GrP fixme what about __syscall() ?
      // stack[0] is return address
      // DDD: the tool can't see that the params have been shifted!  Can
      //      lead to incorrect checking, I think, because the PRRAn/PSARn
      //      macros will mention the pre-shifted args.
      canonical->sysno = stack[1];
      vg_assert(canonical->sysno != 0);
      canonical->arg1  = stack[2];
      canonical->arg2  = stack[3];
      canonical->arg3  = stack[4];
      canonical->arg4  = stack[5];
      canonical->arg5  = stack[6];
      canonical->arg6  = stack[7];
      canonical->arg7  = stack[8];
      canonical->arg8  = stack[9];

      PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
            VG_(getpid)(), /*tid,*/
            VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
   }

   // Here we determine what kind of syscall it was by looking at the
   // interrupt kind, and then encode the syscall number using the 64-bit
   // encoding for Valgrind's internal use.
   //
   // DDD: Would it be better to stash the JMP kind into the Darwin
   // thread state rather than passing in the trc?
   switch (trc) {
   case VEX_TRC_JMP_SYS_INT128:
      // int $0x80 = Unix, 64-bit result
      vg_assert(canonical->sysno >= 0);
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
      break;
   case VEX_TRC_JMP_SYS_SYSENTER:
      // syscall = Unix, 32-bit result
      // OR        Mach, 32-bit result
      if (canonical->sysno >= 0) {
         // GrP fixme hack:  0xffff == I386_SYSCALL_NUMBER_MASK
         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
                                                             & 0xffff);
      } else {
         /* negative raw sysno indicates a Mach trap */
         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
      }
      break;
   case VEX_TRC_JMP_SYS_INT129:
      // int $0x81 = Mach, 32-bit result
      vg_assert(canonical->sysno < 0);
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
      break;
   case VEX_TRC_JMP_SYS_INT130:
      // int $0x82 = mdep, 32-bit result
      vg_assert(canonical->sysno >= 0);
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
      break;
   default:
      vg_assert(0);
      break;
   }

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);

   // GrP fixme hope syscalls aren't called with really shallow stacks...
   canonical->sysno = gst->guest_RAX;
   if (canonical->sysno != __NR_syscall) {
      // stack[0] is return address
      canonical->arg1  = gst->guest_RDI;
      canonical->arg2  = gst->guest_RSI;
      canonical->arg3  = gst->guest_RDX;
      canonical->arg4  = gst->guest_R10;  // not rcx with syscall insn
      canonical->arg5  = gst->guest_R8;
      canonical->arg6  = gst->guest_R9;
      canonical->arg7  = stack[1];
      canonical->arg8  = stack[2];
   } else {
      /* __NR_syscall is the syscall() indirection: the real number is
         in %rdi and all other args shift up by one register/slot. */
      // GrP fixme hack handle syscall()
      // GrP fixme what about __syscall() ?
      // stack[0] is return address
      // DDD: the tool can't see that the params have been shifted!  Can
      //      lead to incorrect checking, I think, because the PRRAn/PSARn
      //      macros will mention the pre-shifted args.
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
      vg_assert(canonical->sysno != __NR_syscall);
      canonical->arg1  = gst->guest_RSI;
      canonical->arg2  = gst->guest_RDX;
      canonical->arg3  = gst->guest_R10;  // not rcx with syscall insn
      canonical->arg4  = gst->guest_R8;
      canonical->arg5  = gst->guest_R9;
      canonical->arg6  = stack[1];
      canonical->arg7  = stack[2];
      canonical->arg8  = stack[3];

      PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
            VG_(getpid)(), /*tid,*/
            VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
   }

   // no canonical->sysno adjustment needed

#elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   canonical->sysno = gst->guest_SYSNO;
   canonical->arg1  = gst->guest_r2;
   canonical->arg2  = gst->guest_r3;
   canonical->arg3  = gst->guest_r4;
   canonical->arg4  = gst->guest_r5;
   canonical->arg5  = gst->guest_r6;
   canonical->arg6  = gst->guest_r7;
   canonical->arg7  = 0;
   canonical->arg8  = 0;
#else
#  error "getSyscallArgsFromGuestState: unknown arch"
#endif
}
    599 
/* Inverse of getSyscallArgsFromGuestState: write the canonical
   syscall number and args back into the platform-ABI locations in the
   guest state (see the register table at the top of this file), so
   the syscall can be handed to the kernel with possibly-modified
   args.  Only the arg slots the platform's ABI actually uses are
   written; on Darwin the internal 64-bit syscall number is converted
   back to the kernel's encoding first. */
static
void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
                                    /*OUT*/VexGuestArchState* gst_vanilla )
{
#if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   gst->guest_EAX = canonical->sysno;
   gst->guest_EBX = canonical->arg1;
   gst->guest_ECX = canonical->arg2;
   gst->guest_EDX = canonical->arg3;
   gst->guest_ESI = canonical->arg4;
   gst->guest_EDI = canonical->arg5;
   gst->guest_EBP = canonical->arg6;

#elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   gst->guest_RAX = canonical->sysno;
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_R10 = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;

#elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   gst->guest_R7 = canonical->sysno;
   gst->guest_R0 = canonical->arg1;
   gst->guest_R1 = canonical->arg2;
   gst->guest_R2 = canonical->arg3;
   gst->guest_R3 = canonical->arg4;
   gst->guest_R4 = canonical->arg5;
   gst->guest_R5 = canonical->arg6;

#elif defined(VGP_x86_darwin)
   /* Args live on the client stack above the return address; this
      writes to client memory at the current %esp. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);

   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
   // stack[0] is return address
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_darwin)
   /* First six args in registers, args 7 and 8 on the client stack. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;

   // stack[0] is return address
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_RCX = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1]       = canonical->arg7;
   stack[2]       = canonical->arg8;

#elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   gst->guest_SYSNO  = canonical->sysno;
   gst->guest_r2     = canonical->arg1;
   gst->guest_r3     = canonical->arg2;
   gst->guest_r4     = canonical->arg3;
   gst->guest_r5     = canonical->arg4;
   gst->guest_r6     = canonical->arg5;
   gst->guest_r7     = canonical->arg6;

#else
#  error "putSyscallArgsIntoGuestState: unknown arch"
#endif
}
    702 
/* Read the outcome of a just-completed syscall out of the guest
   state, converting the platform's syscall return convention into
   canonical (SyscallStatus) form.  In all cases the status is marked
   SsComplete. */
static
void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
                                      /*IN*/ VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   /* Linux/x86: result (or negated errno) is returned in %eax. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_linux)
   /* Linux/amd64: result (or negated errno) is returned in %rax. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_linux)
   /* Linux/ppc32: result is in GPR3; the CR0.SO bit (bit 28 of the
      flattened CR) signals failure. */
   VexGuestPPC32State* gst   = (VexGuestPPC32State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC32_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64_linux)
   /* Linux/ppc64: same convention as ppc32 -- GPR3 plus CR0.SO. */
   VexGuestPPC64State* gst   = (VexGuestPPC64State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC64_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_arm_linux)
   /* Linux/arm: result (or negated errno) is returned in R0. */
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
   canonical->what = SsComplete;

#  elif defined(VGP_x86_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   /* The return convention depends on the syscall class recorded in
      guest_SC_CLASS (stashed at syscall entry by VG_(client_syscall)). */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
   UInt err = 0;
   UInt wLO = 0;
   UInt wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // int $0x80 = Unix, 64-bit result
         err = carry;
         wLO = gst->guest_EAX;
         wHI = gst->guest_EDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // int $0x81 = Mach, 32-bit result
         wLO = gst->guest_EAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // int $0x82 = mdep, 32-bit result
         wLO = gst->guest_EAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_x86_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   /* Same class-dependent scheme as x86-darwin, with 64-bit words. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
   ULong err = 0;
   ULong wLO = 0;
   ULong wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // syscall = Unix, 128-bit result
         err = carry;
         wLO = gst->guest_RAX;
         wHI = gst->guest_RDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // syscall = Mach, 64-bit result
         wLO = gst->guest_RAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // syscall = mdep, 64-bit result
         wLO = gst->guest_RAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_amd64_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_s390x_linux)
   /* Linux/s390x: result (or negated errno) is returned in r2. */
   VexGuestS390XState* gst   = (VexGuestS390XState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
   canonical->what = SsComplete;

#  else
#    error "getSyscallStatusFromGuestState: unknown arch"
#  endif
}
    809 
    810 static
    811 void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
    812                                       /*IN*/ SyscallStatus*     canonical,
    813                                       /*OUT*/VexGuestArchState* gst_vanilla )
    814 {
    815 #  if defined(VGP_x86_linux)
    816    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    817    vg_assert(canonical->what == SsComplete);
    818    if (sr_isError(canonical->sres)) {
    819       /* This isn't exactly right, in that really a Failure with res
    820          not in the range 1 .. 4095 is unrepresentable in the
    821          Linux-x86 scheme.  Oh well. */
    822       gst->guest_EAX = - (Int)sr_Err(canonical->sres);
    823    } else {
    824       gst->guest_EAX = sr_Res(canonical->sres);
    825    }
    826    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    827              OFFSET_x86_EAX, sizeof(UWord) );
    828 
    829 #  elif defined(VGP_amd64_linux)
    830    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    831    vg_assert(canonical->what == SsComplete);
    832    if (sr_isError(canonical->sres)) {
    833       /* This isn't exactly right, in that really a Failure with res
    834          not in the range 1 .. 4095 is unrepresentable in the
    835          Linux-amd64 scheme.  Oh well. */
    836       gst->guest_RAX = - (Long)sr_Err(canonical->sres);
    837    } else {
    838       gst->guest_RAX = sr_Res(canonical->sres);
    839    }
    840    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    841              OFFSET_amd64_RAX, sizeof(UWord) );
    842 
    843 #  elif defined(VGP_ppc32_linux)
    844    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
    845    UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
    846    vg_assert(canonical->what == SsComplete);
    847    if (sr_isError(canonical->sres)) {
    848       /* set CR0.SO */
    849       LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
    850       gst->guest_GPR3 = sr_Err(canonical->sres);
    851    } else {
    852       /* clear CR0.SO */
    853       LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
    854       gst->guest_GPR3 = sr_Res(canonical->sres);
    855    }
    856    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    857              OFFSET_ppc32_GPR3, sizeof(UWord) );
    858    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    859              OFFSET_ppc32_CR0_0, sizeof(UChar) );
    860 
    861 #  elif defined(VGP_ppc64_linux)
    862    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
    863    UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
    864    vg_assert(canonical->what == SsComplete);
    865    if (sr_isError(canonical->sres)) {
    866       /* set CR0.SO */
    867       LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
    868       gst->guest_GPR3 = sr_Err(canonical->sres);
    869    } else {
    870       /* clear CR0.SO */
    871       LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
    872       gst->guest_GPR3 = sr_Res(canonical->sres);
    873    }
    874    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    875              OFFSET_ppc64_GPR3, sizeof(UWord) );
    876    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    877              OFFSET_ppc64_CR0_0, sizeof(UChar) );
    878 
    879 #  elif defined(VGP_arm_linux)
    880    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
    881    vg_assert(canonical->what == SsComplete);
    882    if (sr_isError(canonical->sres)) {
    883       /* This isn't exactly right, in that really a Failure with res
    884          not in the range 1 .. 4095 is unrepresentable in the
    885          Linux-arm scheme.  Oh well. */
    886       gst->guest_R0 = - (Int)sr_Err(canonical->sres);
    887    } else {
    888       gst->guest_R0 = sr_Res(canonical->sres);
    889    }
    890    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    891              OFFSET_arm_R0, sizeof(UWord) );
    892 
    893 #elif defined(VGP_x86_darwin)
    894    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    895    SysRes sres = canonical->sres;
    896    vg_assert(canonical->what == SsComplete);
    897    /* Unfortunately here we have to break abstraction and look
    898       directly inside 'res', in order to decide what to do. */
    899    switch (sres._mode) {
    900       case SysRes_MACH: // int $0x81 = Mach, 32-bit result
    901       case SysRes_MDEP: // int $0x82 = mdep, 32-bit result
    902          gst->guest_EAX = sres._wLO;
    903          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    904                    OFFSET_x86_EAX, sizeof(UInt) );
    905          break;
    906       case SysRes_UNIX_OK:  // int $0x80 = Unix, 64-bit result
    907       case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error
    908          gst->guest_EAX = sres._wLO;
    909          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    910                    OFFSET_x86_EAX, sizeof(UInt) );
    911          gst->guest_EDX = sres._wHI;
    912          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    913                    OFFSET_x86_EDX, sizeof(UInt) );
    914          LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
    915                                       gst );
    916          // GrP fixme sets defined for entire eflags, not just bit c
    917          // DDD: this breaks exp-ptrcheck.
    918          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    919                    offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) );
    920          break;
    921       default:
    922          vg_assert(0);
    923          break;
    924    }
    925 
    926 #elif defined(VGP_amd64_darwin)
    927    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    928    SysRes sres = canonical->sres;
    929    vg_assert(canonical->what == SsComplete);
    930    /* Unfortunately here we have to break abstraction and look
    931       directly inside 'res', in order to decide what to do. */
    932    switch (sres._mode) {
    933       case SysRes_MACH: // syscall = Mach, 64-bit result
    934       case SysRes_MDEP: // syscall = mdep, 64-bit result
    935          gst->guest_RAX = sres._wLO;
    936          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    937                    OFFSET_amd64_RAX, sizeof(ULong) );
    938          break;
    939       case SysRes_UNIX_OK:  // syscall = Unix, 128-bit result
    940       case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error
    941          gst->guest_RAX = sres._wLO;
    942          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    943                    OFFSET_amd64_RAX, sizeof(ULong) );
    944          gst->guest_RDX = sres._wHI;
    945          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    946                    OFFSET_amd64_RDX, sizeof(ULong) );
    947          LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
    948                                         gst );
    949          // GrP fixme sets defined for entire rflags, not just bit c
    950          // DDD: this breaks exp-ptrcheck.
    951          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    952                    offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) );
    953          break;
    954       default:
    955          vg_assert(0);
    956          break;
    957    }
    958 
    959 #  elif defined(VGP_s390x_linux)
    960    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
    961    vg_assert(canonical->what == SsComplete);
    962    if (sr_isError(canonical->sres)) {
    963       gst->guest_r2 = - (Long)sr_Err(canonical->sres);
    964    } else {
    965       gst->guest_r2 = sr_Res(canonical->sres);
    966    }
    967 
    968 #  else
    969 #    error "putSyscallStatusIntoGuestState: unknown arch"
    970 #  endif
    971 }
    972 
    973 
    974 /* Tell me the offsets in the guest state of the syscall params, so
    975    that the scalar argument checkers don't have to have this info
    976    hardwired. */
    977 
static
void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
{
/* Fill in 'layout' with the platform's syscall argument locations:
   o_* fields are byte offsets into the guest state for
   register-carried args; s_* fields are byte offsets from the stack
   pointer for stack-carried args (Darwin); uu_* fields are set to -1
   where an arg slot does not exist on this platform. */
#if defined(VGP_x86_linux)
   /* Linux/x86: number in EAX, args in EBX..EBP. */
   layout->o_sysno  = OFFSET_x86_EAX;
   layout->o_arg1   = OFFSET_x86_EBX;
   layout->o_arg2   = OFFSET_x86_ECX;
   layout->o_arg3   = OFFSET_x86_EDX;
   layout->o_arg4   = OFFSET_x86_ESI;
   layout->o_arg5   = OFFSET_x86_EDI;
   layout->o_arg6   = OFFSET_x86_EBP;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_amd64_linux)
   /* Linux/amd64: number in RAX, args in RDI,RSI,RDX,R10,R8,R9. */
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_R10;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc32_linux)
   /* Linux/ppc32: number in GPR0, args in GPR3..GPR8. */
   layout->o_sysno  = OFFSET_ppc32_GPR0;
   layout->o_arg1   = OFFSET_ppc32_GPR3;
   layout->o_arg2   = OFFSET_ppc32_GPR4;
   layout->o_arg3   = OFFSET_ppc32_GPR5;
   layout->o_arg4   = OFFSET_ppc32_GPR6;
   layout->o_arg5   = OFFSET_ppc32_GPR7;
   layout->o_arg6   = OFFSET_ppc32_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc64_linux)
   /* Linux/ppc64: same registers as ppc32. */
   layout->o_sysno  = OFFSET_ppc64_GPR0;
   layout->o_arg1   = OFFSET_ppc64_GPR3;
   layout->o_arg2   = OFFSET_ppc64_GPR4;
   layout->o_arg3   = OFFSET_ppc64_GPR5;
   layout->o_arg4   = OFFSET_ppc64_GPR6;
   layout->o_arg5   = OFFSET_ppc64_GPR7;
   layout->o_arg6   = OFFSET_ppc64_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_arm_linux)
   /* Linux/arm (EABI): number in R7, args in R0..R5. */
   layout->o_sysno  = OFFSET_arm_R7;
   layout->o_arg1   = OFFSET_arm_R0;
   layout->o_arg2   = OFFSET_arm_R1;
   layout->o_arg3   = OFFSET_arm_R2;
   layout->o_arg4   = OFFSET_arm_R3;
   layout->o_arg5   = OFFSET_arm_R4;
   layout->o_arg6   = OFFSET_arm_R5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_x86_darwin)
   layout->o_sysno  = OFFSET_x86_EAX;
   // syscall parameters are on stack in C convention
   /* slot 0 holds the return address, so args start at slot 1 */
   layout->s_arg1   = sizeof(UWord) * 1;
   layout->s_arg2   = sizeof(UWord) * 2;
   layout->s_arg3   = sizeof(UWord) * 3;
   layout->s_arg4   = sizeof(UWord) * 4;
   layout->s_arg5   = sizeof(UWord) * 5;
   layout->s_arg6   = sizeof(UWord) * 6;
   layout->s_arg7   = sizeof(UWord) * 7;
   layout->s_arg8   = sizeof(UWord) * 8;

#elif defined(VGP_amd64_darwin)
   /* Darwin/amd64: first six args in registers, args 7 and 8 on the
      stack (above the return address slot). */
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_RCX;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   layout->s_arg7   = sizeof(UWord) * 1;
   layout->s_arg8   = sizeof(UWord) * 2;

#elif defined(VGP_s390x_linux)
   /* Linux/s390x: number in the SYSNO pseudo-register, args in r2..r7. */
   layout->o_sysno  = OFFSET_s390x_SYSNO;
   layout->o_arg1   = OFFSET_s390x_r2;
   layout->o_arg2   = OFFSET_s390x_r3;
   layout->o_arg3   = OFFSET_s390x_r4;
   layout->o_arg4   = OFFSET_s390x_r5;
   layout->o_arg5   = OFFSET_s390x_r6;
   layout->o_arg6   = OFFSET_s390x_r7;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */
#else
#  error "getSyscallLayout: unknown arch"
#endif
}
   1073 
   1074 
   1075 /* ---------------------------------------------------------------------
   1076    The main driver logic
   1077    ------------------------------------------------------------------ */
   1078 
   1079 /* Finding the handlers for a given syscall, or faking up one
   1080    when no handler is found. */
   1081 
/* The fall-back pre-handler, installed (via 'bad_sys') for any
   syscall we have no wrapper for.  Warns the user, optionally shows a
   stack trace, and fails the call with ENOSYS without ever handing it
   to the kernel.  'layout' and 'flags' are unused; 'args' is read
   only for the syscall number; 'status' receives the failure (via the
   SET_STATUS_Failure macro, which writes through the 'status'
   parameter). */
static
void bad_before ( ThreadId              tid,
                  SyscallArgLayout*     layout,
                  /*MOD*/SyscallArgs*   args,
                  /*OUT*/SyscallStatus* status,
                  /*OUT*/UWord*         flags )
{
   VG_(dmsg)("WARNING: unhandled syscall: %s\n",
      VG_SYSNUM_STRING_EXTRA(args->sysno));
   if (VG_(clo_verbosity) > 1) {
      VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   }
   VG_(dmsg)("You may be able to write your own handler.\n");
   VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
   VG_(dmsg)("Nevertheless we consider this a bug.  Please report\n");
   VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");

   SET_STATUS_Failure(VKI_ENOSYS);
}
   1101 
/* Dummy handler-table entry used when no wrapper exists for a
   syscall: the before-handler refuses the call with ENOSYS, and there
   is no after-handler. */
static SyscallTableEntry bad_sys =
   { bad_before, NULL };
   1104 
   1105 static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
   1106 {
   1107    const SyscallTableEntry* sys = NULL;
   1108 
   1109 #  if defined(VGO_linux)
   1110    sys = ML_(get_linux_syscall_entry)( syscallno );
   1111 
   1112 #  elif defined(VGO_darwin)
   1113    Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);
   1114 
   1115    switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
   1116    case VG_DARWIN_SYSCALL_CLASS_UNIX:
   1117       if (idx >= 0 && idx < ML_(syscall_table_size) &&
   1118           ML_(syscall_table)[idx].before != NULL)
   1119          sys = &ML_(syscall_table)[idx];
   1120          break;
   1121    case VG_DARWIN_SYSCALL_CLASS_MACH:
   1122       if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
   1123           ML_(mach_trap_table)[idx].before != NULL)
   1124          sys = &ML_(mach_trap_table)[idx];
   1125          break;
   1126    case VG_DARWIN_SYSCALL_CLASS_MDEP:
   1127       if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
   1128           ML_(mdep_trap_table)[idx].before != NULL)
   1129          sys = &ML_(mdep_trap_table)[idx];
   1130          break;
   1131    default:
   1132       vg_assert(0);
   1133       break;
   1134    }
   1135 
   1136 #  else
   1137 #    error Unknown OS
   1138 #  endif
   1139 
   1140    return sys == NULL  ? &bad_sys  : sys;
   1141 }
   1142 
   1143 
   1144 /* Add and remove signals from mask so that we end up telling the
   1145    kernel the state we actually want rather than what the client
   1146    wants. */
   1147 static void sanitize_client_sigmask(vki_sigset_t *mask)
   1148 {
   1149    VG_(sigdelset)(mask, VKI_SIGKILL);
   1150    VG_(sigdelset)(mask, VKI_SIGSTOP);
   1151    VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
   1152 }
   1153 
/* Per-thread record of an in-progress (or most recently completed)
   syscall. */
typedef
   struct {
      SyscallArgs   orig_args;  /* args as fetched from the guest state */
      SyscallArgs   args;       /* args, possibly modified by the pre-handler */
      SyscallStatus status;     /* where the syscall is in its lifecycle */
      UWord         flags;      /* Sf* flags set by the pre-handler */
   }
   SyscallInfo;

/* One record per possible thread, indexed by ThreadId. */
SyscallInfo syscallInfo[VG_N_THREADS];
   1164 
   1165 
   1166 /* The scheduler needs to be able to zero out these records after a
   1167    fork, hence this is exported from m_syswrap. */
   1168 void VG_(clear_syscallInfo) ( Int tid )
   1169 {
   1170    vg_assert(tid >= 0 && tid < VG_N_THREADS);
   1171    VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
   1172    syscallInfo[tid].status.what = SsIdle;
   1173 }
   1174 
   1175 static void ensure_initialised ( void )
   1176 {
   1177    Int i;
   1178    static Bool init_done = False;
   1179    if (init_done)
   1180       return;
   1181    init_done = True;
   1182    for (i = 0; i < VG_N_THREADS; i++) {
   1183       VG_(clear_syscallInfo)( i );
   1184    }
   1185 }
   1186 
   1187 /* --- This is the main function of this file. --- */
   1188 
   1189 void VG_(client_syscall) ( ThreadId tid, UInt trc )
   1190 {
   1191    Word                     sysno;
   1192    ThreadState*             tst;
   1193    const SyscallTableEntry* ent;
   1194    SyscallArgLayout         layout;
   1195    SyscallInfo*             sci;
   1196 
   1197    ensure_initialised();
   1198 
   1199    vg_assert(VG_(is_valid_tid)(tid));
   1200    vg_assert(tid >= 1 && tid < VG_N_THREADS);
   1201    vg_assert(VG_(is_running_thread)(tid));
   1202 
   1203    tst = VG_(get_ThreadState)(tid);
   1204 
   1205    /* BEGIN ensure root thread's stack is suitably mapped */
   1206    /* In some rare circumstances, we may do the syscall without the
   1207       bottom page of the stack being mapped, because the stack pointer
   1208       was moved down just a few instructions before the syscall
   1209       instruction, and there have been no memory references since
   1210       then, that would cause a call to VG_(extend_stack) to have
   1211       happened.
   1212 
   1213       In native execution that's OK: the kernel automagically extends
   1214       the stack's mapped area down to cover the stack pointer (or sp -
   1215       redzone, really).  In simulated normal execution that's OK too,
   1216       since any signals we get from accessing below the mapped area of
   1217       the (guest's) stack lead us to VG_(extend_stack), where we
   1218       simulate the kernel's stack extension logic.  But that leaves
   1219       the problem of entering a syscall with the SP unmapped.  Because
   1220       the kernel doesn't know that the segment immediately above SP is
   1221       supposed to be a grow-down segment, it causes the syscall to
   1222       fail, and thereby causes a divergence between native behaviour
   1223       (syscall succeeds) and simulated behaviour (syscall fails).
   1224 
   1225       This is quite a rare failure mode.  It has only been seen
   1226       affecting calls to sys_readlink on amd64-linux, and even then it
   1227       requires a certain code sequence around the syscall to trigger
   1228       it.  Here is one:
   1229 
   1230       extern int my_readlink ( const char* path );
   1231       asm(
   1232       ".text\n"
   1233       ".globl my_readlink\n"
   1234       "my_readlink:\n"
   1235       "\tsubq    $0x1008,%rsp\n"
   1236       "\tmovq    %rdi,%rdi\n"              // path is in rdi
   1237       "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
   1238       "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
   1239       "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
   1240       "\tsyscall\n"
   1241       "\taddq    $0x1008,%rsp\n"
   1242       "\tret\n"
   1243       ".previous\n"
   1244       );
   1245 
   1246       For more details, see bug #156404
   1247       (https://bugs.kde.org/show_bug.cgi?id=156404).
   1248 
   1249       The fix is actually very simple.  We simply need to call
   1250       VG_(extend_stack) for this thread, handing it the lowest
   1251       possible valid address for stack (sp - redzone), to ensure the
   1252       pages all the way down to that address, are mapped.  Because
   1253       this is a potentially expensive and frequent operation, we
   1254       filter in two ways:
   1255 
   1256       First, only the main thread (tid=1) has a growdown stack.  So
   1257       ignore all others.  It is conceivable, although highly unlikely,
   1258       that the main thread exits, and later another thread is
   1259       allocated tid=1, but that's harmless, I believe;
   1260       VG_(extend_stack) will do nothing when applied to a non-root
   1261       thread.
   1262 
   1263       Secondly, first call VG_(am_find_nsegment) directly, to see if
   1264       the page holding (sp - redzone) is mapped correctly.  If so, do
   1265       nothing.  This is almost always the case.  VG_(extend_stack)
   1266       calls VG_(am_find_nsegment) twice, so this optimisation -- and
   1267       that's all it is -- more or less halves the number of calls to
   1268       VG_(am_find_nsegment) required.
   1269 
   1270       TODO: the test "seg->kind == SkAnonC" is really inadequate,
   1271       because although it tests whether the segment is mapped
   1272       _somehow_, it doesn't check that it has the right permissions
   1273       (r,w, maybe x) ?  We could test that here, but it will also be
   1274       necessary to fix the corresponding test in VG_(extend_stack).
   1275 
   1276       All this guff is of course Linux-specific.  Hence the ifdef.
   1277    */
   1278 #  if defined(VGO_linux)
   1279    if (tid == 1/*ROOT THREAD*/) {
   1280       Addr     stackMin   = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
   1281       NSegment const* seg = VG_(am_find_nsegment)(stackMin);
   1282       if (seg && seg->kind == SkAnonC) {
   1283          /* stackMin is already mapped.  Nothing to do. */
   1284       } else {
   1285          (void)VG_(extend_stack)( stackMin,
   1286                                   tst->client_stack_szB );
   1287       }
   1288    }
   1289 #  endif
   1290    /* END ensure root thread's stack is suitably mapped */
   1291 
   1292    /* First off, get the syscall args and number.  This is a
   1293       platform-dependent action. */
   1294 
   1295    sci = & syscallInfo[tid];
   1296    vg_assert(sci->status.what == SsIdle);
   1297 
   1298    getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );
   1299 
   1300    /* Copy .orig_args to .args.  The pre-handler may modify .args, but
   1301       we want to keep the originals too, just in case. */
   1302    sci->args = sci->orig_args;
   1303 
   1304    /* Save the syscall number in the thread state in case the syscall
   1305       is interrupted by a signal. */
   1306    sysno = sci->orig_args.sysno;
   1307 
   1308    /* It's sometimes useful, as a crude debugging hack, to get a
   1309       stack trace at each (or selected) syscalls. */
   1310    if (0 && sysno == __NR_ioctl) {
   1311       VG_(umsg)("\nioctl:\n");
   1312       VG_(get_and_pp_StackTrace)(tid, 10);
   1313       VG_(umsg)("\n");
   1314    }
   1315 
   1316 #  if defined(VGO_darwin)
   1317    /* Record syscall class.  But why?  Because the syscall might be
   1318       interrupted by a signal, and in the signal handler (which will
   1319       be m_signals.async_signalhandler) we will need to build a SysRes
   1320       reflecting the syscall return result.  In order to do that we
   1321       need to know the syscall class.  Hence stash it in the guest
   1322       state of this thread.  This madness is not needed on Linux
   1323       because it only has a single syscall return convention and so
   1324       there is no ambiguity involved in converting the post-signal
   1325       machine state into a SysRes. */
   1326    tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
   1327 #  endif
   1328 
   1329    /* The default what-to-do-next thing is hand the syscall to the
   1330       kernel, so we pre-set that here.  Set .sres to something
   1331       harmless looking (is irrelevant because .what is not
   1332       SsComplete.) */
   1333    sci->status.what = SsHandToKernel;
   1334    sci->status.sres = VG_(mk_SysRes_Error)(0);
   1335    sci->flags       = 0;
   1336 
   1337    /* Fetch the syscall's handlers.  If no handlers exist for this
   1338       syscall, we are given dummy handlers which force an immediate
   1339       return with ENOSYS. */
   1340    ent = get_syscall_entry(sysno);
   1341 
   1342    /* Fetch the layout information, which tells us where in the guest
   1343       state the syscall args reside.  This is a platform-dependent
   1344       action.  This info is needed so that the scalar syscall argument
   1345       checks (PRE_REG_READ calls) know which bits of the guest state
   1346       they need to inspect. */
   1347    getSyscallArgLayout( &layout );
   1348 
   1349    /* Make sure the tmp signal mask matches the real signal mask;
   1350       sigsuspend may change this. */
   1351    vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));
   1352 
   1353    /* Right, we're finally ready to Party.  Call the pre-handler and
   1354       see what we get back.  At this point:
   1355 
   1356         sci->status.what  is Unset (we don't know yet).
   1357         sci->orig_args    contains the original args.
   1358         sci->args         is the same as sci->orig_args.
   1359         sci->flags        is zero.
   1360    */
   1361 
   1362    PRINT("SYSCALL[%d,%d](%s) ",
   1363       VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));
   1364 
   1365    /* Do any pre-syscall actions */
   1366    if (VG_(needs).syscall_wrapper) {
   1367       UWord tmpv[8];
   1368       tmpv[0] = sci->orig_args.arg1;
   1369       tmpv[1] = sci->orig_args.arg2;
   1370       tmpv[2] = sci->orig_args.arg3;
   1371       tmpv[3] = sci->orig_args.arg4;
   1372       tmpv[4] = sci->orig_args.arg5;
   1373       tmpv[5] = sci->orig_args.arg6;
   1374       tmpv[6] = sci->orig_args.arg7;
   1375       tmpv[7] = sci->orig_args.arg8;
   1376       VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
   1377                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
   1378    }
   1379 
   1380    vg_assert(ent);
   1381    vg_assert(ent->before);
   1382    (ent->before)( tid,
   1383                   &layout,
   1384                   &sci->args, &sci->status, &sci->flags );
   1385 
   1386    /* The pre-handler may have modified:
   1387          sci->args
   1388          sci->status
   1389          sci->flags
   1390       All else remains unchanged.
   1391       Although the args may be modified, pre handlers are not allowed
   1392       to change the syscall number.
   1393    */
   1394    /* Now we proceed according to what the pre-handler decided. */
   1395    vg_assert(sci->status.what == SsHandToKernel
   1396              || sci->status.what == SsComplete);
   1397    vg_assert(sci->args.sysno == sci->orig_args.sysno);
   1398 
   1399    if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
   1400       /* The pre-handler completed the syscall itself, declaring
   1401          success. */
   1402       if (sci->flags & SfNoWriteResult) {
   1403          PRINT(" --> [pre-success] NoWriteResult");
   1404       } else {
   1405          PRINT(" --> [pre-success] Success(0x%llx:0x%llx)",
   1406                (ULong)sr_ResHI(sci->status.sres),
   1407                (ULong)sr_Res(sci->status.sres));
   1408       }
   1409       /* In this case the allowable flags are to ask for a signal-poll
   1410          and/or a yield after the call.  Changing the args isn't
   1411          allowed. */
   1412       vg_assert(0 == (sci->flags
   1413                       & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
   1414       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   1415    }
   1416 
   1417    else
   1418    if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
   1419       /* The pre-handler decided to fail syscall itself. */
   1420       PRINT(" --> [pre-fail] Failure(0x%llx)", (ULong)sr_Err(sci->status.sres));
   1421       /* In this case, the pre-handler is also allowed to ask for the
   1422          post-handler to be run anyway.  Changing the args is not
   1423          allowed. */
   1424       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
   1425       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   1426    }
   1427 
   1428    else
   1429    if (sci->status.what != SsHandToKernel) {
   1430       /* huh?! */
   1431       vg_assert(0);
   1432    }
   1433 
   1434    else /* (sci->status.what == HandToKernel) */ {
   1435       /* Ok, this is the usual case -- and the complicated one.  There
   1436          are two subcases: sync and async.  async is the general case
   1437          and is to be used when there is any possibility that the
   1438          syscall might block [a fact that the pre-handler must tell us
   1439          via the sci->flags field.]  Because the tidying-away /
   1440          context-switch overhead of the async case could be large, if
   1441          we are sure that the syscall will not block, we fast-track it
   1442          by doing it directly in this thread, which is a lot
   1443          simpler. */
   1444 
   1445       /* Check that the given flags are allowable: MayBlock, PollAfter
   1446          and PostOnFail are ok. */
   1447       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
   1448 
   1449       if (sci->flags & SfMayBlock) {
   1450 
   1451          /* Syscall may block, so run it asynchronously */
   1452          vki_sigset_t mask;
   1453 
   1454          PRINT(" --> [async] ... \n");
   1455 
   1456          mask = tst->sig_mask;
   1457          sanitize_client_sigmask(&mask);
   1458 
   1459          /* Gack.  More impedance matching.  Copy the possibly
   1460             modified syscall args back into the guest state. */
   1461          /* JRS 2009-Mar-16: if the syscall args are possibly modified,
   1462             then this assertion is senseless:
   1463               vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   1464             The case that exposed it was sys_posix_spawn on Darwin,
   1465             which heavily modifies its arguments but then lets the call
   1466             go through anyway, with SfToBlock set, hence we end up here. */
   1467          putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
   1468 
   1469          /* Drop the bigLock */
   1470          VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
   1471          /* Urr.  We're now in a race against other threads trying to
   1472             acquire the bigLock.  I guess that doesn't matter provided
   1473             that do_syscall_for_client only touches thread-local
   1474             state. */
   1475 
   1476          /* Do the call, which operates directly on the guest state,
   1477             not on our abstracted copies of the args/result. */
   1478          do_syscall_for_client(sysno, tst, &mask);
   1479 
   1480          /* do_syscall_for_client may not return if the syscall was
   1481             interrupted by a signal.  In that case, flow of control is
   1482             first to m_signals.async_sighandler, which calls
   1483             VG_(fixup_guest_state_after_syscall_interrupted), which
   1484             fixes up the guest state, and possibly calls
   1485             VG_(post_syscall).  Once that's done, control drops back
   1486             to the scheduler.  */
   1487 
   1488          /* Darwin: do_syscall_for_client may not return if the
   1489             syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
   1490             responded by starting the thread at wqthread_hijack(reuse=1)
   1491             (to run another workqueue item). In that case, wqthread_hijack
   1492             calls ML_(wqthread_continue), which is similar to
   1493             VG_(fixup_guest_state_after_syscall_interrupted). */
   1494 
   1495          /* Reacquire the lock */
   1496          VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");
   1497 
   1498          /* Even more impedance matching.  Extract the syscall status
   1499             from the guest state. */
   1500          getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
   1501          vg_assert(sci->status.what == SsComplete);
   1502 
   1503          /* Be decorative, if required. */
   1504          if (VG_(clo_trace_syscalls)) {
   1505             Bool failed = sr_isError(sci->status.sres);
   1506             if (failed) {
   1507                PRINT("SYSCALL[%d,%d](%s) ... [async] --> Failure(0x%llx)",
   1508                      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
   1509                      (ULong)sr_Err(sci->status.sres));
   1510             } else {
   1511                PRINT("SYSCALL[%d,%d](%s) ... [async] --> "
   1512                      "Success(0x%llx:0x%llx)",
   1513                      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
   1514                      (ULong)sr_ResHI(sci->status.sres),
   1515                      (ULong)sr_Res(sci->status.sres) );
   1516             }
   1517          }
   1518 
   1519       } else {
   1520 
   1521          /* run the syscall directly */
   1522          /* The pre-handler may have modified the syscall args, but
   1523             since we're passing values in ->args directly to the
   1524             kernel, there's no point in flushing them back to the
   1525             guest state.  Indeed doing so could be construed as
   1526             incorrect. */
   1527          SysRes sres
   1528             = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
   1529                                      sci->args.arg3, sci->args.arg4,
   1530                                      sci->args.arg5, sci->args.arg6,
   1531                                      sci->args.arg7, sci->args.arg8 );
   1532          sci->status = convert_SysRes_to_SyscallStatus(sres);
   1533 
   1534          /* Be decorative, if required. */
   1535          if (VG_(clo_trace_syscalls)) {
   1536             Bool failed = sr_isError(sci->status.sres);
   1537             if (failed) {
   1538                PRINT("[sync] --> Failure(0x%llx)",
   1539                      (ULong)sr_Err(sci->status.sres) );
   1540             } else {
   1541                PRINT("[sync] --> Success(0x%llx:0x%llx)",
   1542                      (ULong)sr_ResHI(sci->status.sres),
   1543                      (ULong)sr_Res(sci->status.sres) );
   1544             }
   1545          }
   1546       }
   1547    }
   1548 
   1549    vg_assert(sci->status.what == SsComplete);
   1550 
   1551    vg_assert(VG_(is_running_thread)(tid));
   1552 
   1553    /* Dump the syscall result back in the guest state.  This is
   1554       a platform-specific action. */
   1555    if (!(sci->flags & SfNoWriteResult))
   1556       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
   1557 
   1558    /* Situation now:
   1559       - the guest state is now correctly modified following the syscall
   1560       - modified args, original args and syscall status are still
   1561         available in the syscallInfo[] entry for this syscall.
   1562 
   1563       Now go on to do the post-syscall actions (read on down ..)
   1564    */
   1565    PRINT(" ");
   1566    VG_(post_syscall)(tid);
   1567    PRINT("\n");
   1568 }
   1569 
   1570 
   1571 /* Perform post syscall actions.  The expected state on entry is
   1572    precisely as at the end of VG_(client_syscall), that is:
   1573 
   1574    - guest state up to date following the syscall
   1575    - modified args, original args and syscall status are still
   1576      available in the syscallInfo[] entry for this syscall.
   1577    - syscall status matches what's in the guest state.
   1578 
   1579    There are two ways to get here: the normal way -- being called by
   1580    VG_(client_syscall), and the unusual way, from
   1581    VG_(fixup_guest_state_after_syscall_interrupted).
   1582    Darwin: there's a third way, ML_(wqthread_continue).
   1583 */
void VG_(post_syscall) (ThreadId tid)
{
   SyscallInfo*             sci;
   const SyscallTableEntry* ent;
   SyscallStatus            test_status;
   ThreadState*             tst;
   Word sysno;

   /* Preliminaries */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);
   sci = & syscallInfo[tid];

   /* m_signals.sigvgkill_handler might call here even when not in
      a syscall.  In that case there is nothing to post-process; just
      mark the entry idle and bail out. */
   if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
      sci->status.what = SsIdle;
      return;
   }

   /* Validate current syscallInfo entry.  In particular we require
      that the current .status matches what's actually in the guest
      state.  At least in the normal case where we have actually
      previously written the result into the guest state. */
   vg_assert(sci->status.what == SsComplete);

   getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
   if (!(sci->flags & SfNoWriteResult))
      vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
   /* Failure of the above assertion on Darwin can indicate a problem
      in the syscall wrappers that pre-fail or pre-succeed the
      syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
      when they really should call SET_STATUS_from_SysRes.  The former
      create a UNIX-class syscall result on Darwin, which may not be
      correct for the syscall; if that's the case then this assertion
      fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
      non-Darwin platforms this assertion should never fail, and this
      comment is completely irrelevant. */
   /* Ok, looks sane */

   /* Get the system call number.  Because the pre-handler isn't
      allowed to mess with it, it should be the same for both the
      original and potentially-modified args. */
   vg_assert(sci->args.sysno == sci->orig_args.sysno);
   sysno = sci->args.sysno;
   ent = get_syscall_entry(sysno);

   /* pre: status == Complete (asserted above) */
   /* Consider either success or failure.  Now run the post handler if:
      - it exists, and
      - Success or (Failure and PostOnFail is set)
   */
   if (ent->after
       && ((!sr_isError(sci->status.sres))
           || (sr_isError(sci->status.sres)
               && (sci->flags & SfPostOnFail) ))) {

      (ent->after)( tid, &sci->args, &sci->status );
   }

   /* Because the post handler might have changed the status (eg, the
      post-handler for sys_open can change the result from success to
      failure if the kernel supplied a fd that it doesn't like), once
      again dump the syscall result back in the guest state.*/
   if (!(sci->flags & SfNoWriteResult))
      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );

   /* Do any post-syscall actions required by the tool.  Note the tool
      is shown the *original* (pre-handler-unmodified) args, copied
      into a local array so it cannot corrupt sci->orig_args. */
   if (VG_(needs).syscall_wrapper) {
      UWord tmpv[8];
      tmpv[0] = sci->orig_args.arg1;
      tmpv[1] = sci->orig_args.arg2;
      tmpv[2] = sci->orig_args.arg3;
      tmpv[3] = sci->orig_args.arg4;
      tmpv[4] = sci->orig_args.arg5;
      tmpv[5] = sci->orig_args.arg6;
      tmpv[6] = sci->orig_args.arg7;
      tmpv[7] = sci->orig_args.arg8;
      VG_TDICT_CALL(tool_post_syscall, tid,
                    sysno,
                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
                    sci->status.sres);
   }

   /* The syscall is done. */
   vg_assert(sci->status.what == SsComplete);
   sci->status.what = SsIdle;

   /* The pre/post wrappers may have concluded that pending signals
      might have been created, and will have set SfPollAfter to
      request a poll for them once the syscall is done. */
   if (sci->flags & SfPollAfter)
      VG_(poll_signals)(tid);

   /* Similarly, the wrappers might have asked for a yield
      afterwards. */
   if (sci->flags & SfYieldAfter)
      VG_(vg_yield)();
}
   1686 
   1687 
   1688 /* ---------------------------------------------------------------------
   1689    Dealing with syscalls which get interrupted by a signal:
   1690    VG_(fixup_guest_state_after_syscall_interrupted)
   1691    ------------------------------------------------------------------ */
   1692 
   1693 /* Syscalls done on behalf of the client are finally handed off to the
   1694    kernel in VG_(client_syscall) above, either by calling
   1695    do_syscall_for_client (the async case), or by calling
   1696    VG_(do_syscall6) (the sync case).
   1697 
   1698    If the syscall is not interrupted by a signal (it may block and
   1699    later unblock, but that's irrelevant here) then those functions
   1700    eventually return and so control is passed to VG_(post_syscall).
   1701    NB: not sure if the sync case can actually get interrupted, as it
   1702    operates with all signals masked.
   1703 
   1704    However, the syscall may get interrupted by an async-signal.  In
   1705    that case do_syscall_for_client/VG_(do_syscall6) do not
   1706    return.  Instead we wind up in m_signals.async_sighandler.  We need
   1707    to fix up the guest state to make it look like the syscall was
   1708    interrupted for guest.  So async_sighandler calls here, and this
   1709    does the fixup.  Note that from here we wind up calling
   1710    VG_(post_syscall) too.
   1711 */
   1712 
   1713 
   1714 /* These are addresses within ML_(do_syscall_for_client_WRK).  See
   1715    syscall-$PLAT.S for details.
   1716 */
   1717 #if defined(VGO_linux)
   1718   extern const Addr ML_(blksys_setup);
   1719   extern const Addr ML_(blksys_restart);
   1720   extern const Addr ML_(blksys_complete);
   1721   extern const Addr ML_(blksys_committed);
   1722   extern const Addr ML_(blksys_finished);
   1723 #elif defined(VGO_darwin)
  /* Darwin requires extra ugliness */
   1725   extern const Addr ML_(blksys_setup_MACH);
   1726   extern const Addr ML_(blksys_restart_MACH);
   1727   extern const Addr ML_(blksys_complete_MACH);
   1728   extern const Addr ML_(blksys_committed_MACH);
   1729   extern const Addr ML_(blksys_finished_MACH);
   1730   extern const Addr ML_(blksys_setup_MDEP);
   1731   extern const Addr ML_(blksys_restart_MDEP);
   1732   extern const Addr ML_(blksys_complete_MDEP);
   1733   extern const Addr ML_(blksys_committed_MDEP);
   1734   extern const Addr ML_(blksys_finished_MDEP);
   1735   extern const Addr ML_(blksys_setup_UNIX);
   1736   extern const Addr ML_(blksys_restart_UNIX);
   1737   extern const Addr ML_(blksys_complete_UNIX);
   1738   extern const Addr ML_(blksys_committed_UNIX);
   1739   extern const Addr ML_(blksys_finished_UNIX);
   1740 #else
   1741 # error "Unknown OS"
   1742 #endif
   1743 
   1744 
   1745 /* Back up guest state to restart a system call. */
   1746 
void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
{
#if defined(VGP_x86_linux)
   arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
   */
   {
      UChar *p = (UChar *)arch->vex.guest_EIP;

      if (p[0] != 0xcd || p[1] != 0x80)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x\n",
                      arch->vex.guest_EIP, p[0], p[1]);

      vg_assert(p[0] == 0xcd && p[1] == 0x80);
   }

#elif defined(VGP_amd64_linux)
   arch->vex.guest_RIP -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0F 05
   */
   {
      UChar *p = (UChar *)arch->vex.guest_RIP;

      if (p[0] != 0x0F || p[1] != 0x05)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_RIP, p[0], p[1]);

      vg_assert(p[0] == 0x0F && p[1] == 0x05);
   }

#elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      sc == 44 00 00 02
   */
   {
      UChar *p = (UChar *)arch->vex.guest_CIA;

      if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
   }

#elif defined(VGP_arm_linux)
   if (arch->vex.guest_R15T & 1) {
      // Thumb mode.  SVC is encoded as
      //   1101 1111 imm8
      // where imm8 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
      // Note: R15T still carries the Thumb bit, hence the -1 to get
      // the actual instruction address.
      UChar* p     = (UChar*)(arch->vex.guest_R15T - 1);
      Bool   valid = p[0] == 0 && p[1] == 0xDF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (Thumb) syscall that is not syscall "
                      "at %#llx %02x %02x\n",
                      arch->vex.guest_R15T - 1ULL, p[0], p[1]);
      }
      vg_assert(valid);
      // FIXME: NOTE, this really isn't right.  We need to back up
      // ITSTATE to what it was before the SVC instruction, but we
      // don't know what it was.  At least assert that it is now
      // zero, because if it is nonzero then it must also have
      // been nonzero for the SVC itself, which means it was
      // conditional.  Urk.
      vg_assert(arch->vex.guest_ITSTATE == 0);
   } else {
      // ARM mode.  SVC is encoded as
      //   cond 1111 imm24
      // where imm24 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
      UChar* p     = (UChar*)arch->vex.guest_R15T;
      Bool   valid = p[0] == 0 && p[1] == 0 && p[2] == 0
                     && (p[3] & 0xF) == 0xF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (ARM) syscall that is not syscall "
                      "at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_R15T + 0ULL, p[0], p[1], p[2], p[3]);
      }
      vg_assert(valid);
   }

#elif defined(VGP_x86_darwin)
   /* Darwin saves the pre-syscall IP, so restarting is just restoring
      it rather than subtracting an instruction length. */
   arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
      int $0x81 == CD 81
      int $0x82 == CD 82
      sysenter  == 0F 34
   */
   {
       UChar *p = (UChar *)arch->vex.guest_EIP;
       Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
                  || (p[0] == 0xCD && p[1] == 0x81)
                  || (p[0] == 0xCD && p[1] == 0x82)
                  || (p[0] == 0x0F && p[1] == 0x34);
       if (!ok)
           VG_(message)(Vg_DebugMsg,
                        "?! restarting over syscall at %#x %02x %02x\n",
                        arch->vex.guest_EIP, p[0], p[1]);
       vg_assert(ok);
   }

#elif defined(VGP_amd64_darwin)
   // DDD: #warning GrP fixme amd64 restart unimplemented
   vg_assert(0);

#elif defined(VGP_s390x_linux)
   arch->vex.guest_IA -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0A <num>
   */
   {
      UChar *p = (UChar *)arch->vex.guest_IA;
      if (p[0] != 0x0A)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_IA, p[0], p[1]);

      vg_assert(p[0] == 0x0A);
   }
#else
#  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
#endif
}
   1894 
   1895 /*
   1896    Fix up the guest state when a syscall is interrupted by a signal
   1897    and so has been forced to return 'sysret'.
   1898 
   1899    To do this, we determine the precise state of the syscall by
   1900    looking at the (real) IP at the time the signal happened.  The
   1901    syscall sequence looks like:
   1902 
   1903      1. unblock signals
   1904      2. perform syscall
   1905      3. save result to guest state (EAX, RAX, R3+CR0.SO)
   1906      4. re-block signals
   1907 
   1908    If a signal
   1909    happens at      Then     Why?
   1910    [1-2)           restart  nothing has happened (restart syscall)
   1911    [2]             restart  syscall hasn't started, or kernel wants to restart
   1912    [2-3)           save     syscall complete, but results not saved
   [3-4)           n/a      syscall complete, results saved
   1914 
   1915    Sometimes we never want to restart an interrupted syscall (because
   1916    sigaction says not to), so we only restart if "restart" is True.
   1917 
   1918    This will also call VG_(post_syscall) if the syscall has actually
   1919    completed (either because it was interrupted, or because it
   1920    actually finished).  It will not call VG_(post_syscall) if the
   1921    syscall is set up for restart, which means that the pre-wrapper may
   1922    get called multiple times.
   1923 */
   1924 
void
VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
                                                  Addr     ip,
                                                  SysRes   sres,
                                                  Bool     restart)
{
   /* Note that we don't know the syscall number here, since (1) in
      general there's no reliable way to get hold of it short of
      stashing it in the guest state before the syscall, and (2) in
      any case we don't need to know it for the actions done by this
      routine.

      Furthermore, 'sres' is only used in the case where the syscall
      is complete, but the result has not been committed to the guest
      state yet.  In any other situation it will be meaningless and
      therefore ignored. */

   ThreadState*     tst;
   SyscallStatus    canonical;
   ThreadArchState* th_regs;
   SyscallInfo*     sci;

   /* Compute some Booleans indicating which range we're in.  The
      ranges are delimited by the ML_(blksys_*) labels exported from
      the platform's syscall-$PLAT.S file. */
   Bool outside_range,
        in_setup_to_restart,      // [1,2) in the .S files
        at_restart,               // [2]   in the .S files
        in_complete_to_committed, // [3,4) in the .S files
        in_committed_to_finished; // [4,5) in the .S files

#  if defined(VGO_linux)
   outside_range
      = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
   in_setup_to_restart
      = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
   at_restart
      = ip == ML_(blksys_restart);
   in_complete_to_committed
      = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
   in_committed_to_finished
      = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
#  elif defined(VGO_darwin)
   /* Darwin has three separate syscall flavours (MACH, MDEP, UNIX),
      each with its own copy of the label set, so test all three. */
   outside_range
      =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
      && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
      && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
   in_setup_to_restart
      =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
      || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
      || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
   at_restart
      =  (ip == ML_(blksys_restart_MACH))
      || (ip == ML_(blksys_restart_MDEP))
      || (ip == ML_(blksys_restart_UNIX));
   in_complete_to_committed
      =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
      || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
      || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
   in_committed_to_finished
      =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
      || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
      || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
   /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
#  else
#    error "Unknown OS"
#  endif

   if (VG_(clo_trace_signals))
      VG_(message)( Vg_DebugMsg,
                    "interrupted_syscall: tid=%d, ip=0x%llx, "
                    "restart=%s, sres.isErr=%s, sres.val=%lld\n",
                    (Int)tid,
                    (ULong)ip,
                    restart ? "True" : "False",
                    sr_isError(sres) ? "True" : "False",
                    (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst     = VG_(get_ThreadState)(tid);
   th_regs = &tst->arch;
   sci     = & syscallInfo[tid];

   /* Figure out what the state of the syscall was by examining the
      (real) IP at the time of the signal, and act accordingly. */
   if (outside_range) {
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  not in syscall at all: hmm, very suspicious\n" );
      /* Looks like we weren't in a syscall at all.  Hmm. */
      vg_assert(sci->status.what != SsIdle);
      return;
   }

   /* We should not be here unless this thread had first started up
      the machinery for a syscall by calling VG_(client_syscall).
      Hence: */
   vg_assert(sci->status.what != SsIdle);

   /* now, do one of four fixup actions, depending on where the IP has
      got to. */

   if (in_setup_to_restart) {
      /* syscall hasn't even started; go around again */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
      vg_assert(sci->status.what == SsHandToKernel);
      ML_(fixup_guest_state_to_restart_syscall)(th_regs);
   }

   else
   if (at_restart) {
      /* We're either about to run the syscall, or it was interrupted
         and the kernel restarted it.  Restart if asked, otherwise
         EINTR it. */
      if (restart) {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
         ML_(fixup_guest_state_to_restart_syscall)(th_regs);
      } else {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
         /* Synthesise an EINTR failure and commit it to the guest
            state as though the kernel had returned it. */
         canonical = convert_SysRes_to_SyscallStatus(
                        VG_(mk_SysRes_Error)( VKI_EINTR )
                     );
         if (!(sci->flags & SfNoWriteResult))
            putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
         sci->status = canonical;
         VG_(post_syscall)(tid);
      }
   }

   else
   if (in_complete_to_committed) {
      /* Syscall complete, but result hasn't been written back yet.
         Write the SysRes we were supplied with back to the guest
         state. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed, but uncommitted: committing\n");
      canonical = convert_SysRes_to_SyscallStatus( sres );
      if (!(sci->flags & SfNoWriteResult))
         putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
      sci->status = canonical;
      VG_(post_syscall)(tid);
   }

   else
   if (in_committed_to_finished) {
      /* Result committed, but the signal mask has not been restored;
         we expect our caller (the signal handler) will have fixed
         this up. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed and committed: nothing to do\n");
      getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
      vg_assert(sci->status.what == SsComplete);
      VG_(post_syscall)(tid);
   }

   else
      VG_(core_panic)("?? strange syscall interrupt state?");

   /* In all cases, the syscall is now finished (even if we called
      ML_(fixup_guest_state_to_restart_syscall), since that just
      re-positions the guest's IP for another go at it).  So we need
      to record that fact. */
   sci->status.what = SsIdle;
}
   2095 
   2096 
   2097 #if defined(VGO_darwin)
   2098 // Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
   2099 // This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
   2100 // This longjmps back to the scheduler.
void ML_(wqthread_continue_NORETURN)(ThreadId tid)
{
   ThreadState*     tst;
   SyscallInfo*     sci;

   VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");

   PRINT("SYSCALL[%d,%d](%s) workq_ops() starting new workqueue item\n",
         VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst     = VG_(get_ThreadState)(tid);
   sci     = & syscallInfo[tid];
   vg_assert(sci->status.what != SsIdle);
   vg_assert(tst->os_state.wq_jmpbuf_valid);  // check this BEFORE post_syscall

   // Pretend the syscall completed normally, but don't touch the thread state.
   // SfNoWriteResult stops VG_(post_syscall) writing the faked result back
   // into the guest registers.
   sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
   sci->flags |= SfNoWriteResult;
   VG_(post_syscall)(tid);

   sci->status.what = SsIdle;

   // Hand control back to the scheduler; this function never returns.
   vg_assert(tst->sched_jmpbuf_valid);
   VG_MINIMAL_LONGJMP(tst->sched_jmpbuf);

   /* NOTREACHED */
   vg_assert(0);
}
   2133 #endif
   2134 
   2135 
   2136 /* ---------------------------------------------------------------------
   2137    A place to store the where-to-call-when-really-done pointer
   2138    ------------------------------------------------------------------ */
   2139 
   2140 // When the final thread is done, where shall I call to shutdown the
   2141 // system cleanly?  Is set once at startup (in m_main) and never
   2142 // changes after that.  Is basically a pointer to the exit
   2143 // continuation.  This is all just a nasty hack to avoid calling
   2144 // directly from m_syswrap to m_main at exit, since that would cause
   2145 // m_main to become part of a module cycle, which is silly.
/* Set exactly once, by m_main at startup (see comment above); NULL only
   before that initialization happens. */
void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
       (ThreadId,VgSchedReturnCode)
   = NULL;
   2149 
   2150 /*--------------------------------------------------------------------*/
   2151 /*--- end                                                          ---*/
   2152 /*--------------------------------------------------------------------*/
   2153