Home | History | Annotate | Download | only in m_syswrap
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Handle system calls.                          syswrap-main.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2017 Julian Seward
     11       jseward (at) acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "libvex_guest_offsets.h"
     32 #include "libvex_trc_values.h"
     33 #include "pub_core_basics.h"
     34 #include "pub_core_aspacemgr.h"
     35 #include "pub_core_vki.h"
     36 #include "pub_core_vkiscnums.h"
     37 #include "pub_core_threadstate.h"
     38 #include "pub_core_libcbase.h"
     39 #include "pub_core_libcassert.h"
     40 #include "pub_core_libcprint.h"
     41 #include "pub_core_libcproc.h"      // For VG_(getpid)()
     42 #include "pub_core_libcsignal.h"
     43 #include "pub_core_scheduler.h"     // For VG_({acquire,release}_BigLock),
     44                                     //   and VG_(vg_yield)
     45 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     46 #include "pub_core_tooliface.h"
     47 #include "pub_core_options.h"
     48 #include "pub_core_signals.h"       // For VG_SIGVGKILL, VG_(poll_signals)
     49 #include "pub_core_syscall.h"
     50 #include "pub_core_machine.h"
     51 #include "pub_core_mallocfree.h"
     52 #include "pub_core_syswrap.h"
     53 #include "pub_core_gdbserver.h"     // VG_(gdbserver_report_syscall)
     54 
     55 #include "priv_types_n_macros.h"
     56 #include "priv_syswrap-main.h"
     57 
     58 #if defined(VGO_darwin)
     59 #include "priv_syswrap-darwin.h"
     60 #endif
     61 
     62 /* Useful info which needs to be recorded somewhere:
     63    Use of registers in syscalls is:
     64 
     65           NUM   ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
     66    LINUX:
     67    x86    eax   ebx  ecx  edx  esi  edi  ebp  n/a  n/a  eax       (== NUM)
     68    amd64  rax   rdi  rsi  rdx  r10  r8   r9   n/a  n/a  rax       (== NUM)
     69    ppc32  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
     70    ppc64  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
     71    arm    r7    r0   r1   r2   r3   r4   r5   n/a  n/a  r0        (== ARG1)
     72    mips32 v0    a0   a1   a2   a3 stack stack n/a  n/a  v0        (== NUM)
     73    mips64 v0    a0   a1   a2   a3   a4   a5   a6   a7   v0        (== NUM)
     74    arm64  x8    x0   x1   x2   x3   x4   x5   n/a  n/a  x0 ??     (== ARG1??)
     75 
     76    On s390x the svc instruction is used for system calls. The system call
     77    number is encoded in the instruction (8 bit immediate field). Since Linux
     78    2.6 it is also allowed to use svc 0 with the system call number in r1.
     79    This was introduced for system calls >255, but works for all. It is
     80    also possible to see the svc 0 together with an EXecute instruction, that
     81    fills in the immediate field.
     82    s390x r1/SVC r2   r3   r4   r5   r6   r7   n/a  n/a  r2        (== ARG1)
     83 
     84           NUM   ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
     85    DARWIN:
     86    x86    eax   +4   +8   +12  +16  +20  +24  +28  +32  edx:eax, eflags.c
     87    amd64  rax   rdi  rsi  rdx  rcx  r8   r9   +8   +16  rdx:rax, rflags.c
     88 
     89    For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto
     90    amd64-darwin.  Apparently 0(%esp) is some kind of return address
     91    (perhaps for syscalls done with "sysenter"?)  I don't think it is
     92    relevant for syscalls done with "int $0x80/1/2".
     93 
     94    SOLARIS:
     95    x86    eax +4   +8   +12  +16  +20  +24  +28  +32  edx:eax, eflags.c
     96    amd64  rax rdi  rsi  rdx  r10  r8   r9   +8   +16  rdx:rax, rflags.c
     97 
     98    "+N" denotes "in memory at N(%esp)". Solaris also supports fasttrap
     99    syscalls. Fasttraps do not take any parameters (except of the sysno in eax)
    100    and never fail (if the sysno is valid).
    101 */
    102 
    103 /* This is the top level of the system-call handler module.  All
    104    system calls are channelled through here, doing two things:
    105 
    106    * notify the tool of the events (mem/reg reads, writes) happening
    107 
    108    * perform the syscall, usually by passing it along to the kernel
    109      unmodified.
    110 
    111    A magical piece of assembly code, do_syscall_for_client_WRK, in
    112    syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
    113    kernel, whilst having the simulator retain control.
    114 */
    115 
    116 /* The main function is VG_(client_syscall).  The simulation calls it
    117    whenever a client thread wants to do a syscall.  The following is a
    118    sketch of what it does.
    119 
    120    * Ensures the root thread's stack is suitably mapped.  Tedious and
    121      arcane.  See big big comment in VG_(client_syscall).
    122 
    123    * First, it rounds up the syscall number and args (which is a
    124      platform dependent activity) and puts them in a struct ("args")
    125      and also a copy in "orig_args".
    126 
    127      The pre/post wrappers refer to these structs and so no longer
    128      need magic macros to access any specific registers.  This struct
    129      is stored in thread-specific storage.
    130 
    131 
    132    * The pre-wrapper is called, passing it a pointer to struct
    133      "args".
    134 
    135 
    136    * The pre-wrapper examines the args and pokes the tool
    137      appropriately.  It may modify the args; this is why "orig_args"
    138      is also stored.
    139 
    140      The pre-wrapper may choose to 'do' the syscall itself, and
    141      concludes one of three outcomes:
    142 
    143        Success(N)    -- syscall is already complete, with success;
    144                         result is N
    145 
    146        Fail(N)       -- syscall is already complete, with failure;
    147                         error code is N
    148 
    149        HandToKernel  -- (the usual case): this needs to be given to
    150                         the kernel to be done, using the values in
    151                         the possibly-modified "args" struct.
    152 
    153      In addition, the pre-wrapper may set some flags:
    154 
    155        MayBlock   -- only applicable when outcome==HandToKernel
    156 
    157        PostOnFail -- only applicable when outcome==HandToKernel or Fail
    158 
    159 
    160    * If the pre-outcome is HandToKernel, the syscall is duly handed
    161      off to the kernel (perhaps involving some thread switchery, but
    162      that's not important).  This reduces the possible set of outcomes
    163      to either Success(N) or Fail(N).
    164 
    165 
    166    * The outcome (Success(N) or Fail(N)) is written back to the guest
    167      register(s).  This is platform specific:
    168 
    169      x86:    Success(N) ==>  eax = N
    170              Fail(N)    ==>  eax = -N
    171 
    172      ditto amd64
    173 
    174      ppc32:  Success(N) ==>  r3 = N, CR0.SO = 0
    175              Fail(N) ==>     r3 = N, CR0.SO = 1
    176 
    177      Darwin:
    178      x86:    Success(N) ==>  edx:eax = N, cc = 0
    179              Fail(N)    ==>  edx:eax = N, cc = 1
    180 
    181      s390x:  Success(N) ==>  r2 = N
    182              Fail(N)    ==>  r2 = -N
    183 
    184      Solaris:
    185      x86:    Success(N) ==>  edx:eax = N, cc = 0
    186              Fail(N)    ==>      eax = N, cc = 1
    187      Same applies for fasttraps except they never fail.
    188 
    189    * The post wrapper is called if:
    190 
    191      - it exists, and
    192      - outcome==Success or (outcome==Fail and PostOnFail is set)
    193 
    194      The post wrapper is passed the adulterated syscall args (struct
    195      "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
    196 
    197    There are several other complications, primarily to do with
    198    syscalls getting interrupted, explained in comments in the code.
    199 */
    200 
    201 /* CAVEATS for writing wrappers.  It is important to follow these!
    202 
    203    The macros defined in priv_types_n_macros.h are designed to help
    204    decouple the wrapper logic from the actual representation of
    205    syscall args/results, since these wrappers are designed to work on
    206    multiple platforms.
    207 
    208    Sometimes a PRE wrapper will complete the syscall itself, without
    209    handing it to the kernel.  It will use one of SET_STATUS_Success,
    210    SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
    211    value.  It is critical to appreciate that use of the macro does not
    212    immediately cause the underlying guest state to be updated -- that
    213    is done by the driver logic in this file, when the wrapper returns.
    214 
    215    As a result, PRE wrappers of the following form will malfunction:
    216 
    217    PRE(fooble)
    218    {
    219       ... do stuff ...
    220       SET_STATUS_Somehow(...)
    221 
    222       // do something that assumes guest state is up to date
    223    }
    224 
    225    In particular, direct or indirect calls to VG_(poll_signals) after
    226    setting STATUS can cause the guest state to be read (in order to
    227    build signal frames).  Do not do this.  If you want a signal poll
    228    after the syscall goes through, do "*flags |= SfPollAfter" and the
    229    driver logic will do it for you.
    230 
    231    -----------
    232 
    233    Another critical requirement following introduction of new address
    234    space manager (JRS, 20050923):
    235 
    236    In a situation where the mappedness of memory has changed, aspacem
    237    should be notified BEFORE the tool.  Hence the following is
    238    correct:
    239 
    240       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
    241       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
    242       if (d)
    243          VG_(discard_translations)(s->start, s->end+1 - s->start);
    244 
    245    whilst this is wrong:
    246 
    247       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
    248       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
    249       if (d)
    250          VG_(discard_translations)(s->start, s->end+1 - s->start);
    251 
    252    The reason is that the tool may itself ask aspacem for more shadow
    253    memory as a result of the VG_TRACK call.  In such a situation it is
    254    critical that aspacem's segment array is up to date -- hence the
    255    need to notify aspacem first.
    256 
    257    -----------
    258 
    259    Also .. take care to call VG_(discard_translations) whenever
    260    memory with execute permissions is unmapped.
    261 */
    262 
    263 
    264 /* ---------------------------------------------------------------------
    265    Do potentially blocking syscall for the client, and mess with
    266    signal masks at the same time.
    267    ------------------------------------------------------------------ */
    268 
    269 /* Perform a syscall on behalf of a client thread, using a specific
    270    signal mask.  On completion, the signal mask is set to restore_mask
    271    (which presumably blocks almost everything).  If a signal happens
    272    during the syscall, the handler should call
    273    VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
    274    thread's context to do the right thing.
    275 
    276    The _WRK function is handwritten assembly, implemented per-platform
    277    in coregrind/m_syswrap/syscall-$PLAT.S.  It has some very magic
    278    properties.  See comments at the top of
    279    VG_(fixup_guest_state_after_syscall_interrupted) below for details.
    280 
    281    This function (these functions) are required to return zero in case
    282    of success (even if the syscall itself failed), and nonzero if the
    283    sigprocmask-swizzling calls failed.  We don't actually care about
    284    the failure values from sigprocmask, although most of the assembly
    285    implementations do attempt to return that, using the convention
    286    0 for success, or 0x8000 | error-code for failure.
    287 */
    288 #if defined(VGO_linux)
    289 extern
    290 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
    291                                       void* guest_state,
    292                                       const vki_sigset_t *syscall_mask,
    293                                       const vki_sigset_t *restore_mask,
    294                                       Word sigsetSzB );
    295 #elif defined(VGO_darwin)
    296 extern
    297 UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
    298                                            void* guest_state,
    299                                            const vki_sigset_t *syscall_mask,
    300                                            const vki_sigset_t *restore_mask,
    301                                            Word sigsetSzB ); /* unused */
    302 extern
    303 UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
    304                                            void* guest_state,
    305                                            const vki_sigset_t *syscall_mask,
    306                                            const vki_sigset_t *restore_mask,
    307                                            Word sigsetSzB ); /* unused */
    308 extern
    309 UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
    310                                            void* guest_state,
    311                                            const vki_sigset_t *syscall_mask,
    312                                            const vki_sigset_t *restore_mask,
    313                                            Word sigsetSzB ); /* unused */
    314 #elif defined(VGO_solaris)
    315 extern
    316 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
    317                                       void* guest_state,
    318                                       const vki_sigset_t *syscall_mask,
    319                                       const vki_sigset_t *restore_mask,
    320                                       UChar *cflag);
    321 UWord ML_(do_syscall_for_client_dret_WRK)( Word syscallno,
    322                                            void* guest_state,
    323                                            const vki_sigset_t *syscall_mask,
    324                                            const vki_sigset_t *restore_mask,
    325                                            UChar *cflag);
    326 #else
    327 #  error "Unknown OS"
    328 #endif
    329 
    330 
/* Run syscall SYSCALLNO for the client thread TST, with SYSCALL_MASK
   installed as the signal mask for the duration of the syscall.  On
   return the previous mask (captured into 'saved' by the assembly
   helper) has been restored.  Dispatches to the per-platform
   handwritten assembly _WRK routine declared above; see the big
   comment preceding those declarations for the 0 / 0x8000|errcode
   return convention being checked at the bottom. */
static
void do_syscall_for_client ( Int syscallno,
                             ThreadState* tst,
                             const vki_sigset_t* syscall_mask )
{
   vki_sigset_t saved;   /* mask in force before the syscall; restored by _WRK */
   UWord err;
#  if defined(VGO_linux)
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, sizeof(vki_sigset_t)
         );
#  elif defined(VGO_darwin)
   /* Darwin encodes the syscall class (Unix/Mach/mdep) in the number;
      strip it with VG_DARWIN_SYSNO_FOR_KERNEL and pick the matching
      assembly helper.  sigsetSzB is unused on this OS. */
   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         err = ML_(do_syscall_for_client_unix_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         err = ML_(do_syscall_for_client_mach_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         err = ML_(do_syscall_for_client_mdep_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      default:
         vg_assert(0);
         /*NOTREACHED*/
         break;
   }
#  elif defined(VGO_solaris)
   UChar cflag;   /* carry flag as reported by the kernel (success/fail bit) */

   /* Fasttraps or anything else cannot go through this path. */
   vg_assert(VG_SOLARIS_SYSNO_CLASS(syscallno)
             == VG_SOLARIS_SYSCALL_CLASS_CLASSIC);

   /* If the syscall is a door_return call then it has to be handled very
      differently. */
   if (tst->os_state.in_door_return)
      err = ML_(do_syscall_for_client_dret_WRK)(
                syscallno, &tst->arch.vex,
                syscall_mask, &saved, &cflag
            );
   else
      err = ML_(do_syscall_for_client_WRK)(
                syscallno, &tst->arch.vex,
                syscall_mask, &saved, &cflag
            );

   /* Save the carry flag.  On Solaris the kernel signals syscall
      failure via the carry flag, so it must be propagated into the
      guest state for the result-decoding logic to see. */
#  if defined(VGP_x86_solaris)
   LibVEX_GuestX86_put_eflag_c(cflag, &tst->arch.vex);
#  elif defined(VGP_amd64_solaris)
   LibVEX_GuestAMD64_put_rflag_c(cflag, &tst->arch.vex);
#  else
#    error "Unknown platform"
#  endif

#  else
#    error "Unknown OS"
#  endif
   /* err != 0 means the sigprocmask swizzling inside _WRK failed,
      which should never happen; the syscall's own failure is NOT
      reported via err. */
   vg_assert2(
      err == 0,
      "ML_(do_syscall_for_client_WRK): sigprocmask error %lu",
      err & 0xFFF
   );
}
    406 
    407 
    408 /* ---------------------------------------------------------------------
    409    Impedance matchers and misc helpers
    410    ------------------------------------------------------------------ */
    411 
    412 static
    413 Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
    414 {
    415    return a1->sysno == a2->sysno
    416           && a1->arg1 == a2->arg1
    417           && a1->arg2 == a2->arg2
    418           && a1->arg3 == a2->arg3
    419           && a1->arg4 == a2->arg4
    420           && a1->arg5 == a2->arg5
    421           && a1->arg6 == a2->arg6
    422           && a1->arg7 == a2->arg7
    423           && a1->arg8 == a2->arg8;
    424 }
    425 
    426 static
    427 Bool eq_SyscallStatus ( UInt sysno, SyscallStatus* s1, SyscallStatus* s2 )
    428 {
    429    /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
    430    if (s1->what == s2->what && sr_EQ( sysno, s1->sres, s2->sres ))
    431       return True;
    432 #  if defined(VGO_darwin)
    433    /* Darwin-specific debugging guff */
    434    vg_assert(s1->what == s2->what);
    435    VG_(printf)("eq_SyscallStatus:\n");
    436    VG_(printf)("  {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
    437    VG_(printf)("  {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
    438    vg_assert(0);
    439 #  endif
    440    return False;
    441 }
    442 
    443 /* Convert between SysRes and SyscallStatus, to the extent possible. */
    444 
    445 static
    446 SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
    447 {
    448    SyscallStatus status;
    449    status.what = SsComplete;
    450    status.sres = res;
    451    return status;
    452 }
    453 
    454 
    455 /* Impedance matchers.  These convert syscall arg or result data from
    456    the platform-specific in-guest-state format to the canonical
    457    formats, and back. */
    458 
    459 static
    460 void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
    461                                     /*IN*/ VexGuestArchState* gst_vanilla,
    462                                     /*IN*/ UInt trc )
    463 {
    464 #if defined(VGP_x86_linux)
    465    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    466    canonical->sysno = gst->guest_EAX;
    467    canonical->arg1  = gst->guest_EBX;
    468    canonical->arg2  = gst->guest_ECX;
    469    canonical->arg3  = gst->guest_EDX;
    470    canonical->arg4  = gst->guest_ESI;
    471    canonical->arg5  = gst->guest_EDI;
    472    canonical->arg6  = gst->guest_EBP;
    473    canonical->arg7  = 0;
    474    canonical->arg8  = 0;
    475 
    476 #elif defined(VGP_amd64_linux)
    477    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    478    canonical->sysno = gst->guest_RAX;
    479    canonical->arg1  = gst->guest_RDI;
    480    canonical->arg2  = gst->guest_RSI;
    481    canonical->arg3  = gst->guest_RDX;
    482    canonical->arg4  = gst->guest_R10;
    483    canonical->arg5  = gst->guest_R8;
    484    canonical->arg6  = gst->guest_R9;
    485    canonical->arg7  = 0;
    486    canonical->arg8  = 0;
    487 
    488 #elif defined(VGP_ppc32_linux)
    489    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
    490    canonical->sysno = gst->guest_GPR0;
    491    canonical->arg1  = gst->guest_GPR3;
    492    canonical->arg2  = gst->guest_GPR4;
    493    canonical->arg3  = gst->guest_GPR5;
    494    canonical->arg4  = gst->guest_GPR6;
    495    canonical->arg5  = gst->guest_GPR7;
    496    canonical->arg6  = gst->guest_GPR8;
    497    canonical->arg7  = 0;
    498    canonical->arg8  = 0;
    499 
    500 #elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
    501    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
    502    canonical->sysno = gst->guest_GPR0;
    503    canonical->arg1  = gst->guest_GPR3;
    504    canonical->arg2  = gst->guest_GPR4;
    505    canonical->arg3  = gst->guest_GPR5;
    506    canonical->arg4  = gst->guest_GPR6;
    507    canonical->arg5  = gst->guest_GPR7;
    508    canonical->arg6  = gst->guest_GPR8;
    509    canonical->arg7  = 0;
    510    canonical->arg8  = 0;
    511 
    512 #elif defined(VGP_arm_linux)
    513    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
    514    canonical->sysno = gst->guest_R7;
    515    canonical->arg1  = gst->guest_R0;
    516    canonical->arg2  = gst->guest_R1;
    517    canonical->arg3  = gst->guest_R2;
    518    canonical->arg4  = gst->guest_R3;
    519    canonical->arg5  = gst->guest_R4;
    520    canonical->arg6  = gst->guest_R5;
    521    canonical->arg7  = 0;
    522    canonical->arg8  = 0;
    523 
    524 #elif defined(VGP_arm64_linux)
    525    VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
    526    canonical->sysno = gst->guest_X8;
    527    canonical->arg1  = gst->guest_X0;
    528    canonical->arg2  = gst->guest_X1;
    529    canonical->arg3  = gst->guest_X2;
    530    canonical->arg4  = gst->guest_X3;
    531    canonical->arg5  = gst->guest_X4;
    532    canonical->arg6  = gst->guest_X5;
    533    canonical->arg7  = 0;
    534    canonical->arg8  = 0;
    535 
    536 #elif defined(VGP_mips32_linux)
    537    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
    538    canonical->sysno = gst->guest_r2;    // v0
    539    if (canonical->sysno == __NR_exit) {
    540       canonical->arg1 = gst->guest_r4;    // a0
    541       canonical->arg2 = 0;
    542       canonical->arg3 = 0;
    543       canonical->arg4 = 0;
    544       canonical->arg5 = 0;
    545       canonical->arg6 = 0;
    546       canonical->arg8 = 0;
    547    } else if (canonical->sysno != __NR_syscall) {
    548       canonical->arg1  = gst->guest_r4;    // a0
    549       canonical->arg2  = gst->guest_r5;    // a1
    550       canonical->arg3  = gst->guest_r6;    // a2
    551       canonical->arg4  = gst->guest_r7;    // a3
    552       canonical->arg5  = *((UInt*) (gst->guest_r29 + 16));    // 16(guest_SP)
    553       canonical->arg6  = *((UInt*) (gst->guest_r29 + 20));    // 20(guest_SP)
    554       canonical->arg7  = *((UInt*) (gst->guest_r29 + 24));    // 24(guest_SP)
    555       canonical->arg8 = 0;
    556    } else {
    557       // Fixme hack handle syscall()
    558       canonical->sysno = gst->guest_r4;    // a0
    559       canonical->arg1  = gst->guest_r5;    // a1
    560       canonical->arg2  = gst->guest_r6;    // a2
    561       canonical->arg3  = gst->guest_r7;    // a3
    562       canonical->arg4  = *((UInt*) (gst->guest_r29 + 16));    // 16(guest_SP/sp)
    563       canonical->arg5  = *((UInt*) (gst->guest_r29 + 20));    // 20(guest_SP/sp)
    564       canonical->arg6  = *((UInt*) (gst->guest_r29 + 24));    // 24(guest_SP/sp)
    565       canonical->arg8 = __NR_syscall;
    566    }
    567 
    568 #elif defined(VGP_mips64_linux)
    569    VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
    570    canonical->sysno = gst->guest_r2;    // v0
    571    canonical->arg1  = gst->guest_r4;    // a0
    572    canonical->arg2  = gst->guest_r5;    // a1
    573    canonical->arg3  = gst->guest_r6;    // a2
    574    canonical->arg4  = gst->guest_r7;    // a3
    575    canonical->arg5  = gst->guest_r8;    // a4
    576    canonical->arg6  = gst->guest_r9;    // a5
    577 
    578 #elif defined(VGP_x86_darwin)
    579    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    580    UWord *stack = (UWord *)gst->guest_ESP;
    581    // GrP fixme hope syscalls aren't called with really shallow stacks...
    582    canonical->sysno = gst->guest_EAX;
    583    if (canonical->sysno != 0) {
    584       // stack[0] is return address
    585       canonical->arg1  = stack[1];
    586       canonical->arg2  = stack[2];
    587       canonical->arg3  = stack[3];
    588       canonical->arg4  = stack[4];
    589       canonical->arg5  = stack[5];
    590       canonical->arg6  = stack[6];
    591       canonical->arg7  = stack[7];
    592       canonical->arg8  = stack[8];
    593    } else {
    594       // GrP fixme hack handle syscall()
    595       // GrP fixme what about __syscall() ?
    596       // stack[0] is return address
    597       // DDD: the tool can't see that the params have been shifted!  Can
    598       //      lead to incorrect checking, I think, because the PRRAn/PSARn
    599       //      macros will mention the pre-shifted args.
    600       canonical->sysno = stack[1];
    601       vg_assert(canonical->sysno != 0);
    602       canonical->arg1  = stack[2];
    603       canonical->arg2  = stack[3];
    604       canonical->arg3  = stack[4];
    605       canonical->arg4  = stack[5];
    606       canonical->arg5  = stack[6];
    607       canonical->arg6  = stack[7];
    608       canonical->arg7  = stack[8];
    609       canonical->arg8  = stack[9];
    610 
    611       PRINT("SYSCALL[%d,?](0) syscall(%s, ...); please stand by...\n",
    612             VG_(getpid)(), /*tid,*/
    613             VG_SYSNUM_STRING(canonical->sysno));
    614    }
    615 
    616    // Here we determine what kind of syscall it was by looking at the
    617    // interrupt kind, and then encode the syscall number using the 64-bit
    618    // encoding for Valgrind's internal use.
    619    //
    620    // DDD: Would it be better to stash the JMP kind into the Darwin
    621    // thread state rather than passing in the trc?
    622    switch (trc) {
    623    case VEX_TRC_JMP_SYS_INT128:
    624       // int $0x80 = Unix, 64-bit result
    625       vg_assert(canonical->sysno >= 0);
    626       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
    627       break;
    628    case VEX_TRC_JMP_SYS_SYSENTER:
    629       // syscall = Unix, 32-bit result
    630       // OR        Mach, 32-bit result
    631       if (canonical->sysno >= 0) {
    632          // GrP fixme hack:  0xffff == I386_SYSCALL_NUMBER_MASK
    633          canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
    634                                                              & 0xffff);
    635       } else {
    636          canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
    637       }
    638       break;
    639    case VEX_TRC_JMP_SYS_INT129:
    640       // int $0x81 = Mach, 32-bit result
    641       vg_assert(canonical->sysno < 0);
    642       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
    643       break;
    644    case VEX_TRC_JMP_SYS_INT130:
    645       // int $0x82 = mdep, 32-bit result
    646       vg_assert(canonical->sysno >= 0);
    647       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
    648       break;
    649    default:
    650       vg_assert(0);
    651       break;
    652    }
    653 
    654 #elif defined(VGP_amd64_darwin)
    655    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    656    UWord *stack = (UWord *)gst->guest_RSP;
    657 
    658    vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);
    659 
    660    // GrP fixme hope syscalls aren't called with really shallow stacks...
    661    canonical->sysno = gst->guest_RAX;
    662    if (canonical->sysno != __NR_syscall) {
    663       // stack[0] is return address
    664       canonical->arg1  = gst->guest_RDI;
    665       canonical->arg2  = gst->guest_RSI;
    666       canonical->arg3  = gst->guest_RDX;
    667       canonical->arg4  = gst->guest_R10;  // not rcx with syscall insn
    668       canonical->arg5  = gst->guest_R8;
    669       canonical->arg6  = gst->guest_R9;
    670       canonical->arg7  = stack[1];
    671       canonical->arg8  = stack[2];
    672    } else {
    673       // GrP fixme hack handle syscall()
    674       // GrP fixme what about __syscall() ?
    675       // stack[0] is return address
    676       // DDD: the tool can't see that the params have been shifted!  Can
    677       //      lead to incorrect checking, I think, because the PRRAn/PSARn
    678       //      macros will mention the pre-shifted args.
    679       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
    680       vg_assert(canonical->sysno != __NR_syscall);
    681       canonical->arg1  = gst->guest_RSI;
    682       canonical->arg2  = gst->guest_RDX;
    683       canonical->arg3  = gst->guest_R10;  // not rcx with syscall insn
    684       canonical->arg4  = gst->guest_R8;
    685       canonical->arg5  = gst->guest_R9;
    686       canonical->arg6  = stack[1];
    687       canonical->arg7  = stack[2];
    688       canonical->arg8  = stack[3];
    689 
    690       PRINT("SYSCALL[%d,?](0) syscall(%s, ...); please stand by...\n",
    691             VG_(getpid)(), /*tid,*/
    692             VG_SYSNUM_STRING(canonical->sysno));
    693    }
    694 
    695    // no canonical->sysno adjustment needed
    696 
    697 #elif defined(VGP_s390x_linux)
    698    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
    699    canonical->sysno = gst->guest_SYSNO;
    700    canonical->arg1  = gst->guest_r2;
    701    canonical->arg2  = gst->guest_r3;
    702    canonical->arg3  = gst->guest_r4;
    703    canonical->arg4  = gst->guest_r5;
    704    canonical->arg5  = gst->guest_r6;
    705    canonical->arg6  = gst->guest_r7;
    706    canonical->arg7  = 0;
    707    canonical->arg8  = 0;
    708 
    709 #elif defined(VGP_x86_solaris)
    710    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    711    UWord *stack = (UWord *)gst->guest_ESP;
    712    canonical->sysno = gst->guest_EAX;
    713    /* stack[0] is a return address. */
    714    canonical->arg1  = stack[1];
    715    canonical->arg2  = stack[2];
    716    canonical->arg3  = stack[3];
    717    canonical->arg4  = stack[4];
    718    canonical->arg5  = stack[5];
    719    canonical->arg6  = stack[6];
    720    canonical->arg7  = stack[7];
    721    canonical->arg8  = stack[8];
    722 
    723    switch (trc) {
    724    case VEX_TRC_JMP_SYS_INT145:
    725    case VEX_TRC_JMP_SYS_SYSENTER:
    726    case VEX_TRC_JMP_SYS_SYSCALL:
    727    /* These three are not actually valid syscall instructions on Solaris.
    728       Pretend for now that we handle them as normal syscalls. */
    729    case VEX_TRC_JMP_SYS_INT128:
    730    case VEX_TRC_JMP_SYS_INT129:
    731    case VEX_TRC_JMP_SYS_INT130:
    732       /* int $0x91, sysenter, syscall = normal syscall */
    733       break;
    734    case VEX_TRC_JMP_SYS_INT210:
    735       /* int $0xD2 = fasttrap */
    736       canonical->sysno
    737          = VG_SOLARIS_SYSCALL_CONSTRUCT_FASTTRAP(canonical->sysno);
    738       break;
    739    default:
    740       vg_assert(0);
    741       break;
    742    }
    743 
    744 #elif defined(VGP_amd64_solaris)
    745    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    746    UWord *stack = (UWord *)gst->guest_RSP;
    747    canonical->sysno = gst->guest_RAX;
    748    /* stack[0] is a return address. */
    749    canonical->arg1 = gst->guest_RDI;
    750    canonical->arg2 = gst->guest_RSI;
    751    canonical->arg3 = gst->guest_RDX;
    752    canonical->arg4 = gst->guest_R10;  /* Not RCX with syscall. */
    753    canonical->arg5 = gst->guest_R8;
    754    canonical->arg6 = gst->guest_R9;
    755    canonical->arg7 = stack[1];
    756    canonical->arg8 = stack[2];
    757 
    758    switch (trc) {
    759    case VEX_TRC_JMP_SYS_SYSCALL:
    760       /* syscall = normal syscall */
    761       break;
    762    case VEX_TRC_JMP_SYS_INT210:
    763       /* int $0xD2 = fasttrap */
    764       canonical->sysno
    765          = VG_SOLARIS_SYSCALL_CONSTRUCT_FASTTRAP(canonical->sysno);
    766       break;
    767    default:
    768       vg_assert(0);
    769       break;
    770    }
    771 
    772 #else
    773 #  error "getSyscallArgsFromGuestState: unknown arch"
    774 #endif
    775 }
    776 
/* Write the canonical (platform-neutral) syscall number and argument
   words back into the guest register/stack state, following each
   platform's kernel syscall calling convention.  This is the inverse
   of getSyscallArgsFromGuestState: after a wrapper has possibly
   modified the canonical args, this puts them where the kernel (or a
   restarted syscall) expects to find them.  On platforms that pass
   some arguments on the stack (x86-darwin, amd64-darwin, x86-solaris,
   amd64-solaris, mips32) this writes through the guest stack pointer,
   so it assumes the guest stack words for those slots are addressable. */
static
void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
                                    /*OUT*/VexGuestArchState* gst_vanilla )
{
#if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   gst->guest_EAX = canonical->sysno;
   gst->guest_EBX = canonical->arg1;
   gst->guest_ECX = canonical->arg2;
   gst->guest_EDX = canonical->arg3;
   gst->guest_ESI = canonical->arg4;
   gst->guest_EDI = canonical->arg5;
   gst->guest_EBP = canonical->arg6;

#elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   gst->guest_RAX = canonical->sysno;
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_R10 = canonical->arg4;   /* R10, not RCX, for the syscall insn */
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;

#elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   gst->guest_R7 = canonical->sysno;
   gst->guest_R0 = canonical->arg1;
   gst->guest_R1 = canonical->arg2;
   gst->guest_R2 = canonical->arg3;
   gst->guest_R3 = canonical->arg4;
   gst->guest_R4 = canonical->arg5;
   gst->guest_R5 = canonical->arg6;

#elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   gst->guest_X8 = canonical->sysno;
   gst->guest_X0 = canonical->arg1;
   gst->guest_X1 = canonical->arg2;
   gst->guest_X2 = canonical->arg3;
   gst->guest_X3 = canonical->arg4;
   gst->guest_X4 = canonical->arg5;
   gst->guest_X5 = canonical->arg6;

#elif defined(VGP_x86_darwin)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   /* Translate the canonical (class-tagged) sysno back to the raw
      number the Darwin kernel expects in EAX. */
   gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);

   /* All 8 args are passed on the guest stack on x86-darwin. */
   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
   // stack[0] is return address
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;

   /* First six args in registers; args 7 and 8 spill to the stack.
      NB: arg4 goes back into RCX here (userspace calling convention),
      whereas getSyscallArgsFromGuestState read it from R10 (what the
      syscall insn leaves for the kernel). */
   // stack[0] is return address
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_RCX = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1]       = canonical->arg7;
   stack[2]       = canonical->arg8;

#elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   gst->guest_SYSNO  = canonical->sysno;
   gst->guest_r2     = canonical->arg1;
   gst->guest_r3     = canonical->arg2;
   gst->guest_r4     = canonical->arg3;
   gst->guest_r5     = canonical->arg4;
   gst->guest_r6     = canonical->arg5;
   gst->guest_r7     = canonical->arg6;

#elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   /* arg8 appears to act as a marker (presumably set to __NR_syscall
      by getSyscallArgsFromGuestState) that the original call was the
      indirect form syscall(sysno, ...), in which case all arguments
      must be shifted down one slot around the real sysno —
      TODO confirm against the mips32 branch of the getter. */
   if (canonical->arg8 != __NR_syscall) {
      gst->guest_r2 = canonical->sysno;
      gst->guest_r4 = canonical->arg1;
      gst->guest_r5 = canonical->arg2;
      gst->guest_r6 = canonical->arg3;
      gst->guest_r7 = canonical->arg4;
      /* args 5 and 6 live on the guest stack, above the sp. */
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg5; // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg6; // 20(sp)
   } else {
      /* Rebuild the indirect syscall(__NR_syscall, sysno, ...) form;
         clear the marker so it is not misread later. */
      canonical->arg8 = 0;
      gst->guest_r2 = __NR_syscall;
      gst->guest_r4 = canonical->sysno;
      gst->guest_r5 = canonical->arg1;
      gst->guest_r6 = canonical->arg2;
      gst->guest_r7 = canonical->arg3;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg4; // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg5; // 20(sp)
      *((UInt*) (gst->guest_r29 + 24)) = canonical->arg6; // 24(sp)
   }

#elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   gst->guest_r2 = canonical->sysno;
   gst->guest_r4 = canonical->arg1;
   gst->guest_r5 = canonical->arg2;
   gst->guest_r6 = canonical->arg3;
   gst->guest_r7 = canonical->arg4;
   gst->guest_r8 = canonical->arg5;
   gst->guest_r9 = canonical->arg6;

#elif defined(VGP_x86_solaris)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   /* Fasttraps or anything else cannot go through this way. */
   vg_assert(VG_SOLARIS_SYSNO_CLASS(canonical->sysno)
             == VG_SOLARIS_SYSCALL_CLASS_CLASSIC);
   gst->guest_EAX = canonical->sysno;
   /* stack[0] is a return address. */
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_solaris)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   /* Fasttraps or anything else cannot go through this way. */
   vg_assert(VG_SOLARIS_SYSNO_CLASS(canonical->sysno)
             == VG_SOLARIS_SYSCALL_CLASS_CLASSIC);
   gst->guest_RAX = canonical->sysno;
   /* stack[0] is a return address. */
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_R10 = canonical->arg4;   /* R10, not RCX, with syscall. */
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1] = canonical->arg7;
   stack[2] = canonical->arg8;

#else
#  error "putSyscallArgsIntoGuestState: unknown arch"
#endif
}
    957 
/* Extract a canonical syscall outcome (success/failure + value) from
   the guest register state, decoding each platform's kernel return
   convention into a SysRes.  In all cases the status is marked
   SsComplete: the syscall has finished and the registers hold its
   result.  Error signalling varies by platform: Linux x86/amd64/arm/
   arm64/s390x encode errors as small negative values in the result
   register; ppc32/ppc64 use the CR0.SO bit; mips uses a3 as an error
   flag; Darwin and Solaris use the carry flag. */
static
void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
                                      /*IN*/ VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst   = (VexGuestPPC32State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC32_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;   /* CR0.SO = error flag */
   canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   VexGuestPPC64State* gst   = (VexGuestPPC64State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC64_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;   /* CR0.SO = error flag */
   canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
   canonical->what = SsComplete;

#  elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm64_linux)( gst->guest_X0 );
   canonical->what = SsComplete;

#  elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   UInt                v0 = gst->guest_r2;    // v0
   UInt                v1 = gst->guest_r3;    // v1
   UInt                a3 = gst->guest_r7;    // a3
   canonical->sres = VG_(mk_SysRes_mips32_linux)( v0, v1, a3 );
   canonical->what = SsComplete;

#  elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   ULong                v0 = gst->guest_r2;    // v0
   ULong                v1 = gst->guest_r3;    // v1
   ULong                a3 = gst->guest_r7;    // a3
   canonical->sres = VG_(mk_SysRes_mips64_linux)(v0, v1, a3);
   canonical->what = SsComplete;

#  elif defined(VGP_x86_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
   UInt err = 0;
   UInt wLO = 0;
   UInt wHI = 0;
   /* Result width and error convention depend on which syscall class
      (Unix/Mach/mdep) was invoked; only Unix uses the carry flag. */
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // int $0x80 = Unix, 64-bit result
         err = carry;
         wLO = gst->guest_EAX;
         wHI = gst->guest_EDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // int $0x81 = Mach, 32-bit result
         wLO = gst->guest_EAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // int $0x82 = mdep, 32-bit result
         wLO = gst->guest_EAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_x86_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
   ULong err = 0;
   ULong wLO = 0;
   ULong wHI = 0;
   /* As for x86-darwin: only the Unix class signals errors via carry. */
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // syscall = Unix, 128-bit result
         err = carry;
         wLO = gst->guest_RAX;
         wHI = gst->guest_RDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // syscall = Mach, 64-bit result
         wLO = gst->guest_RAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // syscall = mdep, 64-bit result
         wLO = gst->guest_RAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_amd64_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst   = (VexGuestS390XState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
   canonical->what = SsComplete;

#  elif defined(VGP_x86_solaris)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);

   /* Carry set => failure, EAX holds the error code; otherwise
      EDX:EAX is the (up to 64-bit) success result. */
   canonical->sres = VG_(mk_SysRes_x86_solaris)(carry ? True : False,
                                                gst->guest_EAX,
                                                carry ? 0 : gst->guest_EDX);
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_solaris)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);

   /* Carry set => failure, RAX holds the error code; otherwise
      RDX:RAX is the success result. */
   canonical->sres = VG_(mk_SysRes_amd64_solaris)(carry ? True : False,
                                                  gst->guest_RAX,
                                                  carry ? 0 : gst->guest_RDX);
   canonical->what = SsComplete;

#  else
#    error "getSyscallStatusFromGuestState: unknown arch"
#  endif
}
   1103 
/* Write a completed syscall outcome back into the guest register
   state, encoding it per the platform's kernel return convention
   (the inverse of getSyscallStatusFromGuestState).  The status must
   be SsComplete.  After every guest register modified here, the tool
   is notified via VG_TRACK(post_reg_write, ...) so that e.g. Memcheck
   can mark the result register as defined — keep those notifications
   paired with their writes. */
static
void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
                                      /*IN*/ SyscallStatus*     canonical,
                                      /*OUT*/VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-x86 scheme.  Oh well. */
      gst->guest_EAX = - (Int)sr_Err(canonical->sres);
   } else {
      gst->guest_EAX = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_x86_EAX, sizeof(UWord) );

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-amd64 scheme.  Oh well. */
      gst->guest_RAX = - (Long)sr_Err(canonical->sres);
   } else {
      gst->guest_RAX = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_amd64_RAX, sizeof(UWord) );

#  elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
   vg_assert(canonical->what == SsComplete);
   /* Result goes in GPR3; success/failure is signalled in CR0.SO. */
   if (sr_isError(canonical->sres)) {
      /* set CR0.SO */
      LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
      gst->guest_GPR3 = sr_Err(canonical->sres);
   } else {
      /* clear CR0.SO */
      LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
      gst->guest_GPR3 = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc32_GPR3, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc32_CR0_0, sizeof(UChar) );

#  elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
   vg_assert(canonical->what == SsComplete);
   /* Result goes in GPR3; success/failure is signalled in CR0.SO. */
   if (sr_isError(canonical->sres)) {
      /* set CR0.SO */
      LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
      gst->guest_GPR3 = sr_Err(canonical->sres);
   } else {
      /* clear CR0.SO */
      LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
      gst->guest_GPR3 = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc64_GPR3, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_ppc64_CR0_0, sizeof(UChar) );

#  elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-arm scheme.  Oh well. */
      gst->guest_R0 = - (Int)sr_Err(canonical->sres);
   } else {
      gst->guest_R0 = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_arm_R0, sizeof(UWord) );

#  elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-arm64 scheme.  Oh well. */
      gst->guest_X0 = - (Long)sr_Err(canonical->sres);
   } else {
      gst->guest_X0 = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_arm64_X0, sizeof(UWord) );

#elif defined(VGP_x86_darwin)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   SysRes sres = canonical->sres;
   vg_assert(canonical->what == SsComplete);
   /* Unfortunately here we have to break abstraction and look
      directly inside 'res', in order to decide what to do. */
   switch (sres._mode) {
      case SysRes_MACH: // int $0x81 = Mach, 32-bit result
      case SysRes_MDEP: // int $0x82 = mdep, 32-bit result
         gst->guest_EAX = sres._wLO;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_x86_EAX, sizeof(UInt) );
         break;
      case SysRes_UNIX_OK:  // int $0x80 = Unix, 64-bit result
      case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error
         gst->guest_EAX = sres._wLO;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_x86_EAX, sizeof(UInt) );
         gst->guest_EDX = sres._wHI;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_x86_EDX, sizeof(UInt) );
         /* Carry flag set <=> Unix-class error. */
         LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
                                      gst );
         // GrP fixme sets defined for entire eflags, not just bit c
         // DDD: this breaks exp-ptrcheck.
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) );
         break;
      default:
         vg_assert(0);
         break;
   }

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   SysRes sres = canonical->sres;
   vg_assert(canonical->what == SsComplete);
   /* Unfortunately here we have to break abstraction and look
      directly inside 'res', in order to decide what to do. */
   switch (sres._mode) {
      case SysRes_MACH: // syscall = Mach, 64-bit result
      case SysRes_MDEP: // syscall = mdep, 64-bit result
         gst->guest_RAX = sres._wLO;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_amd64_RAX, sizeof(ULong) );
         break;
      case SysRes_UNIX_OK:  // syscall = Unix, 128-bit result
      case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error
         gst->guest_RAX = sres._wLO;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_amd64_RAX, sizeof(ULong) );
         gst->guest_RDX = sres._wHI;
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   OFFSET_amd64_RDX, sizeof(ULong) );
         /* Carry flag set <=> Unix-class error. */
         LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
                                        gst );
         // GrP fixme sets defined for entire rflags, not just bit c
         // DDD: this breaks exp-ptrcheck.
         VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
                   offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) );
         break;
      default:
         vg_assert(0);
         break;
   }

#  elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   /* NOTE(review): unlike the other Linux targets, no VG_TRACK
      post_reg_write notification is issued for r2 here — confirm
      whether that is intentional. */
   if (sr_isError(canonical->sres)) {
      gst->guest_r2 = - (Long)sr_Err(canonical->sres);
   } else {
      gst->guest_r2 = sr_Res(canonical->sres);
   }

#  elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   /* v0 (r2) carries the result or error code, v1 (r3) the extended
      result, and a3 (r7) flags success (0) vs failure (error code). */
   if (sr_isError(canonical->sres)) {
      gst->guest_r2 = (Int)sr_Err(canonical->sres);
      gst->guest_r7 = (Int)sr_Err(canonical->sres);
   } else {
      gst->guest_r2 = sr_Res(canonical->sres);
      gst->guest_r3 = sr_ResEx(canonical->sres);
      gst->guest_r7 = (Int)sr_Err(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_mips32_r2, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_mips32_r3, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_mips32_r7, sizeof(UWord) );

#  elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   /* Same register convention as mips32: v0/v1 result, a3 error flag. */
   if (sr_isError(canonical->sres)) {
      gst->guest_r2 = (Int)sr_Err(canonical->sres);
      gst->guest_r7 = (Int)sr_Err(canonical->sres);
   } else {
      gst->guest_r2 = sr_Res(canonical->sres);
      gst->guest_r3 = sr_ResEx(canonical->sres);
      gst->guest_r7 = (Int)sr_Err(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_mips64_r2, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_mips64_r3, sizeof(UWord) );
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_mips64_r7, sizeof(UWord) );

#  elif defined(VGP_x86_solaris)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   SysRes sres = canonical->sres;
   vg_assert(canonical->what == SsComplete);

   /* Carry set + error code in EAX on failure; carry clear with
      EDX:EAX result on success. */
   if (sr_isError(sres)) {
      gst->guest_EAX = sr_Err(sres);
      VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_x86_EAX,
               sizeof(UInt));
      LibVEX_GuestX86_put_eflag_c(1, gst);
   }
   else {
      gst->guest_EAX = sr_Res(sres);
      VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_x86_EAX,
               sizeof(UInt));
      gst->guest_EDX = sr_ResHI(sres);
      VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_x86_EDX,
               sizeof(UInt));
      LibVEX_GuestX86_put_eflag_c(0, gst);
   }
   /* Make CC_DEP1 and CC_DEP2 defined.  This is inaccurate because it makes
      other eflags defined too (see README.solaris). */
   VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, offsetof(VexGuestX86State,
            guest_CC_DEP1), sizeof(UInt));
   VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, offsetof(VexGuestX86State,
            guest_CC_DEP2), sizeof(UInt));

#  elif defined(VGP_amd64_solaris)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   SysRes sres = canonical->sres;
   vg_assert(canonical->what == SsComplete);

   /* Carry set + error code in RAX on failure; carry clear with
      RDX:RAX result on success. */
   if (sr_isError(sres)) {
      gst->guest_RAX = sr_Err(sres);
      VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_amd64_RAX,
               sizeof(ULong));
      LibVEX_GuestAMD64_put_rflag_c(1, gst);
   }
   else {
      gst->guest_RAX = sr_Res(sres);
      VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_amd64_RAX,
               sizeof(ULong));
      gst->guest_RDX = sr_ResHI(sres);
      VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_amd64_RDX,
               sizeof(ULong));
      LibVEX_GuestAMD64_put_rflag_c(0, gst);
   }
   /* Make CC_DEP1 and CC_DEP2 defined.  This is inaccurate because it makes
      other eflags defined too (see README.solaris). */
   VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, offsetof(VexGuestAMD64State,
            guest_CC_DEP1), sizeof(ULong));
   VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, offsetof(VexGuestAMD64State,
            guest_CC_DEP2), sizeof(ULong));

#  else
#    error "putSyscallStatusIntoGuestState: unknown arch"
#  endif
}
   1370 
   1371 
   1372 /* Tell me the offsets in the guest state of the syscall params, so
   1373    that the scalar argument checkers don't have to have this info
   1374    hardwired. */
   1375 
/* Fill in *layout with the location of each syscall argument (and of
   the syscall number itself) for the platform this was built for.
   Fields named o_* are byte offsets into the guest register state;
   fields named s_* are byte offsets relative to the stack pointer,
   for platforms which pass some or all syscall args on the stack
   (x86-darwin, x86-solaris, mips32 args 5..7, and args 7/8 on
   amd64-darwin/amd64-solaris).  uu_* fields mark argument slots that
   cannot occur on this platform and are set to -1. */
static
void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
{
   /* Zero everything first so fields not set below have a known value. */
   VG_(bzero_inline)(layout, sizeof(*layout));

#if defined(VGP_x86_linux)
   layout->o_sysno  = OFFSET_x86_EAX;
   layout->o_arg1   = OFFSET_x86_EBX;
   layout->o_arg2   = OFFSET_x86_ECX;
   layout->o_arg3   = OFFSET_x86_EDX;
   layout->o_arg4   = OFFSET_x86_ESI;
   layout->o_arg5   = OFFSET_x86_EDI;
   layout->o_arg6   = OFFSET_x86_EBP;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_amd64_linux)
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_R10;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc32_linux)
   layout->o_sysno  = OFFSET_ppc32_GPR0;
   layout->o_arg1   = OFFSET_ppc32_GPR3;
   layout->o_arg2   = OFFSET_ppc32_GPR4;
   layout->o_arg3   = OFFSET_ppc32_GPR5;
   layout->o_arg4   = OFFSET_ppc32_GPR6;
   layout->o_arg5   = OFFSET_ppc32_GPR7;
   layout->o_arg6   = OFFSET_ppc32_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   layout->o_sysno  = OFFSET_ppc64_GPR0;
   layout->o_arg1   = OFFSET_ppc64_GPR3;
   layout->o_arg2   = OFFSET_ppc64_GPR4;
   layout->o_arg3   = OFFSET_ppc64_GPR5;
   layout->o_arg4   = OFFSET_ppc64_GPR6;
   layout->o_arg5   = OFFSET_ppc64_GPR7;
   layout->o_arg6   = OFFSET_ppc64_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_arm_linux)
   layout->o_sysno  = OFFSET_arm_R7;
   layout->o_arg1   = OFFSET_arm_R0;
   layout->o_arg2   = OFFSET_arm_R1;
   layout->o_arg3   = OFFSET_arm_R2;
   layout->o_arg4   = OFFSET_arm_R3;
   layout->o_arg5   = OFFSET_arm_R4;
   layout->o_arg6   = OFFSET_arm_R5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_arm64_linux)
   layout->o_sysno  = OFFSET_arm64_X8;
   layout->o_arg1   = OFFSET_arm64_X0;
   layout->o_arg2   = OFFSET_arm64_X1;
   layout->o_arg3   = OFFSET_arm64_X2;
   layout->o_arg4   = OFFSET_arm64_X3;
   layout->o_arg5   = OFFSET_arm64_X4;
   layout->o_arg6   = OFFSET_arm64_X5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_mips32_linux)
   layout->o_sysno  = OFFSET_mips32_r2;
   layout->o_arg1   = OFFSET_mips32_r4;
   layout->o_arg2   = OFFSET_mips32_r5;
   layout->o_arg3   = OFFSET_mips32_r6;
   layout->o_arg4   = OFFSET_mips32_r7;
   /* Args 5..7 are passed on the stack on mips32. */
   layout->s_arg5   = sizeof(UWord) * 4;
   layout->s_arg6   = sizeof(UWord) * 5;
   layout->s_arg7   = sizeof(UWord) * 6;
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_mips64_linux)
   layout->o_sysno  = OFFSET_mips64_r2;
   layout->o_arg1   = OFFSET_mips64_r4;
   layout->o_arg2   = OFFSET_mips64_r5;
   layout->o_arg3   = OFFSET_mips64_r6;
   layout->o_arg4   = OFFSET_mips64_r7;
   layout->o_arg5   = OFFSET_mips64_r8;
   layout->o_arg6   = OFFSET_mips64_r9;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_x86_darwin)
   layout->o_sysno  = OFFSET_x86_EAX;
   // syscall parameters are on stack in C convention
   layout->s_arg1   = sizeof(UWord) * 1;
   layout->s_arg2   = sizeof(UWord) * 2;
   layout->s_arg3   = sizeof(UWord) * 3;
   layout->s_arg4   = sizeof(UWord) * 4;
   layout->s_arg5   = sizeof(UWord) * 5;
   layout->s_arg6   = sizeof(UWord) * 6;
   layout->s_arg7   = sizeof(UWord) * 7;
   layout->s_arg8   = sizeof(UWord) * 8;

#elif defined(VGP_amd64_darwin)
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_RCX;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   /* Args 7 and 8 overflow onto the stack. */
   layout->s_arg7   = sizeof(UWord) * 1;
   layout->s_arg8   = sizeof(UWord) * 2;

#elif defined(VGP_s390x_linux)
   layout->o_sysno  = OFFSET_s390x_SYSNO;
   layout->o_arg1   = OFFSET_s390x_r2;
   layout->o_arg2   = OFFSET_s390x_r3;
   layout->o_arg3   = OFFSET_s390x_r4;
   layout->o_arg4   = OFFSET_s390x_r5;
   layout->o_arg5   = OFFSET_s390x_r6;
   layout->o_arg6   = OFFSET_s390x_r7;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_x86_solaris)
   layout->o_sysno  = OFFSET_x86_EAX;
   /* Syscall parameters are on the stack. */
   layout->s_arg1   = sizeof(UWord) * 1;
   layout->s_arg2   = sizeof(UWord) * 2;
   layout->s_arg3   = sizeof(UWord) * 3;
   layout->s_arg4   = sizeof(UWord) * 4;
   layout->s_arg5   = sizeof(UWord) * 5;
   layout->s_arg6   = sizeof(UWord) * 6;
   layout->s_arg7   = sizeof(UWord) * 7;
   layout->s_arg8   = sizeof(UWord) * 8;

#elif defined(VGP_amd64_solaris)
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_R10;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   /* Args 7 and 8 overflow onto the stack. */
   layout->s_arg7   = sizeof(UWord) * 1;
   layout->s_arg8   = sizeof(UWord) * 2;

#else
#  error "getSyscallLayout: unknown arch"
#endif
}
   1530 
   1531 
   1532 /* ---------------------------------------------------------------------
   1533    The main driver logic
   1534    ------------------------------------------------------------------ */
   1535 
   1536 /* Finding the handlers for a given syscall, or faking up one
   1537    when no handler is found. */
   1538 
/* Dummy pre-handler, installed (via bad_sys) for any syscall number
   for which no real wrapper exists.  Warns the user, optionally dumps
   a stack trace, and fails the syscall with ENOSYS -- the same error
   the kernel returns for an unknown syscall number. */
static
void bad_before ( ThreadId              tid,
                  SyscallArgLayout*     layout,
                  /*MOD*/SyscallArgs*   args,
                  /*OUT*/SyscallStatus* status,
                  /*OUT*/UWord*         flags )
{
   VG_(dmsg)("WARNING: unhandled %s syscall: %s\n",
      VG_PLATFORM, VG_SYSNUM_STRING(args->sysno));
   if (VG_(clo_verbosity) > 1) {
      /* Show where the call came from, to help writing a handler. */
      VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   }
   VG_(dmsg)("You may be able to write your own handler.\n");
   VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
   VG_(dmsg)("Nevertheless we consider this a bug.  Please report\n");
   VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");

   SET_STATUS_Failure(VKI_ENOSYS);

#  if defined(VGO_solaris)
   /* NOTE(review): unhandled syscalls are treated as fatal on Solaris
      only -- presumably continuing past one is considered unsafe
      there; confirm against README.solaris. */
   VG_(exit)(1);
#  endif
}
   1562 
/* Fallback table entry for unknown syscalls: warn-and-fail pre-handler
   (bad_before), no post-handler. */
static SyscallTableEntry bad_sys =
   { bad_before, NULL };
   1565 
   1566 static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
   1567 {
   1568    const SyscallTableEntry* sys = NULL;
   1569 
   1570 #  if defined(VGO_linux)
   1571    sys = ML_(get_linux_syscall_entry)( syscallno );
   1572 
   1573 #  elif defined(VGO_darwin)
   1574    Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);
   1575 
   1576    switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
   1577    case VG_DARWIN_SYSCALL_CLASS_UNIX:
   1578       if (idx >= 0 && idx < ML_(syscall_table_size) &&
   1579           ML_(syscall_table)[idx].before != NULL)
   1580          sys = &ML_(syscall_table)[idx];
   1581          break;
   1582    case VG_DARWIN_SYSCALL_CLASS_MACH:
   1583       if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
   1584           ML_(mach_trap_table)[idx].before != NULL)
   1585          sys = &ML_(mach_trap_table)[idx];
   1586          break;
   1587    case VG_DARWIN_SYSCALL_CLASS_MDEP:
   1588       if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
   1589           ML_(mdep_trap_table)[idx].before != NULL)
   1590          sys = &ML_(mdep_trap_table)[idx];
   1591          break;
   1592    default:
   1593       vg_assert(0);
   1594       break;
   1595    }
   1596 
   1597 #  elif defined(VGO_solaris)
   1598    sys = ML_(get_solaris_syscall_entry)(syscallno);
   1599 
   1600 #  else
   1601 #    error Unknown OS
   1602 #  endif
   1603 
   1604    return sys == NULL  ? &bad_sys  : sys;
   1605 }
   1606 
   1607 
   1608 /* Add and remove signals from mask so that we end up telling the
   1609    kernel the state we actually want rather than what the client
   1610    wants. */
   1611 void VG_(sanitize_client_sigmask)(vki_sigset_t *mask)
   1612 {
   1613    VG_(sigdelset)(mask, VKI_SIGKILL);
   1614    VG_(sigdelset)(mask, VKI_SIGSTOP);
   1615    VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
   1616 }
   1617 
/* Per-thread record of an in-progress syscall. */
typedef
   struct {
      SyscallArgs   orig_args;  /* args as read from the guest state */
      SyscallArgs   args;       /* args, possibly modified by the pre-handler */
      SyscallStatus status;     /* where the syscall currently stands */
      UWord         flags;      /* Sf* flags set by the pre-handler */
   }
   SyscallInfo;

/* One record per thread; allocated lazily by ensure_initialised(). */
SyscallInfo *syscallInfo;
   1628 
   1629 /* The scheduler needs to be able to zero out these records after a
   1630    fork, hence this is exported from m_syswrap. */
   1631 void VG_(clear_syscallInfo) ( Int tid )
   1632 {
   1633    vg_assert(syscallInfo);
   1634    vg_assert(tid >= 0 && tid < VG_N_THREADS);
   1635    VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
   1636    syscallInfo[tid].status.what = SsIdle;
   1637 }
   1638 
   1639 Bool VG_(is_in_syscall) ( Int tid )
   1640 {
   1641    vg_assert(tid >= 0 && tid < VG_N_THREADS);
   1642    return (syscallInfo[tid].status.what != SsIdle);
   1643 }
   1644 
   1645 static void ensure_initialised ( void )
   1646 {
   1647    Int i;
   1648    static Bool init_done = False;
   1649    if (init_done)
   1650       return;
   1651    init_done = True;
   1652 
   1653    syscallInfo = VG_(malloc)("scinfo", VG_N_THREADS * sizeof syscallInfo[0]);
   1654 
   1655    for (i = 0; i < VG_N_THREADS; i++) {
   1656       VG_(clear_syscallInfo)( i );
   1657    }
   1658 }
   1659 
   1660 /* --- This is the main function of this file. --- */
   1661 
/* Handle a syscall made by client thread 'tid'.  'trc' is the trap
   reason code from VEX (see libvex_trc_values.h), passed down to
   getSyscallArgsFromGuestState so it can decode the right syscall
   convention.  Overall flow: read args from the guest state, run the
   pre-handler, then either complete immediately (pre-handler decided
   the result), run the syscall synchronously, or -- if SfMayBlock --
   run it asynchronously with the big lock dropped.  Finally write the
   result back into the guest state and run VG_(post_syscall). */
void VG_(client_syscall) ( ThreadId tid, UInt trc )
{
   Word                     sysno;
   ThreadState*             tst;
   const SyscallTableEntry* ent;
   SyscallArgLayout         layout;
   SyscallInfo*             sci;

   ensure_initialised();

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

#  if !defined(VGO_darwin)
   // Resync filtering is meaningless on non-Darwin targets.
   vg_assert(VG_(clo_resync_filter) == 0);
#  endif

   tst = VG_(get_ThreadState)(tid);

   /* BEGIN ensure root thread's stack is suitably mapped */
   /* In some rare circumstances, we may do the syscall without the
      bottom page of the stack being mapped, because the stack pointer
      was moved down just a few instructions before the syscall
      instruction, and there have been no memory references since
      then, that would cause a call to VG_(extend_stack) to have
      happened.

      In native execution that's OK: the kernel automagically extends
      the stack's mapped area down to cover the stack pointer (or sp -
      redzone, really).  In simulated normal execution that's OK too,
      since any signals we get from accessing below the mapped area of
      the (guest's) stack lead us to VG_(extend_stack), where we
      simulate the kernel's stack extension logic.  But that leaves
      the problem of entering a syscall with the SP unmapped.  Because
      the kernel doesn't know that the segment immediately above SP is
      supposed to be a grow-down segment, it causes the syscall to
      fail, and thereby causes a divergence between native behaviour
      (syscall succeeds) and simulated behaviour (syscall fails).

      This is quite a rare failure mode.  It has only been seen
      affecting calls to sys_readlink on amd64-linux, and even then it
      requires a certain code sequence around the syscall to trigger
      it.  Here is one:

      extern int my_readlink ( const char* path );
      asm(
      ".text\n"
      ".globl my_readlink\n"
      "my_readlink:\n"
      "\tsubq    $0x1008,%rsp\n"
      "\tmovq    %rdi,%rdi\n"              // path is in rdi
      "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
      "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
      "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
      "\tsyscall\n"
      "\taddq    $0x1008,%rsp\n"
      "\tret\n"
      ".previous\n"
      );

      For more details, see bug #156404
      (https://bugs.kde.org/show_bug.cgi?id=156404).

      The fix is actually very simple.  We simply need to call
      VG_(extend_stack) for this thread, handing it the lowest
      possible valid address for stack (sp - redzone), to ensure the
      pages all the way down to that address, are mapped.  Because
      this is a potentially expensive and frequent operation, we
      do the following:

      Only the main thread (tid=1) has a growdown stack.  So
      ignore all others.  It is conceivable, although highly unlikely,
      that the main thread exits, and later another thread is
      allocated tid=1, but that's harmless, I believe;
      VG_(extend_stack) will do nothing when applied to a non-root
      thread.

      All this guff is of course Linux-specific.  Hence the ifdef.
   */
#  if defined(VGO_linux)
   if (tid == 1/*ROOT THREAD*/) {
      Addr     stackMin   = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;

      /* The precise thing to do here would be to extend the stack only
         if the system call can be proven to access unmapped user stack
         memory. That is an enormous amount of work even if a proper
         spec of system calls was available.

         In the case where the system call does not access user memory
         the stack pointer here can have any value. A legitimate testcase
         that exercises this is none/tests/s390x/stmg.c:
         The stack pointer happens to be in the reservation segment near
         the end of the addressable memory and there is no SkAnonC segment
         above.

         So the approximation we're taking here is to extend the stack only
         if the client stack pointer does not look bogus. */
      if (VG_(am_addr_is_in_extensible_client_stack)(stackMin))
         VG_(extend_stack)( tid, stackMin );
   }
#  endif
   /* END ensure root thread's stack is suitably mapped */

   /* First off, get the syscall args and number.  This is a
      platform-dependent action. */

   sci = & syscallInfo[tid];
   vg_assert(sci->status.what == SsIdle);

   getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );

   /* Copy .orig_args to .args.  The pre-handler may modify .args, but
      we want to keep the originals too, just in case. */
   sci->args = sci->orig_args;

   /* Save the syscall number in the thread state in case the syscall
      is interrupted by a signal. */
   sysno = sci->orig_args.sysno;

   /* It's sometimes useful, as a crude debugging hack, to get a
      stack trace at each (or selected) syscalls.
      (Dead code unless the '0 &&' below is edited by hand.) */
   if (0 && sysno == __NR_ioctl) {
      VG_(umsg)("\nioctl:\n");
      VG_(get_and_pp_StackTrace)(tid, 10);
      VG_(umsg)("\n");
   }

#  if defined(VGO_darwin)
   /* Record syscall class.  But why?  Because the syscall might be
      interrupted by a signal, and in the signal handler (which will
      be m_signals.async_signalhandler) we will need to build a SysRes
      reflecting the syscall return result.  In order to do that we
      need to know the syscall class.  Hence stash it in the guest
      state of this thread.  This madness is not needed on Linux
      because it only has a single syscall return convention and so
      there is no ambiguity involved in converting the post-signal
      machine state into a SysRes. */
   tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
#  endif

   /* The default what-to-do-next thing is hand the syscall to the
      kernel, so we pre-set that here.  Set .sres to something
      harmless looking (is irrelevant because .what is not
      SsComplete.) */
   sci->status.what = SsHandToKernel;
   sci->status.sres = VG_(mk_SysRes_Error)(0);
   sci->flags       = 0;

   /* Fetch the syscall's handlers.  If no handlers exist for this
      syscall, we are given dummy handlers which force an immediate
      return with ENOSYS. */
   ent = get_syscall_entry(sysno);

   /* Fetch the layout information, which tells us where in the guest
      state the syscall args reside.  This is a platform-dependent
      action.  This info is needed so that the scalar syscall argument
      checks (PRE_REG_READ calls) know which bits of the guest state
      they need to inspect. */
   getSyscallArgLayout( &layout );

   /* Make sure the tmp signal mask matches the real signal mask;
      sigsuspend may change this. */
   vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));

   /* Right, we're finally ready to Party.  Call the pre-handler and
      see what we get back.  At this point:

        sci->status.what  is Unset (we don't know yet).
        sci->orig_args    contains the original args.
        sci->args         is the same as sci->orig_args.
        sci->flags        is zero.
   */

   PRINT("SYSCALL[%d,%u](%s) ",
      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));

   /* Do any pre-syscall actions */
   if (VG_(needs).syscall_wrapper) {
      UWord tmpv[8];
      tmpv[0] = sci->orig_args.arg1;
      tmpv[1] = sci->orig_args.arg2;
      tmpv[2] = sci->orig_args.arg3;
      tmpv[3] = sci->orig_args.arg4;
      tmpv[4] = sci->orig_args.arg5;
      tmpv[5] = sci->orig_args.arg6;
      tmpv[6] = sci->orig_args.arg7;
      tmpv[7] = sci->orig_args.arg8;
      VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
   }

   vg_assert(ent);
   vg_assert(ent->before);
   (ent->before)( tid,
                  &layout,
                  &sci->args, &sci->status, &sci->flags );

   /* If needed, gdbserver will report syscall entry to GDB */
   VG_(gdbserver_report_syscall)(True, sysno, tid);

   /* The pre-handler may have modified:
         sci->args
         sci->status
         sci->flags
      All else remains unchanged.
      Although the args may be modified, pre handlers are not allowed
      to change the syscall number.
   */
   /* Now we proceed according to what the pre-handler decided. */
   vg_assert(sci->status.what == SsHandToKernel
             || sci->status.what == SsComplete);
   vg_assert(sci->args.sysno == sci->orig_args.sysno);

   if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
      /* The pre-handler completed the syscall itself, declaring
         success. */
      if (sci->flags & SfNoWriteResult) {
         PRINT(" --> [pre-success] NoWriteResult");
      } else {
         PRINT(" --> [pre-success] %s", VG_(sr_as_string)(sci->status.sres));
      }
      /* In this case the allowable flags are to ask for a signal-poll
         and/or a yield after the call.  Changing the args isn't
         allowed. */
      vg_assert(0 == (sci->flags
                      & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   }

   else
   if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
      /* The pre-handler decided to fail syscall itself. */
      PRINT(" --> [pre-fail] %s", VG_(sr_as_string)(sci->status.sres));
      /* In this case, the pre-handler is also allowed to ask for the
         post-handler to be run anyway.  Changing the args is not
         allowed. */
      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   }

   else
   if (sci->status.what != SsHandToKernel) {
      /* huh?! */
      vg_assert(0);
   }

   else /* (sci->status.what == HandToKernel) */ {
      /* Ok, this is the usual case -- and the complicated one.  There
         are two subcases: sync and async.  async is the general case
         and is to be used when there is any possibility that the
         syscall might block [a fact that the pre-handler must tell us
         via the sci->flags field.]  Because the tidying-away /
         context-switch overhead of the async case could be large, if
         we are sure that the syscall will not block, we fast-track it
         by doing it directly in this thread, which is a lot
         simpler. */

      /* Check that the given flags are allowable: MayBlock, PollAfter
         and PostOnFail are ok. */
      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));

      if (sci->flags & SfMayBlock) {

         /* Syscall may block, so run it asynchronously */
         vki_sigset_t mask;

         PRINT(" --> [async] ... \n");

         mask = tst->sig_mask;
         VG_(sanitize_client_sigmask)(&mask);

         /* Gack.  More impedance matching.  Copy the possibly
            modified syscall args back into the guest state. */
         /* JRS 2009-Mar-16: if the syscall args are possibly modified,
            then this assertion is senseless:
              vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
            The case that exposed it was sys_posix_spawn on Darwin,
            which heavily modifies its arguments but then lets the call
            go through anyway, with SfToBlock set, hence we end up here. */
         putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );

         /* SfNoWriteResult flag is invalid for blocking signals because
            do_syscall_for_client() directly modifies the guest state. */
         vg_assert(!(sci->flags & SfNoWriteResult));

         /* Drop the bigLock */
         VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
         /* Urr.  We're now in a race against other threads trying to
            acquire the bigLock.  I guess that doesn't matter provided
            that do_syscall_for_client only touches thread-local
            state. */

         /* Do the call, which operates directly on the guest state,
            not on our abstracted copies of the args/result. */
         do_syscall_for_client(sysno, tst, &mask);

         /* do_syscall_for_client may not return if the syscall was
            interrupted by a signal.  In that case, flow of control is
            first to m_signals.async_sighandler, which calls
            VG_(fixup_guest_state_after_syscall_interrupted), which
            fixes up the guest state, and possibly calls
            VG_(post_syscall).  Once that's done, control drops back
            to the scheduler.  */

         /* Darwin: do_syscall_for_client may not return if the
            syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
            responded by starting the thread at wqthread_hijack(reuse=1)
            (to run another workqueue item). In that case, wqthread_hijack
            calls ML_(wqthread_continue), which is similar to
            VG_(fixup_guest_state_after_syscall_interrupted). */

         /* Reacquire the lock */
         VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");

         /* Even more impedance matching.  Extract the syscall status
            from the guest state. */
         getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
         vg_assert(sci->status.what == SsComplete);

         /* Be decorative, if required. */
         if (VG_(clo_trace_syscalls)) {
            PRINT("SYSCALL[%d,%u](%s) ... [async] --> %s",
                  VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
                  VG_(sr_as_string)(sci->status.sres));
         }

      } else {

         /* run the syscall directly */
         /* The pre-handler may have modified the syscall args, but
            since we're passing values in ->args directly to the
            kernel, there's no point in flushing them back to the
            guest state.  Indeed doing so could be construed as
            incorrect. */
         SysRes sres
            = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
                                     sci->args.arg3, sci->args.arg4,
                                     sci->args.arg5, sci->args.arg6,
                                     sci->args.arg7, sci->args.arg8 );
         sci->status = convert_SysRes_to_SyscallStatus(sres);

         /* Be decorative, if required. */
         if (VG_(clo_trace_syscalls)) {
           PRINT("[sync] --> %s", VG_(sr_as_string)(sci->status.sres));
         }
      }
   }

   vg_assert(sci->status.what == SsComplete);

   vg_assert(VG_(is_running_thread)(tid));

   /* Dump the syscall result back in the guest state.  This is
      a platform-specific action. */
   if (!(sci->flags & SfNoWriteResult))
      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );

   /* If needed, gdbserver will report syscall return to GDB */
   VG_(gdbserver_report_syscall)(False, sysno, tid);

   /* Situation now:
      - the guest state is now correctly modified following the syscall
      - modified args, original args and syscall status are still
        available in the syscallInfo[] entry for this syscall.

      Now go on to do the post-syscall actions (read on down ..)
   */
   PRINT(" ");
   VG_(post_syscall)(tid);
   PRINT("\n");
}
   2035 
   2036 
   2037 /* Perform post syscall actions.  The expected state on entry is
   2038    precisely as at the end of VG_(client_syscall), that is:
   2039 
   2040    - guest state up to date following the syscall
   2041    - modified args, original args and syscall status are still
   2042      available in the syscallInfo[] entry for this syscall.
   2043    - syscall status matches what's in the guest state.
   2044 
   2045    There are two ways to get here: the normal way -- being called by
   2046    VG_(client_syscall), and the unusual way, from
   2047    VG_(fixup_guest_state_after_syscall_interrupted).
   2048    Darwin: there's a third way, ML_(wqthread_continue).
   2049 */
   2050 void VG_(post_syscall) (ThreadId tid)
   2051 {
   2052    SyscallInfo*             sci;
   2053    const SyscallTableEntry* ent;
   2054    SyscallStatus            test_status;
   2055    ThreadState*             tst;
   2056    Word sysno;
   2057 
   2058    /* Preliminaries */
   2059    vg_assert(VG_(is_valid_tid)(tid));
   2060    vg_assert(tid >= 1 && tid < VG_N_THREADS);
   2061    vg_assert(VG_(is_running_thread)(tid));
   2062 
   2063    tst = VG_(get_ThreadState)(tid);
   2064    sci = & syscallInfo[tid];
   2065 
   2066    /* m_signals.sigvgkill_handler might call here even when not in
   2067       a syscall. */
   2068    if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
   2069       sci->status.what = SsIdle;
   2070       return;
   2071    }
   2072 
   2073    /* Validate current syscallInfo entry.  In particular we require
   2074       that the current .status matches what's actually in the guest
   2075       state.  At least in the normal case where we have actually
   2076       previously written the result into the guest state. */
   2077    vg_assert(sci->status.what == SsComplete);
   2078 
   2079    /* Get the system call number.  Because the pre-handler isn't
   2080       allowed to mess with it, it should be the same for both the
   2081       original and potentially-modified args. */
   2082    vg_assert(sci->args.sysno == sci->orig_args.sysno);
   2083    sysno = sci->args.sysno;
   2084 
   2085    getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
   2086    if (!(sci->flags & SfNoWriteResult))
   2087       vg_assert(eq_SyscallStatus( sysno, &sci->status, &test_status ));
   2088    /* Failure of the above assertion on Darwin can indicate a problem
   2089       in the syscall wrappers that pre-fail or pre-succeed the
   2090       syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
   2091       when they really should call SET_STATUS_from_SysRes.  The former
   2092       create a UNIX-class syscall result on Darwin, which may not be
   2093       correct for the syscall; if that's the case then this assertion
   2094       fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
   2095       non-Darwin platforms this assertion is should never fail, and this
   2096       comment is completely irrelevant. */
   2097    /* Ok, looks sane */
   2098 
   2099    /* pre: status == Complete (asserted above) */
   2100    /* Consider either success or failure.  Now run the post handler if:
   2101       - it exists, and
   2102       - Success or (Failure and PostOnFail is set)
   2103    */
   2104    ent = get_syscall_entry(sysno);
   2105    if (ent->after
   2106        && ((!sr_isError(sci->status.sres))
   2107            || (sr_isError(sci->status.sres)
   2108                && (sci->flags & SfPostOnFail) ))) {
   2109 
   2110       (ent->after)( tid, &sci->args, &sci->status );
   2111    }
   2112 
   2113    /* Because the post handler might have changed the status (eg, the
   2114       post-handler for sys_open can change the result from success to
   2115       failure if the kernel supplied a fd that it doesn't like), once
   2116       again dump the syscall result back in the guest state.*/
   2117    if (!(sci->flags & SfNoWriteResult))
   2118       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
   2119 
   2120    /* Do any post-syscall actions required by the tool. */
   2121    if (VG_(needs).syscall_wrapper) {
   2122       UWord tmpv[8];
   2123       tmpv[0] = sci->orig_args.arg1;
   2124       tmpv[1] = sci->orig_args.arg2;
   2125       tmpv[2] = sci->orig_args.arg3;
   2126       tmpv[3] = sci->orig_args.arg4;
   2127       tmpv[4] = sci->orig_args.arg5;
   2128       tmpv[5] = sci->orig_args.arg6;
   2129       tmpv[6] = sci->orig_args.arg7;
   2130       tmpv[7] = sci->orig_args.arg8;
   2131       VG_TDICT_CALL(tool_post_syscall, tid,
   2132                     sysno,
   2133                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
   2134                     sci->status.sres);
   2135    }
   2136 
   2137    /* The syscall is done. */
   2138    vg_assert(sci->status.what == SsComplete);
   2139    sci->status.what = SsIdle;
   2140 
   2141    /* The pre/post wrappers may have concluded that pending signals
   2142       might have been created, and will have set SfPollAfter to
   2143       request a poll for them once the syscall is done. */
   2144    if (sci->flags & SfPollAfter)
   2145       VG_(poll_signals)(tid);
   2146 
   2147    /* Similarly, the wrappers might have asked for a yield
   2148       afterwards. */
   2149    if (sci->flags & SfYieldAfter)
   2150       VG_(vg_yield)();
   2151 }
   2152 
   2153 
   2154 /* ---------------------------------------------------------------------
   2155    Dealing with syscalls which get interrupted by a signal:
   2156    VG_(fixup_guest_state_after_syscall_interrupted)
   2157    ------------------------------------------------------------------ */
   2158 
   2159 /* Syscalls done on behalf of the client are finally handed off to the
   2160    kernel in VG_(client_syscall) above, either by calling
   2161    do_syscall_for_client (the async case), or by calling
   2162    VG_(do_syscall6) (the sync case).
   2163 
   2164    If the syscall is not interrupted by a signal (it may block and
   2165    later unblock, but that's irrelevant here) then those functions
   2166    eventually return and so control is passed to VG_(post_syscall).
   2167    NB: not sure if the sync case can actually get interrupted, as it
   2168    operates with all signals masked.
   2169 
   2170    However, the syscall may get interrupted by an async-signal.  In
   2171    that case do_syscall_for_client/VG_(do_syscall6) do not
   2172    return.  Instead we wind up in m_signals.async_sighandler.  We need
   2173    to fix up the guest state to make it look like the syscall was
   2174    interrupted for guest.  So async_sighandler calls here, and this
   2175    does the fixup.  Note that from here we wind up calling
   2176    VG_(post_syscall) too.
   2177 */
   2178 
   2179 
   2180 /* These are addresses within ML_(do_syscall_for_client_WRK).  See
   2181    syscall-$PLAT.S for details.
   2182 */
/* Phase markers laid down by the hand-written assembly in
   syscall-$PLAT.S.  They delimit the numbered regions [1..5]
   (setup / restart / complete / committed / finished) that
   VG_(fixup_guest_state_after_syscall_interrupted) tests the
   interrupted IP against below. */
#if defined(VGO_linux)
  extern const Addr ML_(blksys_setup);
  extern const Addr ML_(blksys_restart);
  extern const Addr ML_(blksys_complete);
  extern const Addr ML_(blksys_committed);
  extern const Addr ML_(blksys_finished);
#elif defined(VGO_darwin)
  /* Darwin requires extra ugliness: a separate marker set per syscall
     class -- Mach traps (MACH), machine-dependent traps (MDEP) and
     UNIX syscalls (UNIX). */
  extern const Addr ML_(blksys_setup_MACH);
  extern const Addr ML_(blksys_restart_MACH);
  extern const Addr ML_(blksys_complete_MACH);
  extern const Addr ML_(blksys_committed_MACH);
  extern const Addr ML_(blksys_finished_MACH);
  extern const Addr ML_(blksys_setup_MDEP);
  extern const Addr ML_(blksys_restart_MDEP);
  extern const Addr ML_(blksys_complete_MDEP);
  extern const Addr ML_(blksys_committed_MDEP);
  extern const Addr ML_(blksys_finished_MDEP);
  extern const Addr ML_(blksys_setup_UNIX);
  extern const Addr ML_(blksys_restart_UNIX);
  extern const Addr ML_(blksys_complete_UNIX);
  extern const Addr ML_(blksys_committed_UNIX);
  extern const Addr ML_(blksys_finished_UNIX);
#elif defined(VGO_solaris)
  /* No restart marker: the Solaris kernel never restarts syscalls
     directly (see the comment in the interrupted-syscall fixup
     below).  The DRET set covers the separate door-return path. */
  extern const Addr ML_(blksys_setup);
  extern const Addr ML_(blksys_complete);
  extern const Addr ML_(blksys_committed);
  extern const Addr ML_(blksys_finished);
  extern const Addr ML_(blksys_setup_DRET);
  extern const Addr ML_(blksys_complete_DRET);
  extern const Addr ML_(blksys_committed_DRET);
  extern const Addr ML_(blksys_finished_DRET);
#else
# error "Unknown OS"
#endif
   2218 
   2219 
   2220 /* Back up guest state to restart a system call. */
   2221 
/* Rewind the guest program counter so that, when the thread next
   runs, it re-executes the interrupted syscall instruction.  Each
   platform branch both backs the PC up by the size of its syscall
   instruction and asserts that the bytes now at the PC really encode
   a syscall, as a sanity check on the caller.  On Darwin no
   arithmetic is needed: the pre-syscall IP was stashed in
   guest_IP_AT_SYSCALL and is simply restored. */
void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
{
#if defined(VGP_x86_linux)
   arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
   */
   {
      UChar *p = (UChar *)arch->vex.guest_EIP;

      if (p[0] != 0xcd || p[1] != 0x80)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x\n",
                      arch->vex.guest_EIP, p[0], p[1]);

      vg_assert(p[0] == 0xcd && p[1] == 0x80);
   }

#elif defined(VGP_amd64_linux)
   arch->vex.guest_RIP -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0F 05
   */
   {
      UChar *p = (UChar *)arch->vex.guest_RIP;

      if (p[0] != 0x0F || p[1] != 0x05)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_RIP, p[0], p[1]);

      vg_assert(p[0] == 0x0F && p[1] == 0x05);
   }

#elif defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux)
   arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      sc == 44 00 00 02
   */
   {
      UChar *p = (UChar *)arch->vex.guest_CIA;

      if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      (ULong)arch->vex.guest_CIA, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
   }

#elif defined(VGP_ppc64le_linux)
   arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      sc == 44 00 00 02
      Note the in-memory byte order is reversed relative to the BE
      case above, hence the reversed indices below.
   */
   {
      UChar *p = (UChar *)arch->vex.guest_CIA;

      if (p[3] != 0x44 || p[2] != 0x0 || p[1] != 0x0 || p[0] != 0x02)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_CIA, p[3], p[2], p[1], p[0]);

      vg_assert(p[3] == 0x44 && p[2] == 0x0 && p[1] == 0x0 && p[0] == 0x2);
   }

#elif defined(VGP_arm_linux)
   if (arch->vex.guest_R15T & 1) {
      // Thumb mode.  SVC is encoded as
      //   1101 1111 imm8
      // where imm8 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
      // guest_R15T still has the Thumb bit set; subtract 1 to get the
      // real instruction address.
      UChar* p     = (UChar*)(arch->vex.guest_R15T - 1);
      Bool   valid = p[0] == 0 && p[1] == 0xDF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (Thumb) syscall that is not syscall "
                      "at %#x %02x %02x\n",
                      arch->vex.guest_R15T - 1, p[0], p[1]);
      }
      vg_assert(valid);
      // FIXME: NOTE, this really isn't right.  We need to back up
      // ITSTATE to what it was before the SVC instruction, but we
      // don't know what it was.  At least assert that it is now
      // zero, because if it is nonzero then it must also have
      // been nonzero for the SVC itself, which means it was
      // conditional.  Urk.
      vg_assert(arch->vex.guest_ITSTATE == 0);
   } else {
      // ARM mode.  SVC is encoded as
      //   cond 1111 imm24
      // where imm24 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
      UChar* p     = (UChar*)arch->vex.guest_R15T;
      Bool   valid = p[0] == 0 && p[1] == 0 && p[2] == 0
                     && (p[3] & 0xF) == 0xF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (ARM) syscall that is not syscall "
                      "at %#x %02x %02x %02x %02x\n",
                      arch->vex.guest_R15T, p[0], p[1], p[2], p[3]);
      }
      vg_assert(valid);
   }

#elif defined(VGP_arm64_linux)
   arch->vex.guest_PC -= 4;             // sizeof(arm64 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      svc #0 == d4 00 00 01
   */
   {
      UChar *p = (UChar *)arch->vex.guest_PC;

      if (p[0] != 0x01 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0xD4)
         VG_(message)(
            Vg_DebugMsg,
            "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
            arch->vex.guest_PC, p[0], p[1], p[2], p[3]
          );

      vg_assert(p[0] == 0x01 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0xD4);
   }

#elif defined(VGP_x86_darwin)
   /* Restore the IP stashed at syscall entry rather than doing PC
      arithmetic, since several instruction forms are possible. */
   arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80  // Used to communicate with BSD syscalls
      int $0x81 == CD 81  // Used to communicate with Mach traps
      int $0x82 == CD 82  // Used to communicate with "thread" ?
      sysenter  == 0F 34  // Used to communicate with Unix syscalls
   */
   {
       UChar *p = (UChar *)arch->vex.guest_EIP;
       Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
                  || (p[0] == 0xCD && p[1] == 0x81)
                  || (p[0] == 0xCD && p[1] == 0x82)
                  || (p[0] == 0x0F && p[1] == 0x34);
       if (!ok)
           VG_(message)(Vg_DebugMsg,
                        "?! restarting over syscall at %#x %02x %02x\n",
                        arch->vex.guest_EIP, p[0], p[1]);
       vg_assert(ok);
   }

#elif defined(VGP_amd64_darwin)
   /* As for x86-darwin: restore the stashed pre-syscall IP. */
   arch->vex.guest_RIP = arch->vex.guest_IP_AT_SYSCALL;

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall   == 0F 05
   */
   {
       UChar *p = (UChar *)arch->vex.guest_RIP;

       Bool  ok = (p[0] == 0x0F && p[1] == 0x05);
       if (!ok)
           VG_(message)(Vg_DebugMsg,
                        "?! restarting over syscall at %#llx %02x %02x\n",
                        arch->vex.guest_RIP, p[0], p[1]);
       vg_assert(ok);
   }

#elif defined(VGP_s390x_linux)
   arch->vex.guest_IA -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0A <num>

      The second byte is the syscall number, so only the 0x0A opcode
      byte is checked.
   */
   {
      UChar *p = (UChar *)arch->vex.guest_IA;
      if (p[0] != 0x0A)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_IA, p[0], p[1]);

      vg_assert(p[0] == 0x0A);
   }

#elif defined(VGP_mips32_linux) || defined(VGP_mips64_linux)

   arch->vex.guest_PC -= 4;             // sizeof(mips instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      little endian: syscall == 0C 00 00 00
      big endian:    syscall == 00 00 00 0C
   */
   {
      UChar *p = (UChar *)(arch->vex.guest_PC);
#     if defined (VG_LITTLEENDIAN)
      if (p[0] != 0x0c || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x00)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x0c && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x00);
#     elif defined (VG_BIGENDIAN)
      if (p[0] != 0x00 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x0c)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x0c);
#     else
#        error "Unknown endianness"
#     endif
   }

#elif defined(VGP_x86_solaris)
   arch->vex.guest_EIP -= 2;   // sizeof(int $0x91) or sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x91 == CD 91
      syscall   == 0F 05
      sysenter  == 0F 34

      Handle also other syscall instructions because we also handle them in
      the scheduler.
      int $0x80 == CD 80
      int $0x81 == CD 81
      int $0x82 == CD 82
   */
   {
      UChar *p = (UChar *)arch->vex.guest_EIP;

      Bool  ok = (p[0] == 0xCD && p[1] == 0x91)
                  || (p[0] == 0x0F && p[1] == 0x05)
                  || (p[0] == 0x0F && p[1] == 0x34)
                  || (p[0] == 0xCD && p[1] == 0x80)
                  || (p[0] == 0xCD && p[1] == 0x81)
                  || (p[0] == 0xCD && p[1] == 0x82);
      if (!ok)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x\n",
                      arch->vex.guest_EIP, p[0], p[1]);
      vg_assert(ok);
   }

#elif defined(VGP_amd64_solaris)
   arch->vex.guest_RIP -= 2;   // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall   == 0F 05
   */
   {
      UChar *p = (UChar *)arch->vex.guest_RIP;

      Bool  ok = (p[0] == 0x0F && p[1] == 0x05);
      if (!ok)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_RIP, p[0], p[1]);
      vg_assert(ok);
   }

#else
#  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
#endif
}
   2508 
   2509 
   2510 /*
   2511    Fix up the guest state when a syscall is interrupted by a signal
   2512    and so has been forced to return 'sysret'.
   2513 
   2514    To do this, we determine the precise state of the syscall by
   2515    looking at the (real) IP at the time the signal happened.  The
   2516    syscall sequence looks like:
   2517 
   2518      1. unblock signals
   2519      2. perform syscall
   2520      3. save result to guest state (EAX, RAX, R3+CR0.SO, R0, V0)
   2521      4. re-block signals
   2522 
   2523    If a signal
   2524    happens at      Then     Why?
   2525    [1-2)           restart  nothing has happened (restart syscall)
   2526    [2]             restart  syscall hasn't started, or kernel wants to restart
   2527    [2-3)           save     syscall complete, but results not saved
     [3-4)           nothing  syscall complete, results saved
   2529 
   2530    Sometimes we never want to restart an interrupted syscall (because
   2531    sigaction says not to), so we only restart if "restart" is True.
   2532 
   2533    This will also call VG_(post_syscall) if the syscall has actually
   2534    completed (either because it was interrupted, or because it
   2535    actually finished).  It will not call VG_(post_syscall) if the
   2536    syscall is set up for restart, which means that the pre-wrapper may
   2537    get called multiple times.
   2538 */
   2539 
/* See the large comment above for the overall contract.  'uc' is only
   consulted on Solaris, to recover registers clobbered by a door
   return; on other platforms it is unused.  Post-condition: the
   syscall machinery for 'tid' is back in the SsIdle state. */
void
VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
                                                  Addr     ip,
                                                  SysRes   sres,
                                                  Bool     restart,
                                                  struct vki_ucontext *uc)
{
   /* Note that we don't know the syscall number here, since (1) in
      general there's no reliable way to get hold of it short of
      stashing it in the guest state before the syscall, and (2) in
      any case we don't need to know it for the actions done by this
      routine.

      Furthermore, 'sres' is only used in the case where the syscall
      is complete, but the result has not been committed to the guest
      state yet.  In any other situation it will be meaningless and
      therefore ignored. */

   ThreadState*     tst;
   SyscallStatus    canonical;
   ThreadArchState* th_regs;
   SyscallInfo*     sci;

   /* Compute some Booleans indicating which range we're in. */
   Bool outside_range,
        in_setup_to_restart,      // [1,2) in the .S files
        at_restart,               // [2]   in the .S files
        in_complete_to_committed, // [3,4) in the .S files
        in_committed_to_finished; // [4,5) in the .S files

   if (VG_(clo_trace_signals))
      VG_(message)( Vg_DebugMsg,
                    "interrupted_syscall: tid=%u, ip=%#lx, "
                    "restart=%s, sres.isErr=%s, sres.val=%lu\n",
                    tid,
                    ip,
                    restart ? "True" : "False",
                    sr_isError(sres) ? "True" : "False",
                    sr_isError(sres) ? sr_Err(sres) : sr_Res(sres));

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst     = VG_(get_ThreadState)(tid);
   th_regs = &tst->arch;
   sci     = & syscallInfo[tid];

   /* Classify 'ip' against the phase markers declared above. */
#  if defined(VGO_linux)
   outside_range
      = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
   in_setup_to_restart
      = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
   at_restart
      = ip == ML_(blksys_restart);
   in_complete_to_committed
      = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
   in_committed_to_finished
      = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
#  elif defined(VGO_darwin)
   /* Three parallel marker sets (MACH, MDEP, UNIX); 'ip' may fall in
      any one of them. */
   outside_range
      =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
      && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
      && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
   in_setup_to_restart
      =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
      || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
      || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
   at_restart
      =  (ip == ML_(blksys_restart_MACH))
      || (ip == ML_(blksys_restart_MDEP))
      || (ip == ML_(blksys_restart_UNIX));
   in_complete_to_committed
      =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
      || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
      || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
   in_committed_to_finished
      =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
      || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
      || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
   /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
#  elif defined(VGO_solaris)
   /* The solaris port is never outside the range. */
   outside_range = False;
   /* The Solaris kernel never restarts syscalls directly! */
   at_restart = False;
   /* Note there is no restart marker here, so [setup, complete) plays
      the role of in_setup_to_restart. */
   if (tst->os_state.in_door_return) {
      vg_assert(ip >= ML_(blksys_setup_DRET)
                && ip < ML_(blksys_finished_DRET));

      in_setup_to_restart
         = ip >= ML_(blksys_setup_DRET) && ip < ML_(blksys_complete_DRET);
      in_complete_to_committed
         = ip >= ML_(blksys_complete_DRET) && ip < ML_(blksys_committed_DRET);
      in_committed_to_finished
         = ip >= ML_(blksys_committed_DRET) && ip < ML_(blksys_finished_DRET);
   }
   else {
      vg_assert(ip >= ML_(blksys_setup) && ip < ML_(blksys_finished));

      in_setup_to_restart
         = ip >= ML_(blksys_setup) && ip < ML_(blksys_complete);
      in_complete_to_committed
         = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
      in_committed_to_finished
         = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
   }
#  else
#    error "Unknown OS"
#  endif

   /* Figure out what the state of the syscall was by examining the
      (real) IP at the time of the signal, and act accordingly. */
   if (outside_range) {
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  not in syscall at all: hmm, very suspicious\n" );
      /* Looks like we weren't in a syscall at all.  Hmm. */
      vg_assert(sci->status.what != SsIdle);
      return;
   }

   /* We should not be here unless this thread had first started up
      the machinery for a syscall by calling VG_(client_syscall).
      Hence: */
   vg_assert(sci->status.what != SsIdle);

   /* now, do one of four fixup actions, depending on where the IP has
      got to. */

   if (in_setup_to_restart) {
      /* syscall hasn't even started; go around again */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
      vg_assert(sci->status.what == SsHandToKernel);
      ML_(fixup_guest_state_to_restart_syscall)(th_regs);
   }

   else
   if (at_restart) {
#     if defined(VGO_solaris)
      /* We should never hit this branch on Solaris, see the comment above. */
      vg_assert(0);
#     endif

      /* We're either about to run the syscall, or it was interrupted
         and the kernel restarted it.  Restart if asked, otherwise
         EINTR it. */
      if (restart) {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
         ML_(fixup_guest_state_to_restart_syscall)(th_regs);
      } else {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
         canonical = convert_SysRes_to_SyscallStatus(
                        VG_(mk_SysRes_Error)( VKI_EINTR )
                     );
         if (!(sci->flags & SfNoWriteResult))
            putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
         sci->status = canonical;
         VG_(post_syscall)(tid);
      }
   }

   else
   if (in_complete_to_committed) {
      /* Syscall complete, but result hasn't been written back yet.
         Write the SysRes we were supplied with back to the guest
         state. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed, but uncommitted: committing\n");
      canonical = convert_SysRes_to_SyscallStatus( sres );
      vg_assert(!(sci->flags & SfNoWriteResult));
      putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
#     if defined(VGO_solaris)
      if (tst->os_state.in_door_return) {
#        if defined(VGP_x86_solaris)
         /* Registers %esp and %ebp were also modified by the syscall. */
         tst->arch.vex.guest_ESP = uc->uc_mcontext.gregs[VKI_UESP];
         tst->arch.vex.guest_EBP = uc->uc_mcontext.gregs[VKI_EBP];
#        elif defined(VGP_amd64_solaris)
         /* Likewise %rsp/%rbp on amd64. */
         tst->arch.vex.guest_RSP = uc->uc_mcontext.gregs[VKI_REG_RSP];
         tst->arch.vex.guest_RBP = uc->uc_mcontext.gregs[VKI_REG_RBP];
#        endif
      }
#     endif
      sci->status = canonical;
      VG_(post_syscall)(tid);
   }

   else
   if (in_committed_to_finished) {
      /* Result committed, but the signal mask has not been restored;
         we expect our caller (the signal handler) will have fixed
         this up. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed and committed: nothing to do\n");
#     if defined(VGP_x86_solaris)
      /* The %eax and %edx values are committed but the carry flag is still
         uncommitted.  Save it now. */
      LibVEX_GuestX86_put_eflag_c(sr_isError(sres), &th_regs->vex);
#     elif defined(VGP_amd64_solaris)
      LibVEX_GuestAMD64_put_rflag_c(sr_isError(sres), &th_regs->vex);
#     endif
      getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
      vg_assert(sci->status.what == SsComplete);
      VG_(post_syscall)(tid);
   }

   else
      VG_(core_panic)("?? strange syscall interrupt state?");

   /* In all cases, the syscall is now finished (even if we called
      ML_(fixup_guest_state_to_restart_syscall), since that just
      re-positions the guest's IP for another go at it).  So we need
      to record that fact. */
   sci->status.what = SsIdle;
}
   2761 
   2762 
   2763 #if defined(VGO_solaris)
   2764 /* Returns True if ip is inside a fixable syscall code in syscall-*-*.S.  This
   2765    function can be called by a 'non-running' thread! */
   2766 Bool VG_(is_ip_in_blocking_syscall)(ThreadId tid, Addr ip)
   2767 {
   2768    ThreadState *tst = VG_(get_ThreadState)(tid);
   2769 
   2770    if (tst->os_state.in_door_return)
   2771       return ip >= ML_(blksys_setup_DRET) && ip < ML_(blksys_finished_DRET);
   2772    else
   2773       return ip >= ML_(blksys_setup) && ip < ML_(blksys_finished);
   2774 }
   2775 #endif
   2776 
   2777 
#if defined(VGO_darwin)
// Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
// This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
// This longjmps back to the scheduler.
//
// Ordering below is load-bearing: wq_jmpbuf_valid is asserted before
// VG_(post_syscall), and the fake success status is installed with
// SfNoWriteResult set so the guest registers are left untouched.
void ML_(wqthread_continue_NORETURN)(ThreadId tid)
{
   ThreadState*     tst;
   SyscallInfo*     sci;

   VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");

   PRINT("SYSCALL[%d,%u](%s) workq_ops() starting new workqueue item\n",
         VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst     = VG_(get_ThreadState)(tid);
   sci     = & syscallInfo[tid];
   vg_assert(sci->status.what != SsIdle);
   vg_assert(tst->os_state.wq_jmpbuf_valid);  // check this BEFORE post_syscall

   // Pretend the syscall completed normally, but don't touch the thread state.
   sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
   sci->flags |= SfNoWriteResult;
   VG_(post_syscall)(tid);

   // NOTE(review): presumably resyncs aspacemgr's view of the address
   // space with the kernel's after the hijack -- confirm against
   // ML_(sync_mappings)'s definition.
   ML_(sync_mappings)("in", "ML_(wqthread_continue_NORETURN)", 0);

   sci->status.what = SsIdle;

   // Hand control back to the scheduler; this does not return.
   vg_assert(tst->sched_jmpbuf_valid);
   VG_MINIMAL_LONGJMP(tst->sched_jmpbuf);

   /* NOTREACHED */
   vg_assert(0);
}
#endif
   2817 
   2818 
   2819 /* ---------------------------------------------------------------------
   2820    A place to store the where-to-call-when-really-done pointer
   2821    ------------------------------------------------------------------ */
   2822 
// When the final thread is done, where shall I call to shutdown the
// system cleanly?  Is set once at startup (in m_main) and never
// changes after that.  Is basically a pointer to the exit
// continuation.  This is all just a nasty hack to avoid calling
// directly from m_syswrap to m_main at exit, since that would cause
// m_main to become part of a module cycle, which is silly.
// Starts out NULL; m_main installs the real continuation at startup.
void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
       (ThreadId,VgSchedReturnCode)
   = NULL;
   2832 
   2833 /*--------------------------------------------------------------------*/
   2834 /*--- end                                                          ---*/
   2835 /*--------------------------------------------------------------------*/
   2836