      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Handle system calls.                          syswrap-main.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2012 Julian Seward
     11       jseward (at) acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "libvex_guest_offsets.h"
     32 #include "libvex_trc_values.h"
     33 #include "pub_core_basics.h"
     34 #include "pub_core_aspacemgr.h"
     35 #include "pub_core_vki.h"
     36 #include "pub_core_vkiscnums.h"
     37 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
     38 #include "pub_core_threadstate.h"
     39 #include "pub_core_libcbase.h"
     40 #include "pub_core_libcassert.h"
     41 #include "pub_core_libcprint.h"
     42 #include "pub_core_libcproc.h"      // For VG_(getpid)()
     43 #include "pub_core_libcsignal.h"
     44 #include "pub_core_scheduler.h"     // For VG_({acquire,release}_BigLock),
     45                                     //   and VG_(vg_yield)
     46 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     47 #include "pub_core_tooliface.h"
     48 #include "pub_core_options.h"
     49 #include "pub_core_signals.h"       // For VG_SIGVGKILL, VG_(poll_signals)
     50 #include "pub_core_syscall.h"
     51 #include "pub_core_machine.h"
     52 #include "pub_core_syswrap.h"
     53 
     54 #include "priv_types_n_macros.h"
     55 #include "priv_syswrap-main.h"
     56 
     57 #if defined(VGO_darwin)
     58 #include "priv_syswrap-darwin.h"
     59 #endif
     60 
     61 /* Useful info which needs to be recorded somewhere:
     62    Use of registers in syscalls is:
     63 
     64           NUM   ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
     65    LINUX:
     66    x86    eax   ebx  ecx  edx  esi  edi  ebp  n/a  n/a  eax       (== NUM)
     67    amd64  rax   rdi  rsi  rdx  r10  r8   r9   n/a  n/a  rax       (== NUM)
     68    ppc32  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
     69    ppc64  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
     70    arm    r7    r0   r1   r2   r3   r4   r5   n/a  n/a  r0        (== ARG1)
     71    mips   v0    a0   a1   a2   a3 stack stack n/a  n/a  v0        (== NUM)
     72 
     73    On s390x the svc instruction is used for system calls. The system call
      74    number is encoded in the instruction (8-bit immediate field).  Since
      75    Linux 2.6 it has also been possible to use svc 0 with the system call
      76    number in r1.  This was introduced for system calls >255, but works for
      77    all of them.  svc 0 may also appear together with an EXecute
      78    instruction, which fills in the immediate field.
     79    s390x r1/SVC r2   r3   r4   r5   r6   r7   n/a  n/a  r2        (== ARG1)
     80 
     81    DARWIN:
     82    x86    eax +4   +8   +12  +16  +20  +24  +28  +32  edx:eax, eflags.c
     83    amd64  rax rdi  rsi  rdx  rcx  r8   r9   +8   +16  rdx:rax, rflags.c
     84 
     85    For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto
     86    amd64-darwin.  Apparently 0(%esp) is some kind of return address
      87    (perhaps for syscalls done with "sysenter"?).  I don't think it is
     88    relevant for syscalls done with "int $0x80/1/2".
     89 */
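/* Worked example (illustrative only, not tied to any particular
   wrapper): on amd64-linux a client call write(fd, buf, count)
   reaches this module as

      rax = __NR_write        NUM
      rdi = fd                ARG1
      rsi = buf               ARG2
      rdx = count             ARG3

   and the result comes back in rax: the byte count on success, or
   -errno on failure, exactly as per the table above. */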
     90 
     91 /* This is the top level of the system-call handler module.  All
     92    system calls are channelled through here, doing two things:
     93 
     94    * notify the tool of the events (mem/reg reads, writes) happening
     95 
     96    * perform the syscall, usually by passing it along to the kernel
     97      unmodified.
     98 
     99    A magical piece of assembly code, do_syscall_for_client_WRK, in
    100    syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
    101    kernel, whilst having the simulator retain control.
    102 */
    103 
    104 /* The main function is VG_(client_syscall).  The simulation calls it
    105    whenever a client thread wants to do a syscall.  The following is a
    106    sketch of what it does.
    107 
    108    * Ensures the root thread's stack is suitably mapped.  Tedious and
    109      arcane.  See big big comment in VG_(client_syscall).
    110 
    111    * First, it rounds up the syscall number and args (which is a
     112      platform-dependent activity) and puts them in a struct ("args")
    113      and also a copy in "orig_args".
    114 
    115      The pre/post wrappers refer to these structs and so no longer
     116      need magic macros to access any specific registers.  These structs
     117      are stored in thread-specific storage.
    118 
    119 
    120    * The pre-wrapper is called, passing it a pointer to struct
    121      "args".
    122 
    123 
    124    * The pre-wrapper examines the args and pokes the tool
    125      appropriately.  It may modify the args; this is why "orig_args"
    126      is also stored.
    127 
     128      The pre-wrapper may choose to 'do' the syscall itself; either way,
     129      it concludes with one of three outcomes (see the sketch below):
    130 
    131        Success(N)    -- syscall is already complete, with success;
    132                         result is N
    133 
    134        Fail(N)       -- syscall is already complete, with failure;
    135                         error code is N
    136 
    137        HandToKernel  -- (the usual case): this needs to be given to
    138                         the kernel to be done, using the values in
    139                         the possibly-modified "args" struct.
    140 
    141      In addition, the pre-wrapper may set some flags:
    142 
    143        MayBlock   -- only applicable when outcome==HandToKernel
    144 
    145        PostOnFail -- only applicable when outcome==HandToKernel or Fail
    146 
    147 
    148    * If the pre-outcome is HandToKernel, the syscall is duly handed
    149      off to the kernel (perhaps involving some thread switchery, but
    150      that's not important).  This reduces the possible set of outcomes
    151      to either Success(N) or Fail(N).
    152 
    153 
    154    * The outcome (Success(N) or Fail(N)) is written back to the guest
    155      register(s).  This is platform specific:
    156 
    157      x86:    Success(N) ==>  eax = N
    158              Fail(N)    ==>  eax = -N
    159 
    160      ditto amd64
    161 
    162      ppc32:  Success(N) ==>  r3 = N, CR0.SO = 0
    163              Fail(N) ==>     r3 = N, CR0.SO = 1
    164 
    165      Darwin:
    166      x86:    Success(N) ==>  edx:eax = N, cc = 0
    167              Fail(N)    ==>  edx:eax = N, cc = 1
    168 
    169      s390x:  Success(N) ==>  r2 = N
    170              Fail(N)    ==>  r2 = -N
    171 
    172    * The post wrapper is called if:
    173 
    174      - it exists, and
    175      - outcome==Success or (outcome==Fail and PostOnFail is set)
    176 
    177      The post wrapper is passed the adulterated syscall args (struct
    178      "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
    179 
    180    There are several other complications, primarily to do with
    181    syscalls getting interrupted, explained in comments in the code.
    182 */
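/* To make the outcomes and flags above concrete, here is a minimal
   sketch of a wrapper pair written against the macros from
   priv_types_n_macros.h.  "sys_foo" and its arguments are
   hypothetical; real wrappers live in the syswrap-*.c files.

      PRE(sys_foo)
      {
         PRINT("sys_foo ( %ld, %#lx )", ARG1, ARG2);
         PRE_REG_READ2(long, "foo", int, fd, void *, buf);
         if (!ML_(fd_allowed)(ARG1, "foo", tid, False)) {
            // Complete the syscall here: outcome is Fail(EBADF).
            SET_STATUS_Failure( VKI_EBADF );
         } else {
            // Usual case: hand it to the kernel; it may block.
            *flags |= SfMayBlock;
         }
      }

      POST(sys_foo)
      {
         // Called on Success, or on Fail if SfPostOnFail was set.
         POST_MEM_WRITE( ARG2, RES );
      }
*/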
    183 
    184 /* CAVEATS for writing wrappers.  It is important to follow these!
    185 
    186    The macros defined in priv_types_n_macros.h are designed to help
    187    decouple the wrapper logic from the actual representation of
    188    syscall args/results, since these wrappers are designed to work on
    189    multiple platforms.
    190 
    191    Sometimes a PRE wrapper will complete the syscall itself, without
    192    handing it to the kernel.  It will use one of SET_STATUS_Success,
    193    SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
    194    value.  It is critical to appreciate that use of the macro does not
    195    immediately cause the underlying guest state to be updated -- that
    196    is done by the driver logic in this file, when the wrapper returns.
    197 
    198    As a result, PRE wrappers of the following form will malfunction:
    199 
    200    PRE(fooble)
    201    {
    202       ... do stuff ...
    203       SET_STATUS_Somehow(...)
    204 
    205       // do something that assumes guest state is up to date
    206    }
    207 
    208    In particular, direct or indirect calls to VG_(poll_signals) after
    209    setting STATUS can cause the guest state to be read (in order to
    210    build signal frames).  Do not do this.  If you want a signal poll
    211    after the syscall goes through, do "*flags |= SfPollAfter" and the
     212    driver logic will do it for you (sketch after this comment).
    213 
    214    -----------
    215 
    216    Another critical requirement following introduction of new address
    217    space manager (JRS, 20050923):
    218 
    219    In a situation where the mappedness of memory has changed, aspacem
    220    should be notified BEFORE the tool.  Hence the following is
    221    correct:
    222 
    223       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
    224       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
    225       if (d)
    226          VG_(discard_translations)(s->start, s->end+1 - s->start);
    227 
    228    whilst this is wrong:
    229 
    230       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
    231       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
    232       if (d)
    233          VG_(discard_translations)(s->start, s->end+1 - s->start);
    234 
    235    The reason is that the tool may itself ask aspacem for more shadow
    236    memory as a result of the VG_TRACK call.  In such a situation it is
    237    critical that aspacem's segment array is up to date -- hence the
    238    need to notify aspacem first.
    239 
    240    -----------
    241 
    242    Also .. take care to call VG_(discard_translations) whenever
    243    memory with execute permissions is unmapped.
    244 */
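/* By way of illustration, a PRE wrapper that completes the syscall
   itself and still wants a signal poll afterwards should be shaped
   roughly like this (hypothetical wrapper; the point is the flag,
   not the syscall):

      PRE(sys_bar)
      {
         ... examine args, poke the tool ...
         SET_STATUS_Success( 0 );
         // Do NOT call VG_(poll_signals) here -- the guest state has
         // not been written back yet.  Let the driver do the poll:
         *flags |= SfPollAfter;
      }
*/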
    245 
    246 
    247 /* ---------------------------------------------------------------------
    248    Do potentially blocking syscall for the client, and mess with
    249    signal masks at the same time.
    250    ------------------------------------------------------------------ */
    251 
    252 /* Perform a syscall on behalf of a client thread, using a specific
    253    signal mask.  On completion, the signal mask is set to restore_mask
    254    (which presumably blocks almost everything).  If a signal happens
    255    during the syscall, the handler should call
    256    VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
    257    thread's context to do the right thing.
    258 
    259    The _WRK function is handwritten assembly, implemented per-platform
    260    in coregrind/m_syswrap/syscall-$PLAT.S.  It has some very magic
    261    properties.  See comments at the top of
    262    VG_(fixup_guest_state_after_syscall_interrupted) below for details.
    263 
     264    These functions are required to return zero in case
    265    of success (even if the syscall itself failed), and nonzero if the
    266    sigprocmask-swizzling calls failed.  We don't actually care about
    267    the failure values from sigprocmask, although most of the assembly
    268    implementations do attempt to return that, using the convention
    269    0 for success, or 0x8000 | error-code for failure.
    270 */
    271 #if defined(VGO_linux)
    272 extern
    273 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
    274                                       void* guest_state,
    275                                       const vki_sigset_t *syscall_mask,
    276                                       const vki_sigset_t *restore_mask,
    277                                       Word sigsetSzB );
    278 #elif defined(VGO_darwin)
    279 extern
    280 UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
    281                                            void* guest_state,
    282                                            const vki_sigset_t *syscall_mask,
    283                                            const vki_sigset_t *restore_mask,
    284                                            Word sigsetSzB ); /* unused */
    285 extern
    286 UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
    287                                            void* guest_state,
    288                                            const vki_sigset_t *syscall_mask,
    289                                            const vki_sigset_t *restore_mask,
    290                                            Word sigsetSzB ); /* unused */
    291 extern
    292 UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
    293                                            void* guest_state,
    294                                            const vki_sigset_t *syscall_mask,
    295                                            const vki_sigset_t *restore_mask,
    296                                            Word sigsetSzB ); /* unused */
    297 #else
    298 #  error "Unknown OS"
    299 #endif
    300 
    301 
    302 static
    303 void do_syscall_for_client ( Int syscallno,
    304                              ThreadState* tst,
    305                              const vki_sigset_t* syscall_mask )
    306 {
    307    vki_sigset_t saved;
    308    UWord err;
    309 #  if defined(VGO_linux)
    310    err = ML_(do_syscall_for_client_WRK)(
    311             syscallno, &tst->arch.vex,
    312             syscall_mask, &saved, sizeof(vki_sigset_t)
    313          );
    314 #  elif defined(VGO_darwin)
    315    switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
    316       case VG_DARWIN_SYSCALL_CLASS_UNIX:
    317          err = ML_(do_syscall_for_client_unix_WRK)(
    318                   VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
    319                   syscall_mask, &saved, 0/*unused:sigsetSzB*/
    320                );
    321          break;
    322       case VG_DARWIN_SYSCALL_CLASS_MACH:
    323          err = ML_(do_syscall_for_client_mach_WRK)(
    324                   VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
    325                   syscall_mask, &saved, 0/*unused:sigsetSzB*/
    326                );
    327          break;
    328       case VG_DARWIN_SYSCALL_CLASS_MDEP:
    329          err = ML_(do_syscall_for_client_mdep_WRK)(
    330                   VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
    331                   syscall_mask, &saved, 0/*unused:sigsetSzB*/
    332                );
    333          break;
    334       default:
    335          vg_assert(0);
    336          /*NOTREACHED*/
    337          break;
    338    }
    339 #  else
    340 #    error "Unknown OS"
    341 #  endif
    342    vg_assert2(
    343       err == 0,
    344       "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
    345       (Int)(err & 0xFFF)
    346    );
    347 }
    348 
    349 
    350 /* ---------------------------------------------------------------------
    351    Impedance matchers and misc helpers
    352    ------------------------------------------------------------------ */
    353 
    354 static
    355 Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
    356 {
    357    return a1->sysno == a2->sysno
    358           && a1->arg1 == a2->arg1
    359           && a1->arg2 == a2->arg2
    360           && a1->arg3 == a2->arg3
    361           && a1->arg4 == a2->arg4
    362           && a1->arg5 == a2->arg5
    363           && a1->arg6 == a2->arg6
    364           && a1->arg7 == a2->arg7
    365           && a1->arg8 == a2->arg8;
    366 }
    367 
    368 static
    369 Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
    370 {
    371    /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
    372    if (s1->what == s2->what && sr_EQ( s1->sres, s2->sres ))
    373       return True;
    374 #  if defined(VGO_darwin)
    375    /* Darwin-specific debugging guff */
    376    vg_assert(s1->what == s2->what);
    377    VG_(printf)("eq_SyscallStatus:\n");
    378    VG_(printf)("  {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
    379    VG_(printf)("  {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
    380    vg_assert(0);
    381 #  endif
    382    return False;
    383 }
    384 
    385 /* Convert between SysRes and SyscallStatus, to the extent possible. */
    386 
    387 static
    388 SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
    389 {
    390    SyscallStatus status;
    391    status.what = SsComplete;
    392    status.sres = res;
    393    return status;
    394 }
    395 
    396 
    397 /* Impedance matchers.  These convert syscall arg or result data from
    398    the platform-specific in-guest-state format to the canonical
    399    formats, and back. */
    400 
    401 static
    402 void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
    403                                     /*IN*/ VexGuestArchState* gst_vanilla,
    404                                     /*IN*/ UInt trc )
    405 {
    406 #if defined(VGP_x86_linux)
    407    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    408    canonical->sysno = gst->guest_EAX;
    409    canonical->arg1  = gst->guest_EBX;
    410    canonical->arg2  = gst->guest_ECX;
    411    canonical->arg3  = gst->guest_EDX;
    412    canonical->arg4  = gst->guest_ESI;
    413    canonical->arg5  = gst->guest_EDI;
    414    canonical->arg6  = gst->guest_EBP;
    415    canonical->arg7  = 0;
    416    canonical->arg8  = 0;
    417 
    418 #elif defined(VGP_amd64_linux)
    419    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    420    canonical->sysno = gst->guest_RAX;
    421    canonical->arg1  = gst->guest_RDI;
    422    canonical->arg2  = gst->guest_RSI;
    423    canonical->arg3  = gst->guest_RDX;
    424    canonical->arg4  = gst->guest_R10;
    425    canonical->arg5  = gst->guest_R8;
    426    canonical->arg6  = gst->guest_R9;
    427    canonical->arg7  = 0;
    428    canonical->arg8  = 0;
    429 
    430 #elif defined(VGP_ppc32_linux)
    431    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
    432    canonical->sysno = gst->guest_GPR0;
    433    canonical->arg1  = gst->guest_GPR3;
    434    canonical->arg2  = gst->guest_GPR4;
    435    canonical->arg3  = gst->guest_GPR5;
    436    canonical->arg4  = gst->guest_GPR6;
    437    canonical->arg5  = gst->guest_GPR7;
    438    canonical->arg6  = gst->guest_GPR8;
    439    canonical->arg7  = 0;
    440    canonical->arg8  = 0;
    441 
    442 #elif defined(VGP_ppc64_linux)
    443    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
    444    canonical->sysno = gst->guest_GPR0;
    445    canonical->arg1  = gst->guest_GPR3;
    446    canonical->arg2  = gst->guest_GPR4;
    447    canonical->arg3  = gst->guest_GPR5;
    448    canonical->arg4  = gst->guest_GPR6;
    449    canonical->arg5  = gst->guest_GPR7;
    450    canonical->arg6  = gst->guest_GPR8;
    451    canonical->arg7  = 0;
    452    canonical->arg8  = 0;
    453 
    454 #elif defined(VGP_arm_linux)
    455    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
    456    canonical->sysno = gst->guest_R7;
    457    canonical->arg1  = gst->guest_R0;
    458    canonical->arg2  = gst->guest_R1;
    459    canonical->arg3  = gst->guest_R2;
    460    canonical->arg4  = gst->guest_R3;
    461    canonical->arg5  = gst->guest_R4;
    462    canonical->arg6  = gst->guest_R5;
    463    canonical->arg7  = 0;
    464    canonical->arg8  = 0;
    465 
    466 #elif defined(VGP_mips32_linux)
    467    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
    468    canonical->sysno = gst->guest_r2;    // v0
    469    if (canonical->sysno != __NR_syscall) {
    470       canonical->arg1  = gst->guest_r4;    // a0
    471       canonical->arg2  = gst->guest_r5;    // a1
    472       canonical->arg3  = gst->guest_r6;    // a2
    473       canonical->arg4  = gst->guest_r7;    // a3
    474       canonical->arg5  = *((UInt*) (gst->guest_r29 + 16));    // 16(guest_SP/sp)
    475       canonical->arg6  = *((UInt*) (gst->guest_r29 + 20));    // 20(sp)
    476       canonical->arg8 = 0;
    477    } else {
    478       // Fixme hack handle syscall()
    479       canonical->sysno = gst->guest_r4;    // a0
    480       canonical->arg1  = gst->guest_r5;    // a1
    481       canonical->arg2  = gst->guest_r6;    // a2
    482       canonical->arg3  = gst->guest_r7;    // a3
    483       canonical->arg4  = *((UInt*) (gst->guest_r29 + 16));    // 16(guest_SP/sp)
    484       canonical->arg5  = *((UInt*) (gst->guest_r29 + 20));    // 20(guest_SP/sp)
    485       canonical->arg6  = *((UInt*) (gst->guest_r29 + 24));    // 24(guest_SP/sp)
    486       canonical->arg8 = __NR_syscall;
    487    }
    488 
    489 #elif defined(VGP_x86_darwin)
    490    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    491    UWord *stack = (UWord *)gst->guest_ESP;
    492    // GrP fixme hope syscalls aren't called with really shallow stacks...
    493    canonical->sysno = gst->guest_EAX;
    494    if (canonical->sysno != 0) {
    495       // stack[0] is return address
    496       canonical->arg1  = stack[1];
    497       canonical->arg2  = stack[2];
    498       canonical->arg3  = stack[3];
    499       canonical->arg4  = stack[4];
    500       canonical->arg5  = stack[5];
    501       canonical->arg6  = stack[6];
    502       canonical->arg7  = stack[7];
    503       canonical->arg8  = stack[8];
    504    } else {
    505       // GrP fixme hack handle syscall()
    506       // GrP fixme what about __syscall() ?
    507       // stack[0] is return address
    508       // DDD: the tool can't see that the params have been shifted!  Can
    509       //      lead to incorrect checking, I think, because the PRRAn/PSARn
    510       //      macros will mention the pre-shifted args.
    511       canonical->sysno = stack[1];
    512       vg_assert(canonical->sysno != 0);
    513       canonical->arg1  = stack[2];
    514       canonical->arg2  = stack[3];
    515       canonical->arg3  = stack[4];
    516       canonical->arg4  = stack[5];
    517       canonical->arg5  = stack[6];
    518       canonical->arg6  = stack[7];
    519       canonical->arg7  = stack[8];
    520       canonical->arg8  = stack[9];
    521 
    522       PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
    523             VG_(getpid)(), /*tid,*/
    524             VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
    525    }
    526 
    527    // Here we determine what kind of syscall it was by looking at the
    528    // interrupt kind, and then encode the syscall number using the 64-bit
    529    // encoding for Valgrind's internal use.
    530    //
    531    // DDD: Would it be better to stash the JMP kind into the Darwin
    532    // thread state rather than passing in the trc?
    533    switch (trc) {
    534    case VEX_TRC_JMP_SYS_INT128:
    535       // int $0x80 = Unix, 64-bit result
    536       vg_assert(canonical->sysno >= 0);
    537       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
    538       break;
    539    case VEX_TRC_JMP_SYS_SYSENTER:
    540       // syscall = Unix, 32-bit result
    541       // OR        Mach, 32-bit result
    542       if (canonical->sysno >= 0) {
    543          // GrP fixme hack:  0xffff == I386_SYSCALL_NUMBER_MASK
    544          canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
    545                                                              & 0xffff);
    546       } else {
    547          canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
    548       }
    549       break;
    550    case VEX_TRC_JMP_SYS_INT129:
    551       // int $0x81 = Mach, 32-bit result
    552       vg_assert(canonical->sysno < 0);
    553       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
    554       break;
    555    case VEX_TRC_JMP_SYS_INT130:
    556       // int $0x82 = mdep, 32-bit result
    557       vg_assert(canonical->sysno >= 0);
    558       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
    559       break;
    560    default:
    561       vg_assert(0);
    562       break;
    563    }
    564 
    565 #elif defined(VGP_amd64_darwin)
    566    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    567    UWord *stack = (UWord *)gst->guest_RSP;
    568 
    569    vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);
    570 
    571    // GrP fixme hope syscalls aren't called with really shallow stacks...
    572    canonical->sysno = gst->guest_RAX;
    573    if (canonical->sysno != __NR_syscall) {
    574       // stack[0] is return address
    575       canonical->arg1  = gst->guest_RDI;
    576       canonical->arg2  = gst->guest_RSI;
    577       canonical->arg3  = gst->guest_RDX;
    578       canonical->arg4  = gst->guest_R10;  // not rcx with syscall insn
    579       canonical->arg5  = gst->guest_R8;
    580       canonical->arg6  = gst->guest_R9;
    581       canonical->arg7  = stack[1];
    582       canonical->arg8  = stack[2];
    583    } else {
    584       // GrP fixme hack handle syscall()
    585       // GrP fixme what about __syscall() ?
    586       // stack[0] is return address
    587       // DDD: the tool can't see that the params have been shifted!  Can
    588       //      lead to incorrect checking, I think, because the PRRAn/PSARn
    589       //      macros will mention the pre-shifted args.
    590       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
    591       vg_assert(canonical->sysno != __NR_syscall);
    592       canonical->arg1  = gst->guest_RSI;
    593       canonical->arg2  = gst->guest_RDX;
    594       canonical->arg3  = gst->guest_R10;  // not rcx with syscall insn
    595       canonical->arg4  = gst->guest_R8;
    596       canonical->arg5  = gst->guest_R9;
    597       canonical->arg6  = stack[1];
    598       canonical->arg7  = stack[2];
    599       canonical->arg8  = stack[3];
    600 
    601       PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
    602             VG_(getpid)(), /*tid,*/
    603             VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
    604    }
    605 
    606    // no canonical->sysno adjustment needed
    607 
    608 #elif defined(VGP_s390x_linux)
    609    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
    610    canonical->sysno = gst->guest_SYSNO;
    611    canonical->arg1  = gst->guest_r2;
    612    canonical->arg2  = gst->guest_r3;
    613    canonical->arg3  = gst->guest_r4;
    614    canonical->arg4  = gst->guest_r5;
    615    canonical->arg5  = gst->guest_r6;
    616    canonical->arg6  = gst->guest_r7;
    617    canonical->arg7  = 0;
    618    canonical->arg8  = 0;
    619 #else
    620 #  error "getSyscallArgsFromGuestState: unknown arch"
    621 #endif
    622 }
    623 
    624 static
    625 void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
    626                                     /*OUT*/VexGuestArchState* gst_vanilla )
    627 {
    628 #if defined(VGP_x86_linux)
    629    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    630    gst->guest_EAX = canonical->sysno;
    631    gst->guest_EBX = canonical->arg1;
    632    gst->guest_ECX = canonical->arg2;
    633    gst->guest_EDX = canonical->arg3;
    634    gst->guest_ESI = canonical->arg4;
    635    gst->guest_EDI = canonical->arg5;
    636    gst->guest_EBP = canonical->arg6;
    637 
    638 #elif defined(VGP_amd64_linux)
    639    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    640    gst->guest_RAX = canonical->sysno;
    641    gst->guest_RDI = canonical->arg1;
    642    gst->guest_RSI = canonical->arg2;
    643    gst->guest_RDX = canonical->arg3;
    644    gst->guest_R10 = canonical->arg4;
    645    gst->guest_R8  = canonical->arg5;
    646    gst->guest_R9  = canonical->arg6;
    647 
    648 #elif defined(VGP_ppc32_linux)
    649    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
    650    gst->guest_GPR0 = canonical->sysno;
    651    gst->guest_GPR3 = canonical->arg1;
    652    gst->guest_GPR4 = canonical->arg2;
    653    gst->guest_GPR5 = canonical->arg3;
    654    gst->guest_GPR6 = canonical->arg4;
    655    gst->guest_GPR7 = canonical->arg5;
    656    gst->guest_GPR8 = canonical->arg6;
    657 
    658 #elif defined(VGP_ppc64_linux)
    659    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
    660    gst->guest_GPR0 = canonical->sysno;
    661    gst->guest_GPR3 = canonical->arg1;
    662    gst->guest_GPR4 = canonical->arg2;
    663    gst->guest_GPR5 = canonical->arg3;
    664    gst->guest_GPR6 = canonical->arg4;
    665    gst->guest_GPR7 = canonical->arg5;
    666    gst->guest_GPR8 = canonical->arg6;
    667 
    668 #elif defined(VGP_arm_linux)
    669    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
    670    gst->guest_R7 = canonical->sysno;
    671    gst->guest_R0 = canonical->arg1;
    672    gst->guest_R1 = canonical->arg2;
    673    gst->guest_R2 = canonical->arg3;
    674    gst->guest_R3 = canonical->arg4;
    675    gst->guest_R4 = canonical->arg5;
    676    gst->guest_R5 = canonical->arg6;
    677 
    678 #elif defined(VGP_x86_darwin)
    679    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    680    UWord *stack = (UWord *)gst->guest_ESP;
    681 
    682    gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
    683 
    684    // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
    685    // stack[0] is return address
    686    stack[1] = canonical->arg1;
    687    stack[2] = canonical->arg2;
    688    stack[3] = canonical->arg3;
    689    stack[4] = canonical->arg4;
    690    stack[5] = canonical->arg5;
    691    stack[6] = canonical->arg6;
    692    stack[7] = canonical->arg7;
    693    stack[8] = canonical->arg8;
    694 
    695 #elif defined(VGP_amd64_darwin)
    696    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    697    UWord *stack = (UWord *)gst->guest_RSP;
    698 
    699    gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
    700    // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
    701 
    702    // stack[0] is return address
    703    gst->guest_RDI = canonical->arg1;
    704    gst->guest_RSI = canonical->arg2;
    705    gst->guest_RDX = canonical->arg3;
    706    gst->guest_RCX = canonical->arg4;
    707    gst->guest_R8  = canonical->arg5;
    708    gst->guest_R9  = canonical->arg6;
    709    stack[1]       = canonical->arg7;
    710    stack[2]       = canonical->arg8;
    711 
    712 #elif defined(VGP_s390x_linux)
    713    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
    714    gst->guest_SYSNO  = canonical->sysno;
    715    gst->guest_r2     = canonical->arg1;
    716    gst->guest_r3     = canonical->arg2;
    717    gst->guest_r4     = canonical->arg3;
    718    gst->guest_r5     = canonical->arg4;
    719    gst->guest_r6     = canonical->arg5;
    720    gst->guest_r7     = canonical->arg6;
    721 
    722 #elif defined(VGP_mips32_linux)
    723    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
    724    if (canonical->arg8 != __NR_syscall) {
    725       gst->guest_r2 = canonical->sysno;
    726       gst->guest_r4 = canonical->arg1;
    727       gst->guest_r5 = canonical->arg2;
    728       gst->guest_r6 = canonical->arg3;
    729       gst->guest_r7 = canonical->arg4;
    730       *((UInt*) (gst->guest_r29 + 16)) = canonical->arg5;    // 16(guest_GPR29/sp)
    731       *((UInt*) (gst->guest_r29 + 20)) = canonical->arg6;    // 20(sp)
    732    } else {
    733       canonical->arg8 = 0;
    734       gst->guest_r2 = __NR_syscall;
    735       gst->guest_r4 = canonical->sysno;
    736       gst->guest_r5 = canonical->arg1;
    737       gst->guest_r6 = canonical->arg2;
    738       gst->guest_r7 = canonical->arg3;
    739       *((UInt*) (gst->guest_r29 + 16)) = canonical->arg4;    // 16(guest_GPR29/sp)
    740       *((UInt*) (gst->guest_r29 + 20)) = canonical->arg5;    // 20(sp)
    741       *((UInt*) (gst->guest_r29 + 24)) = canonical->arg6;    // 24(sp)
    742    }
    743 #else
    744 #  error "putSyscallArgsIntoGuestState: unknown arch"
    745 #endif
    746 }
    747 
    748 static
    749 void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
    750                                       /*IN*/ VexGuestArchState* gst_vanilla )
    751 {
    752 #  if defined(VGP_x86_linux)
    753    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    754    canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
    755    canonical->what = SsComplete;
    756 
    757 #  elif defined(VGP_amd64_linux)
    758    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    759    canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
    760    canonical->what = SsComplete;
    761 
    762 #  elif defined(VGP_ppc32_linux)
    763    VexGuestPPC32State* gst   = (VexGuestPPC32State*)gst_vanilla;
    764    UInt                cr    = LibVEX_GuestPPC32_get_CR( gst );
    765    UInt                cr0so = (cr >> 28) & 1;
    766    canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
    767    canonical->what = SsComplete;
    768 
    769 #  elif defined(VGP_ppc64_linux)
    770    VexGuestPPC64State* gst   = (VexGuestPPC64State*)gst_vanilla;
    771    UInt                cr    = LibVEX_GuestPPC64_get_CR( gst );
    772    UInt                cr0so = (cr >> 28) & 1;
    773    canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
    774    canonical->what = SsComplete;
    775 
    776 #  elif defined(VGP_arm_linux)
    777    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
    778    canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
    779    canonical->what = SsComplete;
    780 
    781 #  elif defined(VGP_mips32_linux)
    782    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
    783    UInt                v0 = gst->guest_r2;    // v0
    784    UInt                v1 = gst->guest_r3;    // v1
    785    UInt                a3 = gst->guest_r7;    // a3
    786    canonical->sres = VG_(mk_SysRes_mips32_linux)( v0, v1, a3 );
    787    canonical->what = SsComplete;
    788 
    789 #  elif defined(VGP_x86_darwin)
    790    /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
    791    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    792    UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
    793    UInt err = 0;
    794    UInt wLO = 0;
    795    UInt wHI = 0;
    796    switch (gst->guest_SC_CLASS) {
    797       case VG_DARWIN_SYSCALL_CLASS_UNIX:
    798          // int $0x80 = Unix, 64-bit result
    799          err = carry;
    800          wLO = gst->guest_EAX;
    801          wHI = gst->guest_EDX;
    802          break;
    803       case VG_DARWIN_SYSCALL_CLASS_MACH:
    804          // int $0x81 = Mach, 32-bit result
    805          wLO = gst->guest_EAX;
    806          break;
    807       case VG_DARWIN_SYSCALL_CLASS_MDEP:
    808          // int $0x82 = mdep, 32-bit result
    809          wLO = gst->guest_EAX;
    810          break;
    811       default:
    812          vg_assert(0);
    813          break;
    814    }
    815    canonical->sres = VG_(mk_SysRes_x86_darwin)(
    816                         gst->guest_SC_CLASS, err ? True : False,
    817                         wHI, wLO
    818                      );
    819    canonical->what = SsComplete;
    820 
    821 #  elif defined(VGP_amd64_darwin)
    822    /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
    823    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    824    ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
    825    ULong err = 0;
    826    ULong wLO = 0;
    827    ULong wHI = 0;
    828    switch (gst->guest_SC_CLASS) {
    829       case VG_DARWIN_SYSCALL_CLASS_UNIX:
    830          // syscall = Unix, 128-bit result
    831          err = carry;
    832          wLO = gst->guest_RAX;
    833          wHI = gst->guest_RDX;
    834          break;
    835       case VG_DARWIN_SYSCALL_CLASS_MACH:
    836          // syscall = Mach, 64-bit result
    837          wLO = gst->guest_RAX;
    838          break;
    839       case VG_DARWIN_SYSCALL_CLASS_MDEP:
    840          // syscall = mdep, 64-bit result
    841          wLO = gst->guest_RAX;
    842          break;
    843       default:
    844          vg_assert(0);
    845          break;
    846    }
    847    canonical->sres = VG_(mk_SysRes_amd64_darwin)(
    848                         gst->guest_SC_CLASS, err ? True : False,
    849                         wHI, wLO
    850                      );
    851    canonical->what = SsComplete;
    852 
    853 #  elif defined(VGP_s390x_linux)
    854    VexGuestS390XState* gst   = (VexGuestS390XState*)gst_vanilla;
    855    canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
    856    canonical->what = SsComplete;
    857 
    858 #  else
    859 #    error "getSyscallStatusFromGuestState: unknown arch"
    860 #  endif
    861 }
    862 
    863 static
    864 void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
    865                                       /*IN*/ SyscallStatus*     canonical,
    866                                       /*OUT*/VexGuestArchState* gst_vanilla )
    867 {
    868 #  if defined(VGP_x86_linux)
    869    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    870    vg_assert(canonical->what == SsComplete);
    871    if (sr_isError(canonical->sres)) {
    872       /* This isn't exactly right, in that really a Failure with res
    873          not in the range 1 .. 4095 is unrepresentable in the
    874          Linux-x86 scheme.  Oh well. */
    875       gst->guest_EAX = - (Int)sr_Err(canonical->sres);
    876    } else {
    877       gst->guest_EAX = sr_Res(canonical->sres);
    878    }
    879    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    880              OFFSET_x86_EAX, sizeof(UWord) );
    881 
    882 #  elif defined(VGP_amd64_linux)
    883    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    884    vg_assert(canonical->what == SsComplete);
    885    if (sr_isError(canonical->sres)) {
    886       /* This isn't exactly right, in that really a Failure with res
    887          not in the range 1 .. 4095 is unrepresentable in the
    888          Linux-amd64 scheme.  Oh well. */
    889       gst->guest_RAX = - (Long)sr_Err(canonical->sres);
    890    } else {
    891       gst->guest_RAX = sr_Res(canonical->sres);
    892    }
    893    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    894              OFFSET_amd64_RAX, sizeof(UWord) );
    895 
    896 #  elif defined(VGP_ppc32_linux)
    897    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
    898    UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
    899    vg_assert(canonical->what == SsComplete);
    900    if (sr_isError(canonical->sres)) {
    901       /* set CR0.SO */
    902       LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
    903       gst->guest_GPR3 = sr_Err(canonical->sres);
    904    } else {
    905       /* clear CR0.SO */
    906       LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
    907       gst->guest_GPR3 = sr_Res(canonical->sres);
    908    }
    909    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    910              OFFSET_ppc32_GPR3, sizeof(UWord) );
    911    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    912              OFFSET_ppc32_CR0_0, sizeof(UChar) );
    913 
    914 #  elif defined(VGP_ppc64_linux)
    915    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
    916    UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
    917    vg_assert(canonical->what == SsComplete);
    918    if (sr_isError(canonical->sres)) {
    919       /* set CR0.SO */
    920       LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
    921       gst->guest_GPR3 = sr_Err(canonical->sres);
    922    } else {
    923       /* clear CR0.SO */
    924       LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
    925       gst->guest_GPR3 = sr_Res(canonical->sres);
    926    }
    927    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    928              OFFSET_ppc64_GPR3, sizeof(UWord) );
    929    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    930              OFFSET_ppc64_CR0_0, sizeof(UChar) );
    931 
    932 #  elif defined(VGP_arm_linux)
    933    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
    934    vg_assert(canonical->what == SsComplete);
    935    if (sr_isError(canonical->sres)) {
    936       /* This isn't exactly right, in that really a Failure with res
    937          not in the range 1 .. 4095 is unrepresentable in the
    938          Linux-arm scheme.  Oh well. */
    939       gst->guest_R0 = - (Int)sr_Err(canonical->sres);
    940    } else {
    941       gst->guest_R0 = sr_Res(canonical->sres);
    942    }
    943    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    944              OFFSET_arm_R0, sizeof(UWord) );
    945 
    946 #elif defined(VGP_x86_darwin)
    947    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    948    SysRes sres = canonical->sres;
    949    vg_assert(canonical->what == SsComplete);
    950    /* Unfortunately here we have to break abstraction and look
    951       directly inside 'res', in order to decide what to do. */
    952    switch (sres._mode) {
    953       case SysRes_MACH: // int $0x81 = Mach, 32-bit result
    954       case SysRes_MDEP: // int $0x82 = mdep, 32-bit result
    955          gst->guest_EAX = sres._wLO;
    956          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    957                    OFFSET_x86_EAX, sizeof(UInt) );
    958          break;
    959       case SysRes_UNIX_OK:  // int $0x80 = Unix, 64-bit result
    960       case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error
    961          gst->guest_EAX = sres._wLO;
    962          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    963                    OFFSET_x86_EAX, sizeof(UInt) );
    964          gst->guest_EDX = sres._wHI;
    965          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    966                    OFFSET_x86_EDX, sizeof(UInt) );
    967          LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
    968                                       gst );
    969          // GrP fixme sets defined for entire eflags, not just bit c
    970          // DDD: this breaks exp-ptrcheck.
    971          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    972                    offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) );
    973          break;
    974       default:
    975          vg_assert(0);
    976          break;
    977    }
    978 
    979 #elif defined(VGP_amd64_darwin)
    980    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
    981    SysRes sres = canonical->sres;
    982    vg_assert(canonical->what == SsComplete);
    983    /* Unfortunately here we have to break abstraction and look
    984       directly inside 'res', in order to decide what to do. */
    985    switch (sres._mode) {
    986       case SysRes_MACH: // syscall = Mach, 64-bit result
    987       case SysRes_MDEP: // syscall = mdep, 64-bit result
    988          gst->guest_RAX = sres._wLO;
    989          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    990                    OFFSET_amd64_RAX, sizeof(ULong) );
    991          break;
    992       case SysRes_UNIX_OK:  // syscall = Unix, 128-bit result
    993       case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error
    994          gst->guest_RAX = sres._wLO;
    995          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    996                    OFFSET_amd64_RAX, sizeof(ULong) );
    997          gst->guest_RDX = sres._wHI;
    998          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
    999                    OFFSET_amd64_RDX, sizeof(ULong) );
   1000          LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
   1001                                         gst );
   1002          // GrP fixme sets defined for entire rflags, not just bit c
   1003          // DDD: this breaks exp-ptrcheck.
   1004          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
   1005                    offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) );
   1006          break;
   1007       default:
   1008          vg_assert(0);
   1009          break;
   1010    }
   1011 
   1012 #  elif defined(VGP_s390x_linux)
   1013    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   1014    vg_assert(canonical->what == SsComplete);
   1015    if (sr_isError(canonical->sres)) {
   1016       gst->guest_r2 = - (Long)sr_Err(canonical->sres);
   1017    } else {
   1018       gst->guest_r2 = sr_Res(canonical->sres);
   1019    }
   1020 
   1021 #  elif defined(VGP_mips32_linux)
   1022    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   1023    vg_assert(canonical->what == SsComplete);
   1024    if (sr_isError(canonical->sres)) {
   1025       gst->guest_r2 = (Int)sr_Err(canonical->sres);
   1026       gst->guest_r7 = (Int)sr_Err(canonical->sres);
   1027    } else {
   1028       gst->guest_r2 = sr_Res(canonical->sres);
   1029       gst->guest_r3 = sr_ResEx(canonical->sres);
   1030       gst->guest_r7 = (Int)sr_Err(canonical->sres);
   1031    }
   1032    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
   1033              OFFSET_mips32_r2, sizeof(UWord) );
   1034    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
   1035              OFFSET_mips32_r3, sizeof(UWord) );
   1036    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
   1037              OFFSET_mips32_r7, sizeof(UWord) );
   1038 
   1039 #  else
   1040 #    error "putSyscallStatusIntoGuestState: unknown arch"
   1041 #  endif
   1042 }
   1043 
   1044 
   1045 /* Tell me the offsets in the guest state of the syscall params, so
   1046    that the scalar argument checkers don't have to have this info
   1047    hardwired. */
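/* Illustrative sketch (not actual driver code) of how a checker can
   use the layout to tell the tool that ARG1 was read, without
   hardwiring any register names:

      static void example_report_arg1_read ( ThreadId tid,
                                              const SyscallArgLayout* layout )
      {
         VG_TRACK( pre_reg_read, Vg_CoreSysCall, tid, "example(arg1)",
                   layout->o_arg1, sizeof(UWord) );
      }

   On platforms where some arguments live in guest memory rather than
   in registers, the layout instead supplies stack offsets (the s_argN
   fields), and the checker reports a pre_mem_read at guest_SP + s_argN. */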
   1048 
   1049 static
   1050 void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
   1051 {
   1052 #if defined(VGP_x86_linux)
   1053    layout->o_sysno  = OFFSET_x86_EAX;
   1054    layout->o_arg1   = OFFSET_x86_EBX;
   1055    layout->o_arg2   = OFFSET_x86_ECX;
   1056    layout->o_arg3   = OFFSET_x86_EDX;
   1057    layout->o_arg4   = OFFSET_x86_ESI;
   1058    layout->o_arg5   = OFFSET_x86_EDI;
   1059    layout->o_arg6   = OFFSET_x86_EBP;
   1060    layout->uu_arg7  = -1; /* impossible value */
   1061    layout->uu_arg8  = -1; /* impossible value */
   1062 
   1063 #elif defined(VGP_amd64_linux)
   1064    layout->o_sysno  = OFFSET_amd64_RAX;
   1065    layout->o_arg1   = OFFSET_amd64_RDI;
   1066    layout->o_arg2   = OFFSET_amd64_RSI;
   1067    layout->o_arg3   = OFFSET_amd64_RDX;
   1068    layout->o_arg4   = OFFSET_amd64_R10;
   1069    layout->o_arg5   = OFFSET_amd64_R8;
   1070    layout->o_arg6   = OFFSET_amd64_R9;
   1071    layout->uu_arg7  = -1; /* impossible value */
   1072    layout->uu_arg8  = -1; /* impossible value */
   1073 
   1074 #elif defined(VGP_ppc32_linux)
   1075    layout->o_sysno  = OFFSET_ppc32_GPR0;
   1076    layout->o_arg1   = OFFSET_ppc32_GPR3;
   1077    layout->o_arg2   = OFFSET_ppc32_GPR4;
   1078    layout->o_arg3   = OFFSET_ppc32_GPR5;
   1079    layout->o_arg4   = OFFSET_ppc32_GPR6;
   1080    layout->o_arg5   = OFFSET_ppc32_GPR7;
   1081    layout->o_arg6   = OFFSET_ppc32_GPR8;
   1082    layout->uu_arg7  = -1; /* impossible value */
   1083    layout->uu_arg8  = -1; /* impossible value */
   1084 
   1085 #elif defined(VGP_ppc64_linux)
   1086    layout->o_sysno  = OFFSET_ppc64_GPR0;
   1087    layout->o_arg1   = OFFSET_ppc64_GPR3;
   1088    layout->o_arg2   = OFFSET_ppc64_GPR4;
   1089    layout->o_arg3   = OFFSET_ppc64_GPR5;
   1090    layout->o_arg4   = OFFSET_ppc64_GPR6;
   1091    layout->o_arg5   = OFFSET_ppc64_GPR7;
   1092    layout->o_arg6   = OFFSET_ppc64_GPR8;
   1093    layout->uu_arg7  = -1; /* impossible value */
   1094    layout->uu_arg8  = -1; /* impossible value */
   1095 
   1096 #elif defined(VGP_arm_linux)
   1097    layout->o_sysno  = OFFSET_arm_R7;
   1098    layout->o_arg1   = OFFSET_arm_R0;
   1099    layout->o_arg2   = OFFSET_arm_R1;
   1100    layout->o_arg3   = OFFSET_arm_R2;
   1101    layout->o_arg4   = OFFSET_arm_R3;
   1102    layout->o_arg5   = OFFSET_arm_R4;
   1103    layout->o_arg6   = OFFSET_arm_R5;
   1104    layout->uu_arg7  = -1; /* impossible value */
   1105    layout->uu_arg8  = -1; /* impossible value */
   1106 
   1107 #elif defined(VGP_mips32_linux)
   1108    layout->o_sysno  = OFFSET_mips32_r2;
   1109    layout->o_arg1   = OFFSET_mips32_r4;
   1110    layout->o_arg2   = OFFSET_mips32_r5;
   1111    layout->o_arg3   = OFFSET_mips32_r6;
   1112    layout->o_arg4   = OFFSET_mips32_r7;
   1113    layout->s_arg5   = sizeof(UWord) * 4;
   1114    layout->s_arg6   = sizeof(UWord) * 5;
   1115    layout->uu_arg7  = -1; /* impossible value */
   1116    layout->uu_arg8  = -1; /* impossible value */
   1117 
   1118 #elif defined(VGP_x86_darwin)
   1119    layout->o_sysno  = OFFSET_x86_EAX;
   1120    // syscall parameters are on stack in C convention
   1121    layout->s_arg1   = sizeof(UWord) * 1;
   1122    layout->s_arg2   = sizeof(UWord) * 2;
   1123    layout->s_arg3   = sizeof(UWord) * 3;
   1124    layout->s_arg4   = sizeof(UWord) * 4;
   1125    layout->s_arg5   = sizeof(UWord) * 5;
   1126    layout->s_arg6   = sizeof(UWord) * 6;
   1127    layout->s_arg7   = sizeof(UWord) * 7;
   1128    layout->s_arg8   = sizeof(UWord) * 8;
   1129 
   1130 #elif defined(VGP_amd64_darwin)
   1131    layout->o_sysno  = OFFSET_amd64_RAX;
   1132    layout->o_arg1   = OFFSET_amd64_RDI;
   1133    layout->o_arg2   = OFFSET_amd64_RSI;
   1134    layout->o_arg3   = OFFSET_amd64_RDX;
   1135    layout->o_arg4   = OFFSET_amd64_RCX;
   1136    layout->o_arg5   = OFFSET_amd64_R8;
   1137    layout->o_arg6   = OFFSET_amd64_R9;
   1138    layout->s_arg7   = sizeof(UWord) * 1;
   1139    layout->s_arg8   = sizeof(UWord) * 2;
   1140 
   1141 #elif defined(VGP_s390x_linux)
   1142    layout->o_sysno  = OFFSET_s390x_SYSNO;
   1143    layout->o_arg1   = OFFSET_s390x_r2;
   1144    layout->o_arg2   = OFFSET_s390x_r3;
   1145    layout->o_arg3   = OFFSET_s390x_r4;
   1146    layout->o_arg4   = OFFSET_s390x_r5;
   1147    layout->o_arg5   = OFFSET_s390x_r6;
   1148    layout->o_arg6   = OFFSET_s390x_r7;
   1149    layout->uu_arg7  = -1; /* impossible value */
   1150    layout->uu_arg8  = -1; /* impossible value */
   1151 #else
   1152 #  error "getSyscallLayout: unknown arch"
   1153 #endif
   1154 }
   1155 
   1156 
   1157 /* ---------------------------------------------------------------------
   1158    The main driver logic
   1159    ------------------------------------------------------------------ */
   1160 
   1161 /* Finding the handlers for a given syscall, or faking up one
   1162    when no handler is found. */
   1163 
   1164 static
   1165 void bad_before ( ThreadId              tid,
   1166                   SyscallArgLayout*     layout,
   1167                   /*MOD*/SyscallArgs*   args,
   1168                   /*OUT*/SyscallStatus* status,
   1169                   /*OUT*/UWord*         flags )
   1170 {
   1171    VG_(dmsg)("WARNING: unhandled syscall: %s\n",
   1172       VG_SYSNUM_STRING_EXTRA(args->sysno));
   1173    if (VG_(clo_verbosity) > 1) {
   1174       VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   1175    }
   1176    VG_(dmsg)("You may be able to write your own handler.\n");
   1177    VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
   1178    VG_(dmsg)("Nevertheless we consider this a bug.  Please report\n");
   1179    VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");
   1180 
   1181    SET_STATUS_Failure(VKI_ENOSYS);
   1182 }
   1183 
   1184 static SyscallTableEntry bad_sys =
   1185    { bad_before, NULL };
   1186 
   1187 static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
   1188 {
   1189    const SyscallTableEntry* sys = NULL;
   1190 
   1191 #  if defined(VGO_linux)
   1192    sys = ML_(get_linux_syscall_entry)( syscallno );
   1193 
   1194 #  elif defined(VGO_darwin)
   1195    Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);
   1196 
   1197    switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
   1198    case VG_DARWIN_SYSCALL_CLASS_UNIX:
   1199       if (idx >= 0 && idx < ML_(syscall_table_size) &&
   1200           ML_(syscall_table)[idx].before != NULL)
   1201          sys = &ML_(syscall_table)[idx];
    1202       break;
   1203    case VG_DARWIN_SYSCALL_CLASS_MACH:
   1204       if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
   1205           ML_(mach_trap_table)[idx].before != NULL)
   1206          sys = &ML_(mach_trap_table)[idx];
    1207       break;
   1208    case VG_DARWIN_SYSCALL_CLASS_MDEP:
   1209       if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
   1210           ML_(mdep_trap_table)[idx].before != NULL)
   1211          sys = &ML_(mdep_trap_table)[idx];
    1212       break;
   1213    default:
   1214       vg_assert(0);
   1215       break;
   1216    }
   1217 
   1218 #  else
   1219 #    error Unknown OS
   1220 #  endif
   1221 
   1222    return sys == NULL  ? &bad_sys  : sys;
   1223 }
   1224 
   1225 
   1226 /* Add and remove signals from mask so that we end up telling the
   1227    kernel the state we actually want rather than what the client
   1228    wants. */
   1229 static void sanitize_client_sigmask(vki_sigset_t *mask)
   1230 {
   1231    VG_(sigdelset)(mask, VKI_SIGKILL);
   1232    VG_(sigdelset)(mask, VKI_SIGSTOP);
   1233    VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
   1234 }
   1235 
   1236 typedef
   1237    struct {
   1238       SyscallArgs   orig_args;
   1239       SyscallArgs   args;
   1240       SyscallStatus status;
   1241       UWord         flags;
   1242    }
   1243    SyscallInfo;
   1244 
   1245 SyscallInfo syscallInfo[VG_N_THREADS];
   1246 
   1247 
   1248 /* The scheduler needs to be able to zero out these records after a
   1249    fork, hence this is exported from m_syswrap. */
   1250 void VG_(clear_syscallInfo) ( Int tid )
   1251 {
   1252    vg_assert(tid >= 0 && tid < VG_N_THREADS);
   1253    VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
   1254    syscallInfo[tid].status.what = SsIdle;
   1255 }
   1256 
   1257 static void ensure_initialised ( void )
   1258 {
   1259    Int i;
   1260    static Bool init_done = False;
   1261    if (init_done)
   1262       return;
   1263    init_done = True;
   1264    for (i = 0; i < VG_N_THREADS; i++) {
   1265       VG_(clear_syscallInfo)( i );
   1266    }
   1267 }
   1268 
   1269 /* --- This is the main function of this file. --- */
   1270 
   1271 void VG_(client_syscall) ( ThreadId tid, UInt trc )
   1272 {
   1273    Word                     sysno;
   1274    ThreadState*             tst;
   1275    const SyscallTableEntry* ent;
   1276    SyscallArgLayout         layout;
   1277    SyscallInfo*             sci;
   1278 
   1279    ensure_initialised();
   1280 
   1281    vg_assert(VG_(is_valid_tid)(tid));
   1282    vg_assert(tid >= 1 && tid < VG_N_THREADS);
   1283    vg_assert(VG_(is_running_thread)(tid));
   1284 
   1285    tst = VG_(get_ThreadState)(tid);
   1286 
   1287    /* BEGIN ensure root thread's stack is suitably mapped */
   1288    /* In some rare circumstances, we may do the syscall without the
   1289       bottom page of the stack being mapped, because the stack pointer
   1290       was moved down just a few instructions before the syscall
    1291       instruction, and there have been no memory references since
    1292       then that would have caused VG_(extend_stack) to be
    1293       called.
   1294 
   1295       In native execution that's OK: the kernel automagically extends
   1296       the stack's mapped area down to cover the stack pointer (or sp -
   1297       redzone, really).  In simulated normal execution that's OK too,
   1298       since any signals we get from accessing below the mapped area of
   1299       the (guest's) stack lead us to VG_(extend_stack), where we
   1300       simulate the kernel's stack extension logic.  But that leaves
   1301       the problem of entering a syscall with the SP unmapped.  Because
   1302       the kernel doesn't know that the segment immediately above SP is
   1303       supposed to be a grow-down segment, it causes the syscall to
   1304       fail, and thereby causes a divergence between native behaviour
   1305       (syscall succeeds) and simulated behaviour (syscall fails).
   1306 
   1307       This is quite a rare failure mode.  It has only been seen
   1308       affecting calls to sys_readlink on amd64-linux, and even then it
   1309       requires a certain code sequence around the syscall to trigger
   1310       it.  Here is one:
   1311 
   1312       extern int my_readlink ( const char* path );
   1313       asm(
   1314       ".text\n"
   1315       ".globl my_readlink\n"
   1316       "my_readlink:\n"
   1317       "\tsubq    $0x1008,%rsp\n"
   1318       "\tmovq    %rdi,%rdi\n"              // path is in rdi
   1319       "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
   1320       "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
   1321       "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
   1322       "\tsyscall\n"
   1323       "\taddq    $0x1008,%rsp\n"
   1324       "\tret\n"
   1325       ".previous\n"
   1326       );
   1327 
   1328       For more details, see bug #156404
   1329       (https://bugs.kde.org/show_bug.cgi?id=156404).
   1330 
   1331       The fix is actually very simple.  We simply need to call
   1332       VG_(extend_stack) for this thread, handing it the lowest
    1333       possible valid address for the stack (sp - redzone), to ensure
    1334       that the pages all the way down to that address are mapped.  Because
   1335       this is a potentially expensive and frequent operation, we
   1336       filter in two ways:
   1337 
   1338       First, only the main thread (tid=1) has a growdown stack.  So
   1339       ignore all others.  It is conceivable, although highly unlikely,
   1340       that the main thread exits, and later another thread is
   1341       allocated tid=1, but that's harmless, I believe;
   1342       VG_(extend_stack) will do nothing when applied to a non-root
   1343       thread.
   1344 
   1345       Secondly, first call VG_(am_find_nsegment) directly, to see if
   1346       the page holding (sp - redzone) is mapped correctly.  If so, do
   1347       nothing.  This is almost always the case.  VG_(extend_stack)
   1348       calls VG_(am_find_nsegment) twice, so this optimisation -- and
   1349       that's all it is -- more or less halves the number of calls to
   1350       VG_(am_find_nsegment) required.
   1351 
   1352       TODO: the test "seg->kind == SkAnonC" is really inadequate,
   1353       because although it tests whether the segment is mapped
   1354       _somehow_, it doesn't check that it has the right permissions
    1355       (r, w, maybe x).  We could test that here, but it will also be
   1356       necessary to fix the corresponding test in VG_(extend_stack).
   1357 
   1358       All this guff is of course Linux-specific.  Hence the ifdef.
   1359    */
   1360 #  if defined(VGO_linux)
   1361    if (tid == 1/*ROOT THREAD*/) {
   1362       Addr     stackMin   = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
   1363       NSegment const* seg = VG_(am_find_nsegment)(stackMin);
   1364       if (seg && seg->kind == SkAnonC) {
   1365          /* stackMin is already mapped.  Nothing to do. */
   1366       } else {
   1367          (void)VG_(extend_stack)( stackMin,
   1368                                   tst->client_stack_szB );
   1369       }
   1370    }
   1371 #  endif
   1372    /* END ensure root thread's stack is suitably mapped */
   1373 
   1374    /* First off, get the syscall args and number.  This is a
   1375       platform-dependent action. */
   1376 
   1377    sci = & syscallInfo[tid];
   1378    vg_assert(sci->status.what == SsIdle);
   1379 
   1380    getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );
   1381 
   1382    /* Copy .orig_args to .args.  The pre-handler may modify .args, but
   1383       we want to keep the originals too, just in case. */
   1384    sci->args = sci->orig_args;
   1385 
    1386    /* Pull the syscall number out of the (unmodified) original args;
    1387       it is used repeatedly below. */
   1388    sysno = sci->orig_args.sysno;
   1389 
   1390    /* It's sometimes useful, as a crude debugging hack, to get a
   1391       stack trace at each (or selected) syscalls. */
   1392    if (0 && sysno == __NR_ioctl) {
   1393       VG_(umsg)("\nioctl:\n");
   1394       VG_(get_and_pp_StackTrace)(tid, 10);
   1395       VG_(umsg)("\n");
   1396    }
   1397 
   1398 #  if defined(VGO_darwin)
   1399    /* Record syscall class.  But why?  Because the syscall might be
   1400       interrupted by a signal, and in the signal handler (which will
   1401       be m_signals.async_signalhandler) we will need to build a SysRes
   1402       reflecting the syscall return result.  In order to do that we
   1403       need to know the syscall class.  Hence stash it in the guest
   1404       state of this thread.  This madness is not needed on Linux
   1405       because it only has a single syscall return convention and so
   1406       there is no ambiguity involved in converting the post-signal
   1407       machine state into a SysRes. */
   1408    tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
   1409 #  endif
   1410 
    1411    /* The default what-to-do-next thing is to hand the syscall to
    1412       the kernel, so we pre-set that here.  Set .sres to something
    1413       harmless looking (it is irrelevant, because .what is not
    1414       SsComplete.) */
   1415    sci->status.what = SsHandToKernel;
   1416    sci->status.sres = VG_(mk_SysRes_Error)(0);
   1417    sci->flags       = 0;
   1418 
   1419    /* Fetch the syscall's handlers.  If no handlers exist for this
   1420       syscall, we are given dummy handlers which force an immediate
   1421       return with ENOSYS. */
   1422    ent = get_syscall_entry(sysno);
   1423 
   1424    /* Fetch the layout information, which tells us where in the guest
   1425       state the syscall args reside.  This is a platform-dependent
   1426       action.  This info is needed so that the scalar syscall argument
   1427       checks (PRE_REG_READ calls) know which bits of the guest state
   1428       they need to inspect. */
   1429    getSyscallArgLayout( &layout );
   1430 
   1431    /* Make sure the tmp signal mask matches the real signal mask;
   1432       sigsuspend may change this. */
   1433    vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));
   1434 
   1435    /* Right, we're finally ready to Party.  Call the pre-handler and
   1436       see what we get back.  At this point:
   1437 
    1438         sci->status.what  is SsHandToKernel (the default, set above).
   1439         sci->orig_args    contains the original args.
   1440         sci->args         is the same as sci->orig_args.
   1441         sci->flags        is zero.
   1442    */
   1443 
   1444    PRINT("SYSCALL[%d,%d](%s) ",
   1445       VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));
   1446 
   1447    /* Do any pre-syscall actions */
   1448    if (VG_(needs).syscall_wrapper) {
   1449       UWord tmpv[8];
   1450       tmpv[0] = sci->orig_args.arg1;
   1451       tmpv[1] = sci->orig_args.arg2;
   1452       tmpv[2] = sci->orig_args.arg3;
   1453       tmpv[3] = sci->orig_args.arg4;
   1454       tmpv[4] = sci->orig_args.arg5;
   1455       tmpv[5] = sci->orig_args.arg6;
   1456       tmpv[6] = sci->orig_args.arg7;
   1457       tmpv[7] = sci->orig_args.arg8;
   1458       VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
   1459                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
   1460    }
   1461 
   1462    vg_assert(ent);
   1463    vg_assert(ent->before);
   1464    (ent->before)( tid,
   1465                   &layout,
   1466                   &sci->args, &sci->status, &sci->flags );
   1467 
   1468    /* The pre-handler may have modified:
   1469          sci->args
   1470          sci->status
   1471          sci->flags
   1472       All else remains unchanged.
   1473       Although the args may be modified, pre handlers are not allowed
   1474       to change the syscall number.
   1475    */
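   /* For illustration only: a hypothetical pre-handler ("sys_foo" and its
      arguments are made up), loosely modelled on the real wrappers in the
      syswrap-*.c files, showing the kinds of decisions recorded in
      sci->args, sci->status and sci->flags:

         PRE(sys_foo)
         {
            *flags |= SfMayBlock;                 // may block in the kernel
            PRINT("sys_foo ( %ld, %#lx )", ARG1, ARG2);
            PRE_REG_READ2(long, "foo", int, fd, void *, buf);
            if (!ML_(fd_allowed)(ARG1, "foo", tid, False))
               SET_STATUS_Failure( VKI_EBADF );   // pre-fail; kernel never sees it
         }
   */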
   1476    /* Now we proceed according to what the pre-handler decided. */
   1477    vg_assert(sci->status.what == SsHandToKernel
   1478              || sci->status.what == SsComplete);
   1479    vg_assert(sci->args.sysno == sci->orig_args.sysno);
   1480 
   1481    if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
   1482       /* The pre-handler completed the syscall itself, declaring
   1483          success. */
   1484       if (sci->flags & SfNoWriteResult) {
   1485          PRINT(" --> [pre-success] NoWriteResult");
   1486       } else {
   1487          PRINT(" --> [pre-success] Success(0x%llx:0x%llx)",
   1488                (ULong)sr_ResHI(sci->status.sres),
   1489                (ULong)sr_Res(sci->status.sres));
   1490       }
   1491       /* In this case the allowable flags are to ask for a signal-poll
   1492          and/or a yield after the call.  Changing the args isn't
   1493          allowed. */
   1494       vg_assert(0 == (sci->flags
   1495                       & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
   1496       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   1497    }
   1498 
   1499    else
   1500    if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
    1501       /* The pre-handler decided to fail the syscall itself. */
   1502       PRINT(" --> [pre-fail] Failure(0x%llx)", (ULong)sr_Err(sci->status.sres));
   1503       /* In this case, the pre-handler is also allowed to ask for the
   1504          post-handler to be run anyway.  Changing the args is not
   1505          allowed. */
   1506       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
   1507       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   1508    }
   1509 
   1510    else
   1511    if (sci->status.what != SsHandToKernel) {
   1512       /* huh?! */
   1513       vg_assert(0);
   1514    }
   1515 
    1516    else /* (sci->status.what == SsHandToKernel) */ {
   1517       /* Ok, this is the usual case -- and the complicated one.  There
   1518          are two subcases: sync and async.  async is the general case
   1519          and is to be used when there is any possibility that the
   1520          syscall might block [a fact that the pre-handler must tell us
   1521          via the sci->flags field.]  Because the tidying-away /
   1522          context-switch overhead of the async case could be large, if
   1523          we are sure that the syscall will not block, we fast-track it
   1524          by doing it directly in this thread, which is a lot
   1525          simpler. */
   1526 
   1527       /* Check that the given flags are allowable: MayBlock, PollAfter
   1528          and PostOnFail are ok. */
   1529       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
   1530 
   1531       if (sci->flags & SfMayBlock) {
   1532 
   1533          /* Syscall may block, so run it asynchronously */
   1534          vki_sigset_t mask;
   1535 
   1536          PRINT(" --> [async] ... \n");
   1537 
   1538          mask = tst->sig_mask;
   1539          sanitize_client_sigmask(&mask);
   1540 
   1541          /* Gack.  More impedance matching.  Copy the possibly
   1542             modified syscall args back into the guest state. */
   1543          /* JRS 2009-Mar-16: if the syscall args are possibly modified,
   1544             then this assertion is senseless:
   1545               vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   1546             The case that exposed it was sys_posix_spawn on Darwin,
   1547             which heavily modifies its arguments but then lets the call
    1548             go through anyway, with SfMayBlock set, hence we end up here. */
   1549          putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
   1550 
   1551          /* Drop the bigLock */
   1552          VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
   1553          /* Urr.  We're now in a race against other threads trying to
   1554             acquire the bigLock.  I guess that doesn't matter provided
   1555             that do_syscall_for_client only touches thread-local
   1556             state. */
   1557 
   1558          /* Do the call, which operates directly on the guest state,
   1559             not on our abstracted copies of the args/result. */
   1560          do_syscall_for_client(sysno, tst, &mask);
   1561 
   1562          /* do_syscall_for_client may not return if the syscall was
   1563             interrupted by a signal.  In that case, flow of control is
    1564             first to m_signals.async_signalhandler, which calls
   1565             VG_(fixup_guest_state_after_syscall_interrupted), which
   1566             fixes up the guest state, and possibly calls
   1567             VG_(post_syscall).  Once that's done, control drops back
   1568             to the scheduler.  */
   1569 
   1570          /* Darwin: do_syscall_for_client may not return if the
   1571             syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
   1572             responded by starting the thread at wqthread_hijack(reuse=1)
   1573             (to run another workqueue item). In that case, wqthread_hijack
   1574             calls ML_(wqthread_continue), which is similar to
   1575             VG_(fixup_guest_state_after_syscall_interrupted). */
   1576 
   1577          /* Reacquire the lock */
   1578          VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");
   1579 
   1580          /* Even more impedance matching.  Extract the syscall status
   1581             from the guest state. */
   1582          getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
   1583          vg_assert(sci->status.what == SsComplete);
   1584 
   1585          /* Be decorative, if required. */
   1586          if (VG_(clo_trace_syscalls)) {
   1587             Bool failed = sr_isError(sci->status.sres);
   1588             if (failed) {
   1589                PRINT("SYSCALL[%d,%d](%s) ... [async] --> Failure(0x%llx)",
   1590                      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
   1591                      (ULong)sr_Err(sci->status.sres));
   1592             } else {
   1593                PRINT("SYSCALL[%d,%d](%s) ... [async] --> "
   1594                      "Success(0x%llx:0x%llx)",
   1595                      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
   1596                      (ULong)sr_ResHI(sci->status.sres),
   1597                      (ULong)sr_Res(sci->status.sres) );
   1598             }
   1599          }
   1600 
   1601       } else {
   1602 
   1603          /* run the syscall directly */
   1604          /* The pre-handler may have modified the syscall args, but
   1605             since we're passing values in ->args directly to the
   1606             kernel, there's no point in flushing them back to the
   1607             guest state.  Indeed doing so could be construed as
   1608             incorrect. */
   1609          SysRes sres
   1610             = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
   1611                                      sci->args.arg3, sci->args.arg4,
   1612                                      sci->args.arg5, sci->args.arg6,
   1613                                      sci->args.arg7, sci->args.arg8 );
   1614          sci->status = convert_SysRes_to_SyscallStatus(sres);
   1615 
   1616          /* Be decorative, if required. */
   1617          if (VG_(clo_trace_syscalls)) {
   1618             Bool failed = sr_isError(sci->status.sres);
   1619             if (failed) {
   1620                PRINT("[sync] --> Failure(0x%llx)",
   1621                      (ULong)sr_Err(sci->status.sres) );
   1622             } else {
   1623                PRINT("[sync] --> Success(0x%llx:0x%llx)",
   1624                      (ULong)sr_ResHI(sci->status.sres),
   1625                      (ULong)sr_Res(sci->status.sres) );
   1626             }
   1627          }
   1628       }
   1629    }
   1630 
   1631    vg_assert(sci->status.what == SsComplete);
   1632 
   1633    vg_assert(VG_(is_running_thread)(tid));
   1634 
   1635    /* Dump the syscall result back in the guest state.  This is
   1636       a platform-specific action. */
   1637    if (!(sci->flags & SfNoWriteResult))
   1638       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
   1639 
   1640    /* Situation now:
   1641       - the guest state is now correctly modified following the syscall
   1642       - modified args, original args and syscall status are still
   1643         available in the syscallInfo[] entry for this syscall.
   1644 
   1645       Now go on to do the post-syscall actions (read on down ..)
   1646    */
   1647    PRINT(" ");
   1648    VG_(post_syscall)(tid);
   1649    PRINT("\n");
   1650 }
   1651 
   1652 
   1653 /* Perform post syscall actions.  The expected state on entry is
   1654    precisely as at the end of VG_(client_syscall), that is:
   1655 
   1656    - guest state up to date following the syscall
   1657    - modified args, original args and syscall status are still
   1658      available in the syscallInfo[] entry for this syscall.
   1659    - syscall status matches what's in the guest state.
   1660 
   1661    There are two ways to get here: the normal way -- being called by
   1662    VG_(client_syscall), and the unusual way, from
   1663    VG_(fixup_guest_state_after_syscall_interrupted).
   1664    Darwin: there's a third way, ML_(wqthread_continue).
   1665 */
   1666 void VG_(post_syscall) (ThreadId tid)
   1667 {
   1668    SyscallInfo*             sci;
   1669    const SyscallTableEntry* ent;
   1670    SyscallStatus            test_status;
   1671    ThreadState*             tst;
   1672    Word sysno;
   1673 
   1674    /* Preliminaries */
   1675    vg_assert(VG_(is_valid_tid)(tid));
   1676    vg_assert(tid >= 1 && tid < VG_N_THREADS);
   1677    vg_assert(VG_(is_running_thread)(tid));
   1678 
   1679    tst = VG_(get_ThreadState)(tid);
   1680    sci = & syscallInfo[tid];
   1681 
   1682    /* m_signals.sigvgkill_handler might call here even when not in
   1683       a syscall. */
   1684    if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
   1685       sci->status.what = SsIdle;
   1686       return;
   1687    }
   1688 
   1689    /* Validate current syscallInfo entry.  In particular we require
   1690       that the current .status matches what's actually in the guest
   1691       state.  At least in the normal case where we have actually
   1692       previously written the result into the guest state. */
   1693    vg_assert(sci->status.what == SsComplete);
   1694 
   1695    getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
   1696    if (!(sci->flags & SfNoWriteResult))
   1697       vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
   1698    /* Failure of the above assertion on Darwin can indicate a problem
   1699       in the syscall wrappers that pre-fail or pre-succeed the
   1700       syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
    1701       when they really should call SET_STATUS_from_SysRes.  The former
    1702       two create a UNIX-class syscall result on Darwin, which may not be
   1703       correct for the syscall; if that's the case then this assertion
   1704       fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
    1705       non-Darwin platforms this assertion should never fail, and this
   1706       comment is completely irrelevant. */
   1707    /* Ok, looks sane */
   1708 
   1709    /* Get the system call number.  Because the pre-handler isn't
   1710       allowed to mess with it, it should be the same for both the
   1711       original and potentially-modified args. */
   1712    vg_assert(sci->args.sysno == sci->orig_args.sysno);
   1713    sysno = sci->args.sysno;
   1714    ent = get_syscall_entry(sysno);
   1715 
   1716    /* pre: status == Complete (asserted above) */
   1717    /* Consider either success or failure.  Now run the post handler if:
   1718       - it exists, and
   1719       - Success or (Failure and PostOnFail is set)
   1720    */
   1721    if (ent->after
   1722        && ((!sr_isError(sci->status.sres))
   1723            || (sr_isError(sci->status.sres)
   1724                && (sci->flags & SfPostOnFail) ))) {
   1725 
   1726       (ent->after)( tid, &sci->args, &sci->status );
   1727    }
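   /* For illustration only: a hypothetical post-handler ("sys_foo" is made
      up), loosely modelled on POST(sys_open) and POST(sys_read) in
      syswrap-generic.c, showing the two typical jobs -- telling the tool
      about memory the kernel wrote, and possibly vetoing the result:

         POST(sys_foo)
         {
            vg_assert(SUCCESS);
            POST_MEM_WRITE( ARG2, RES );          // kernel filled this buffer
            if (!ML_(fd_allowed)(RES, "foo", tid, True)) {
               VG_(close)(RES);
               SET_STATUS_Failure( VKI_EMFILE );  // success turned into failure
            }
         }
   */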
   1728 
   1729    /* Because the post handler might have changed the status (eg, the
   1730       post-handler for sys_open can change the result from success to
   1731       failure if the kernel supplied a fd that it doesn't like), once
   1732       again dump the syscall result back in the guest state.*/
   1733    if (!(sci->flags & SfNoWriteResult))
   1734       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
   1735 
   1736    /* Do any post-syscall actions required by the tool. */
   1737    if (VG_(needs).syscall_wrapper) {
   1738       UWord tmpv[8];
   1739       tmpv[0] = sci->orig_args.arg1;
   1740       tmpv[1] = sci->orig_args.arg2;
   1741       tmpv[2] = sci->orig_args.arg3;
   1742       tmpv[3] = sci->orig_args.arg4;
   1743       tmpv[4] = sci->orig_args.arg5;
   1744       tmpv[5] = sci->orig_args.arg6;
   1745       tmpv[6] = sci->orig_args.arg7;
   1746       tmpv[7] = sci->orig_args.arg8;
   1747       VG_TDICT_CALL(tool_post_syscall, tid,
   1748                     sysno,
   1749                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
   1750                     sci->status.sres);
   1751    }
   1752 
   1753    /* The syscall is done. */
   1754    vg_assert(sci->status.what == SsComplete);
   1755    sci->status.what = SsIdle;
   1756 
   1757    /* The pre/post wrappers may have concluded that pending signals
   1758       might have been created, and will have set SfPollAfter to
   1759       request a poll for them once the syscall is done. */
   1760    if (sci->flags & SfPollAfter)
   1761       VG_(poll_signals)(tid);
   1762 
   1763    /* Similarly, the wrappers might have asked for a yield
   1764       afterwards. */
   1765    if (sci->flags & SfYieldAfter)
   1766       VG_(vg_yield)();
   1767 }
   1768 
   1769 
   1770 /* ---------------------------------------------------------------------
   1771    Dealing with syscalls which get interrupted by a signal:
   1772    VG_(fixup_guest_state_after_syscall_interrupted)
   1773    ------------------------------------------------------------------ */
   1774 
   1775 /* Syscalls done on behalf of the client are finally handed off to the
   1776    kernel in VG_(client_syscall) above, either by calling
   1777    do_syscall_for_client (the async case), or by calling
   1778    VG_(do_syscall6) (the sync case).
   1779 
   1780    If the syscall is not interrupted by a signal (it may block and
   1781    later unblock, but that's irrelevant here) then those functions
   1782    eventually return and so control is passed to VG_(post_syscall).
   1783    NB: not sure if the sync case can actually get interrupted, as it
   1784    operates with all signals masked.
   1785 
   1786    However, the syscall may get interrupted by an async-signal.  In
   1787    that case do_syscall_for_client/VG_(do_syscall6) do not
    1788    return.  Instead we wind up in m_signals.async_signalhandler.  We
    1789    need to fix up the guest state to make it look like the syscall was
    1790    interrupted for the guest.  So async_signalhandler calls here, and this
   1791    does the fixup.  Note that from here we wind up calling
   1792    VG_(post_syscall) too.
   1793 */
   1794 
   1795 
   1796 /* These are addresses within ML_(do_syscall_for_client_WRK).  See
   1797    syscall-$PLAT.S for details.
   1798 */
   1799 #if defined(VGO_linux)
   1800   extern const Addr ML_(blksys_setup);
   1801   extern const Addr ML_(blksys_restart);
   1802   extern const Addr ML_(blksys_complete);
   1803   extern const Addr ML_(blksys_committed);
   1804   extern const Addr ML_(blksys_finished);
   1805 #elif defined(VGO_darwin)
    1806   /* Darwin requires extra ugliness */
   1807   extern const Addr ML_(blksys_setup_MACH);
   1808   extern const Addr ML_(blksys_restart_MACH);
   1809   extern const Addr ML_(blksys_complete_MACH);
   1810   extern const Addr ML_(blksys_committed_MACH);
   1811   extern const Addr ML_(blksys_finished_MACH);
   1812   extern const Addr ML_(blksys_setup_MDEP);
   1813   extern const Addr ML_(blksys_restart_MDEP);
   1814   extern const Addr ML_(blksys_complete_MDEP);
   1815   extern const Addr ML_(blksys_committed_MDEP);
   1816   extern const Addr ML_(blksys_finished_MDEP);
   1817   extern const Addr ML_(blksys_setup_UNIX);
   1818   extern const Addr ML_(blksys_restart_UNIX);
   1819   extern const Addr ML_(blksys_complete_UNIX);
   1820   extern const Addr ML_(blksys_committed_UNIX);
   1821   extern const Addr ML_(blksys_finished_UNIX);
   1822 #else
   1823 # error "Unknown OS"
   1824 #endif
   1825 
   1826 
   1827 /* Back up guest state to restart a system call. */
   1828 
   1829 void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
   1830 {
   1831 #if defined(VGP_x86_linux)
   1832    arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)
   1833 
   1834    /* Make sure our caller is actually sane, and we're really backing
   1835       back over a syscall.
   1836 
   1837       int $0x80 == CD 80
   1838    */
   1839    {
   1840       UChar *p = (UChar *)arch->vex.guest_EIP;
   1841 
   1842       if (p[0] != 0xcd || p[1] != 0x80)
   1843          VG_(message)(Vg_DebugMsg,
   1844                       "?! restarting over syscall at %#x %02x %02x\n",
   1845                       arch->vex.guest_EIP, p[0], p[1]);
   1846 
   1847       vg_assert(p[0] == 0xcd && p[1] == 0x80);
   1848    }
   1849 
   1850 #elif defined(VGP_amd64_linux)
   1851    arch->vex.guest_RIP -= 2;             // sizeof(syscall)
   1852 
   1853    /* Make sure our caller is actually sane, and we're really backing
   1854       back over a syscall.
   1855 
   1856       syscall == 0F 05
   1857    */
   1858    {
   1859       UChar *p = (UChar *)arch->vex.guest_RIP;
   1860 
   1861       if (p[0] != 0x0F || p[1] != 0x05)
   1862          VG_(message)(Vg_DebugMsg,
   1863                       "?! restarting over syscall at %#llx %02x %02x\n",
   1864                       arch->vex.guest_RIP, p[0], p[1]);
   1865 
   1866       vg_assert(p[0] == 0x0F && p[1] == 0x05);
   1867    }
   1868 
   1869 #elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   1870    arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)
   1871 
   1872    /* Make sure our caller is actually sane, and we're really backing
   1873       back over a syscall.
   1874 
   1875       sc == 44 00 00 02
   1876    */
   1877    {
   1878       UChar *p = (UChar *)arch->vex.guest_CIA;
   1879 
   1880       if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
   1881          VG_(message)(Vg_DebugMsg,
   1882                       "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
   1883                       arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);
   1884 
   1885       vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
   1886    }
   1887 
   1888 #elif defined(VGP_arm_linux)
   1889    if (arch->vex.guest_R15T & 1) {
    1890       // Thumb mode.  SVC is encoded as
   1891       //   1101 1111 imm8
   1892       // where imm8 is the SVC number, and we only accept 0.
   1893       arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
   1894       UChar* p     = (UChar*)(arch->vex.guest_R15T - 1);
   1895       Bool   valid = p[0] == 0 && p[1] == 0xDF;
   1896       if (!valid) {
   1897          VG_(message)(Vg_DebugMsg,
   1898                       "?! restarting over (Thumb) syscall that is not syscall "
   1899                       "at %#llx %02x %02x\n",
   1900                       arch->vex.guest_R15T - 1ULL, p[0], p[1]);
   1901       }
   1902       vg_assert(valid);
   1903       // FIXME: NOTE, this really isn't right.  We need to back up
   1904       // ITSTATE to what it was before the SVC instruction, but we
   1905       // don't know what it was.  At least assert that it is now
   1906       // zero, because if it is nonzero then it must also have
   1907       // been nonzero for the SVC itself, which means it was
   1908       // conditional.  Urk.
   1909       vg_assert(arch->vex.guest_ITSTATE == 0);
   1910    } else {
   1911       // ARM mode.  SVC is encoded as
   1912       //   cond 1111 imm24
   1913       // where imm24 is the SVC number, and we only accept 0.
   1914       arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
   1915       UChar* p     = (UChar*)arch->vex.guest_R15T;
   1916       Bool   valid = p[0] == 0 && p[1] == 0 && p[2] == 0
   1917                      && (p[3] & 0xF) == 0xF;
   1918       if (!valid) {
   1919          VG_(message)(Vg_DebugMsg,
   1920                       "?! restarting over (ARM) syscall that is not syscall "
   1921                       "at %#llx %02x %02x %02x %02x\n",
   1922                       arch->vex.guest_R15T + 0ULL, p[0], p[1], p[2], p[3]);
   1923       }
   1924       vg_assert(valid);
   1925    }
   1926 
   1927 #elif defined(VGP_x86_darwin)
   1928    arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;
   1929 
   1930    /* Make sure our caller is actually sane, and we're really backing
   1931       back over a syscall.
   1932 
   1933       int $0x80 == CD 80
   1934       int $0x81 == CD 81
   1935       int $0x82 == CD 82
   1936       sysenter  == 0F 34
   1937    */
   1938    {
   1939        UChar *p = (UChar *)arch->vex.guest_EIP;
   1940        Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
   1941                   || (p[0] == 0xCD && p[1] == 0x81)
   1942                   || (p[0] == 0xCD && p[1] == 0x82)
   1943                   || (p[0] == 0x0F && p[1] == 0x34);
   1944        if (!ok)
   1945            VG_(message)(Vg_DebugMsg,
   1946                         "?! restarting over syscall at %#x %02x %02x\n",
   1947                         arch->vex.guest_EIP, p[0], p[1]);
   1948        vg_assert(ok);
   1949    }
   1950 
   1951 #elif defined(VGP_amd64_darwin)
   1952    // DDD: #warning GrP fixme amd64 restart unimplemented
   1953    vg_assert(0);
   1954 
   1955 #elif defined(VGP_s390x_linux)
   1956    arch->vex.guest_IA -= 2;             // sizeof(syscall)
   1957 
   1958    /* Make sure our caller is actually sane, and we're really backing
   1959       back over a syscall.
   1960 
   1961       syscall == 0A <num>
   1962    */
   1963    {
   1964       UChar *p = (UChar *)arch->vex.guest_IA;
   1965       if (p[0] != 0x0A)
   1966          VG_(message)(Vg_DebugMsg,
   1967                       "?! restarting over syscall at %#llx %02x %02x\n",
   1968                       arch->vex.guest_IA, p[0], p[1]);
   1969 
   1970       vg_assert(p[0] == 0x0A);
   1971    }
   1972 
   1973 #elif defined(VGP_mips32_linux)
   1974 
   1975    arch->vex.guest_PC -= 4;             // sizeof(mips instr)
   1976 
   1977    /* Make sure our caller is actually sane, and we're really backing
   1978       back over a syscall.
   1979 
   1980       syscall == 00 00 00 0C
   1981       big endian
   1982       syscall == 0C 00 00 00
   1983    */
   1984    {
   1985       UChar *p = (UChar *)(arch->vex.guest_PC);
   1986 #     if defined (VG_LITTLEENDIAN)
   1987       if (p[0] != 0x0c || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x00)
   1988          VG_(message)(Vg_DebugMsg,
   1989                       "?! restarting over syscall at %#x %02x %02x %02x %02x\n",
   1990                       arch->vex.guest_PC, p[0], p[1], p[2], p[3]);
   1991 
   1992       vg_assert(p[0] == 0x0c && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x00);
   1993 #     elif defined (VG_BIGENDIAN)
   1994       if (p[0] != 0x00 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x0c)
   1995          VG_(message)(Vg_DebugMsg,
   1996                       "?! restarting over syscall at %#x %02x %02x %02x %02x\n",
   1997                       arch->vex.guest_PC, p[0], p[1], p[2], p[3]);
   1998 
   1999       vg_assert(p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x0c);
   2000 #     else
   2001 #        error "Unknown endianness"
   2002 #     endif
   2003    }
   2004 
   2005 #else
   2006 #  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
   2007 #endif
   2008 }
   2009 
   2010 
   2011 /*
   2012    Fix up the guest state when a syscall is interrupted by a signal
   2013    and so has been forced to return 'sysret'.
   2014 
   2015    To do this, we determine the precise state of the syscall by
   2016    looking at the (real) IP at the time the signal happened.  The
   2017    syscall sequence looks like:
   2018 
   2019      1. unblock signals
   2020      2. perform syscall
   2021      3. save result to guest state (EAX, RAX, R3+CR0.SO, R0, V0)
   2022      4. re-block signals
   2023 
   2024    If a signal
   2025    happens at      Then     Why?
   2026    [1-2)           restart  nothing has happened (restart syscall)
   2027    [2]             restart  syscall hasn't started, or kernel wants to restart
   2028    [2-3)           save     syscall complete, but results not saved
    2029    [3-4)           nothing  syscall complete, results saved
   2030 
   2031    Sometimes we never want to restart an interrupted syscall (because
   2032    sigaction says not to), so we only restart if "restart" is True.
   2033 
   2034    This will also call VG_(post_syscall) if the syscall has actually
   2035    completed (either because it was interrupted, or because it
   2036    actually finished).  It will not call VG_(post_syscall) if the
   2037    syscall is set up for restart, which means that the pre-wrapper may
   2038    get called multiple times.
   2039 */
   2040 
   2041 void
   2042 VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
   2043                                                   Addr     ip,
   2044                                                   SysRes   sres,
   2045                                                   Bool     restart)
   2046 {
   2047    /* Note that we don't know the syscall number here, since (1) in
   2048       general there's no reliable way to get hold of it short of
   2049       stashing it in the guest state before the syscall, and (2) in
   2050       any case we don't need to know it for the actions done by this
   2051       routine.
   2052 
   2053       Furthermore, 'sres' is only used in the case where the syscall
   2054       is complete, but the result has not been committed to the guest
   2055       state yet.  In any other situation it will be meaningless and
   2056       therefore ignored. */
   2057 
   2058    ThreadState*     tst;
   2059    SyscallStatus    canonical;
   2060    ThreadArchState* th_regs;
   2061    SyscallInfo*     sci;
   2062 
   2063    /* Compute some Booleans indicating which range we're in. */
   2064    Bool outside_range,
   2065         in_setup_to_restart,      // [1,2) in the .S files
   2066         at_restart,               // [2]   in the .S files
   2067         in_complete_to_committed, // [3,4) in the .S files
   2068         in_committed_to_finished; // [4,5) in the .S files
   2069 
   2070 #  if defined(VGO_linux)
   2071    outside_range
   2072       = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
   2073    in_setup_to_restart
   2074       = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
   2075    at_restart
   2076       = ip == ML_(blksys_restart);
   2077    in_complete_to_committed
   2078       = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
   2079    in_committed_to_finished
   2080       = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
   2081 #  elif defined(VGO_darwin)
   2082    outside_range
   2083       =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
   2084       && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
   2085       && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
   2086    in_setup_to_restart
   2087       =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
   2088       || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
   2089       || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
   2090    at_restart
   2091       =  (ip == ML_(blksys_restart_MACH))
   2092       || (ip == ML_(blksys_restart_MDEP))
   2093       || (ip == ML_(blksys_restart_UNIX));
   2094    in_complete_to_committed
   2095       =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
   2096       || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
   2097       || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
   2098    in_committed_to_finished
   2099       =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
   2100       || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
   2101       || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
   2102    /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
   2103 #  else
   2104 #    error "Unknown OS"
   2105 #  endif
   2106 
   2107    if (VG_(clo_trace_signals))
   2108       VG_(message)( Vg_DebugMsg,
   2109                     "interrupted_syscall: tid=%d, ip=0x%llx, "
   2110                     "restart=%s, sres.isErr=%s, sres.val=%lld\n",
   2111                     (Int)tid,
   2112                     (ULong)ip,
   2113                     restart ? "True" : "False",
   2114                     sr_isError(sres) ? "True" : "False",
   2115                     (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );
   2116 
   2117    vg_assert(VG_(is_valid_tid)(tid));
   2118    vg_assert(tid >= 1 && tid < VG_N_THREADS);
   2119    vg_assert(VG_(is_running_thread)(tid));
   2120 
   2121    tst     = VG_(get_ThreadState)(tid);
   2122    th_regs = &tst->arch;
   2123    sci     = & syscallInfo[tid];
   2124 
   2125    /* Figure out what the state of the syscall was by examining the
   2126       (real) IP at the time of the signal, and act accordingly. */
   2127    if (outside_range) {
   2128       if (VG_(clo_trace_signals))
   2129          VG_(message)( Vg_DebugMsg,
   2130                        "  not in syscall at all: hmm, very suspicious\n" );
   2131       /* Looks like we weren't in a syscall at all.  Hmm. */
   2132       vg_assert(sci->status.what != SsIdle);
   2133       return;
   2134    }
   2135 
   2136    /* We should not be here unless this thread had first started up
   2137       the machinery for a syscall by calling VG_(client_syscall).
   2138       Hence: */
   2139    vg_assert(sci->status.what != SsIdle);
   2140 
   2141    /* now, do one of four fixup actions, depending on where the IP has
   2142       got to. */
   2143 
   2144    if (in_setup_to_restart) {
   2145       /* syscall hasn't even started; go around again */
   2146       if (VG_(clo_trace_signals))
   2147          VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
   2148       vg_assert(sci->status.what == SsHandToKernel);
   2149       ML_(fixup_guest_state_to_restart_syscall)(th_regs);
   2150    }
   2151 
   2152    else
   2153    if (at_restart) {
   2154       /* We're either about to run the syscall, or it was interrupted
   2155          and the kernel restarted it.  Restart if asked, otherwise
   2156          EINTR it. */
   2157       if (restart) {
   2158          if (VG_(clo_trace_signals))
   2159             VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
   2160          ML_(fixup_guest_state_to_restart_syscall)(th_regs);
   2161       } else {
   2162          if (VG_(clo_trace_signals))
   2163             VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
   2164          canonical = convert_SysRes_to_SyscallStatus(
   2165                         VG_(mk_SysRes_Error)( VKI_EINTR )
   2166                      );
   2167          if (!(sci->flags & SfNoWriteResult))
   2168             putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
   2169          sci->status = canonical;
   2170          VG_(post_syscall)(tid);
   2171       }
   2172    }
   2173 
   2174    else
   2175    if (in_complete_to_committed) {
   2176       /* Syscall complete, but result hasn't been written back yet.
   2177          Write the SysRes we were supplied with back to the guest
   2178          state. */
   2179       if (VG_(clo_trace_signals))
   2180          VG_(message)( Vg_DebugMsg,
   2181                        "  completed, but uncommitted: committing\n");
   2182       canonical = convert_SysRes_to_SyscallStatus( sres );
   2183       if (!(sci->flags & SfNoWriteResult))
   2184          putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
   2185       sci->status = canonical;
   2186       VG_(post_syscall)(tid);
   2187    }
   2188 
   2189    else
   2190    if (in_committed_to_finished) {
   2191       /* Result committed, but the signal mask has not been restored;
   2192          we expect our caller (the signal handler) will have fixed
   2193          this up. */
   2194       if (VG_(clo_trace_signals))
   2195          VG_(message)( Vg_DebugMsg,
   2196                        "  completed and committed: nothing to do\n");
   2197       getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
   2198       vg_assert(sci->status.what == SsComplete);
   2199       VG_(post_syscall)(tid);
   2200    }
   2201 
   2202    else
   2203       VG_(core_panic)("?? strange syscall interrupt state?");
   2204 
   2205    /* In all cases, the syscall is now finished (even if we called
   2206       ML_(fixup_guest_state_to_restart_syscall), since that just
   2207       re-positions the guest's IP for another go at it).  So we need
   2208       to record that fact. */
   2209    sci->status.what = SsIdle;
   2210 }
   2211 
   2212 
   2213 #if defined(VGO_darwin)
   2214 // Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
   2215 // This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
   2216 // This longjmps back to the scheduler.
   2217 void ML_(wqthread_continue_NORETURN)(ThreadId tid)
   2218 {
   2219    ThreadState*     tst;
   2220    SyscallInfo*     sci;
   2221 
   2222    VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");
   2223 
   2224    PRINT("SYSCALL[%d,%d](%s) workq_ops() starting new workqueue item\n",
   2225          VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));
   2226 
   2227    vg_assert(VG_(is_valid_tid)(tid));
   2228    vg_assert(tid >= 1 && tid < VG_N_THREADS);
   2229    vg_assert(VG_(is_running_thread)(tid));
   2230 
   2231    tst     = VG_(get_ThreadState)(tid);
   2232    sci     = & syscallInfo[tid];
   2233    vg_assert(sci->status.what != SsIdle);
   2234    vg_assert(tst->os_state.wq_jmpbuf_valid);  // check this BEFORE post_syscall
   2235 
   2236    // Pretend the syscall completed normally, but don't touch the thread state.
   2237    sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
   2238    sci->flags |= SfNoWriteResult;
   2239    VG_(post_syscall)(tid);
   2240 
   2241    sci->status.what = SsIdle;
   2242 
   2243    vg_assert(tst->sched_jmpbuf_valid);
   2244    VG_MINIMAL_LONGJMP(tst->sched_jmpbuf);
   2245 
   2246    /* NOTREACHED */
   2247    vg_assert(0);
   2248 }
   2249 #endif
   2250 
   2251 
   2252 /* ---------------------------------------------------------------------
   2253    A place to store the where-to-call-when-really-done pointer
   2254    ------------------------------------------------------------------ */
   2255 
    2256 // When the final thread is done, where shall I call to shut down the
   2257 // system cleanly?  Is set once at startup (in m_main) and never
   2258 // changes after that.  Is basically a pointer to the exit
   2259 // continuation.  This is all just a nasty hack to avoid calling
   2260 // directly from m_syswrap to m_main at exit, since that would cause
   2261 // m_main to become part of a module cycle, which is silly.
   2262 void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
   2263        (ThreadId,VgSchedReturnCode)
   2264    = NULL;
   2265 
   2266 /*--------------------------------------------------------------------*/
   2267 /*--- end                                                          ---*/
   2268 /*--------------------------------------------------------------------*/
   2269