Home | History | Annotate | Download | only in m_syswrap
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Platform-specific syscalls stuff.        syswrap-x86-linux.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2015 Nicholas Nethercote
     11       njn (at) valgrind.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #if defined(VGP_x86_linux)
     32 
     33 /* TODO/FIXME jrs 20050207: assignments to the syscall return result
     34    in interrupted_syscall() need to be reviewed.  They don't seem
     35    to assign the shadow state.
     36 */
     37 
     38 #include "pub_core_basics.h"
     39 #include "pub_core_vki.h"
     40 #include "pub_core_vkiscnums.h"
     41 #include "pub_core_threadstate.h"
     42 #include "pub_core_aspacemgr.h"
     43 #include "pub_core_debuglog.h"
     44 #include "pub_core_libcbase.h"
     45 #include "pub_core_libcassert.h"
     46 #include "pub_core_libcprint.h"
     47 #include "pub_core_libcproc.h"
     48 #include "pub_core_libcsignal.h"
     49 #include "pub_core_mallocfree.h"
     50 #include "pub_core_options.h"
     51 #include "pub_core_scheduler.h"
     52 #include "pub_core_sigframe.h"      // For VG_(sigframe_destroy)()
     53 #include "pub_core_signals.h"
     54 #include "pub_core_syscall.h"
     55 #include "pub_core_syswrap.h"
     56 #include "pub_core_tooliface.h"
     57 
     58 #include "priv_types_n_macros.h"
     59 #include "priv_syswrap-generic.h"    /* for decls of generic wrappers */
     60 #include "priv_syswrap-linux.h"      /* for decls of linux-ish wrappers */
     61 #include "priv_syswrap-linux-variants.h" /* decls of linux variant wrappers */
     62 #include "priv_syswrap-main.h"
     63 
     64 
     65 /* ---------------------------------------------------------------------
     66    clone() handling
     67    ------------------------------------------------------------------ */
     68 
     69 /* Call f(arg1), but first switch stacks, using 'stack' as the new
     70    stack, and use 'retaddr' as f's return-to address.  Also, clear all
     71    the integer registers before entering f.*/
/* Prototype for the assembly routine defined immediately below.  It is
   declared noreturn because the routine abandons the caller's stack and
   leaves via 'ret' into f rather than returning to its caller. */
__attribute__((noreturn))
void ML_(call_on_new_stack_0_1) ( Addr stack,
			          Addr retaddr,
			          void (*f)(Word),
                                  Word arg1 );
// Argument offsets relative to %esp at entry to the routine:
//  4(%esp) == stack
//  8(%esp) == retaddr
// 12(%esp) == f
// 16(%esp) == arg1
asm(
".text\n"
// The label is spelled out literally here; presumably it is the name
// that ML_(call_on_new_stack_0_1) expands to -- confirm against the
// ML_ macro definition.
".globl vgModuleLocal_call_on_new_stack_0_1\n"
"vgModuleLocal_call_on_new_stack_0_1:\n"
"   movl %esp, %esi\n"     // remember old stack pointer
"   movl 4(%esi), %esp\n"  // set stack
// The three pushes leave the new stack as (top first): f, retaddr, arg1.
"   pushl 16(%esi)\n"      // arg1 to stack
"   pushl  8(%esi)\n"      // retaddr to stack
"   pushl 12(%esi)\n"      // f to stack
"   movl $0, %eax\n"       // zero all GP regs
"   movl $0, %ebx\n"
"   movl $0, %ecx\n"
"   movl $0, %edx\n"
"   movl $0, %esi\n"
"   movl $0, %edi\n"
"   movl $0, %ebp\n"
// 'ret' pops f and jumps to it; f then finds retaddr in the return
// address slot and arg1 in the first argument slot.
"   ret\n"                 // jump to f
"   ud2\n"                 // should never get here
".previous\n"
);
    101 
    102 
    103 /*
    104         Perform a clone system call.  clone is strange because it has
    105         fork()-like return-twice semantics, so it needs special
    106         handling here.
    107 
    108         Upon entry, we have:
    109 
    110             int (fn)(void*)     in  0+FSZ(%esp)
    111             void* child_stack   in  4+FSZ(%esp)
    112             int flags           in  8+FSZ(%esp)
    113             void* arg           in 12+FSZ(%esp)
    114             pid_t* child_tid    in 16+FSZ(%esp)
    115             pid_t* parent_tid   in 20+FSZ(%esp)
    116             void* tls_ptr       in 24+FSZ(%esp)
    117 
    118         System call requires:
    119 
    120             int    $__NR_clone  in %eax
    121             int    flags        in %ebx
    122             void*  child_stack  in %ecx
    123             pid_t* parent_tid   in %edx
    124             pid_t* child_tid    in %edi
    125             void*  tls_ptr      in %esi
    126 
    127 	Returns an Int encoded in the linux-x86 way, not a SysRes.
    128  */
/* FSZ is a string that is pasted into the displacement expressions of
   the assembly below; it accounts for the return address plus the
   three registers pushed on entry. */
#define FSZ               "4+4+4+4" /* frame size = retaddr+ebx+edi+esi */
/* Syscall numbers turned into strings so they can be spliced into the
   assembly text. */
#define __NR_CLONE        VG_STRINGIFY(__NR_clone)
#define __NR_EXIT         VG_STRINGIFY(__NR_exit)

/* Prototype for the assembly routine defined immediately below.  The
   last (unnamed) parameter is the tls_ptr that is loaded into %esi. */
extern
Int do_syscall_clone_x86_linux ( Word (*fn)(void *),
                                 void* stack,
                                 Int   flags,
                                 void* arg,
                                 Int*  child_tid,
                                 Int*  parent_tid,
                                 vki_modify_ldt_t * );
asm(
".text\n"
".globl do_syscall_clone_x86_linux\n"
"do_syscall_clone_x86_linux:\n"
         /* save the registers that are loaded with syscall arguments
            below; they are restored at label 1 on the parent/error path */
"        push    %ebx\n"
"        push    %edi\n"
"        push    %esi\n"

         /* set up child stack with function and arg */
"        movl     4+"FSZ"(%esp), %ecx\n"    /* syscall arg2: child stack */
"        movl    12+"FSZ"(%esp), %ebx\n"    /* fn arg */
"        movl     0+"FSZ"(%esp), %eax\n"    /* fn */
"        lea     -8(%ecx), %ecx\n"          /* make space on stack */
"        movl    %ebx, 4(%ecx)\n"           /*   fn arg */
"        movl    %eax, 0(%ecx)\n"           /*   fn */

         /* get other args to clone */
"        movl     8+"FSZ"(%esp), %ebx\n"    /* syscall arg1: flags */
"        movl    20+"FSZ"(%esp), %edx\n"    /* syscall arg3: parent tid * */
"        movl    16+"FSZ"(%esp), %edi\n"    /* syscall arg5: child tid * */
"        movl    24+"FSZ"(%esp), %esi\n"    /* syscall arg4: tls_ptr * */
"        movl    $"__NR_CLONE", %eax\n"
"        int     $0x80\n"                   /* clone() */
"        testl   %eax, %eax\n"              /* child if retval == 0 */
"        jnz     1f\n"

         /* CHILD - call thread function */
         /* The child runs on the stack prepared above: the pop removes
            fn, leaving fn's arg on top, so after 'call' pushes its
            return address the arg sits in fn's first argument slot. */
"        popl    %eax\n"
"        call    *%eax\n"                   /* call fn */

         /* exit with result */
"        movl    %eax, %ebx\n"              /* arg1: return value from fn */
"        movl    $"__NR_EXIT", %eax\n"
"        int     $0x80\n"

         /* Hm, exit returned */
"        ud2\n"

"1:\n"   /* PARENT or ERROR */
         /* %eax still holds the raw result of the clone syscall and is
            handed back to the C caller unchanged */
"        pop     %esi\n"
"        pop     %edi\n"
"        pop     %ebx\n"
"        ret\n"
".previous\n"
);

/* The helper macros are only needed by the assembly above. */
#undef FSZ
#undef __NR_CLONE
#undef __NR_EXIT
    190 
    191 
    192 // forward declarations
/* Both are defined further down in this file but are already called by
   do_clone() below. */
static void setup_child ( ThreadArchState*, ThreadArchState*, Bool );
static SysRes sys_set_thread_area ( ThreadId, vki_modify_ldt_t* );
    195 
    196 /*
    197    When a client clones, we need to keep track of the new thread.  This means:
    198    1. allocate a ThreadId+ThreadState+stack for the thread
    199 
    200    2. initialize the thread's new VCPU state
    201 
    202    3. create the thread using the same args as the client requested,
    203    but using the scheduler entrypoint for EIP, and a separate stack
    204    for ESP.
    205  */
/* Create a new thread on behalf of client thread 'ptid'.

   ptid           the (currently running) parent thread
   flags          clone flags as requested by the client
   esp            stack pointer requested for the child; 0 means the
                  child keeps the copy of the parent's ESP
   parent_tidptr  forwarded unchanged to the clone syscall
   child_tidptr   forwarded unchanged to the clone syscall
   tlsinfo        TLS descriptor, consulted only if VKI_CLONE_SETTLS is
                  set in flags

   Returns the SysRes built from the raw clone result, or an error
   SysRes if the stack allocation or the TLS setup failed.  On any
   error the child's ThreadState is released again. */
static SysRes do_clone ( ThreadId ptid,
                         UInt flags, Addr esp,
                         Int* parent_tidptr,
                         Int* child_tidptr,
                         vki_modify_ldt_t *tlsinfo)
{
   static const Bool debug = False;

   ThreadId     ctid = VG_(alloc_ThreadState)();
   ThreadState* ptst = VG_(get_ThreadState)(ptid);
   ThreadState* ctst = VG_(get_ThreadState)(ctid);
   UWord*       stack;
   SysRes       res;
   Int          eax;
   vki_sigset_t blockall, savedmask;

   VG_(sigfillset)(&blockall);

   vg_assert(VG_(is_running_thread)(ptid));
   vg_assert(VG_(is_valid_tid)(ctid));

   /* This is the stack handed to the clone syscall below, i.e. the one
      the new thread starts on inside ML_(start_thread_NORETURN); the
      client-visible stack pointer is set separately via guest_ESP. */
   stack = (UWord*)ML_(allocstack)(ctid);
   if (stack == NULL) {
      res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
      /* NOTE(review): this path reaches the pre_thread_ll_exit
         notification at 'out' although pre_thread_ll_create has not
         been issued yet -- confirm that tools tolerate this. */
      goto out;
   }

   /* Copy register state

      Both parent and child return to the same place, and the code
      following the clone syscall works out which is which, so we
      don't need to worry about it.

      The parent gets the child's new tid returned from clone, but the
      child gets 0.

      If the clone call specifies a NULL esp for the new thread, then
      it actually gets a copy of the parent's esp.
   */
   /* Note: the clone call done by the Quadrics Elan3 driver specifies
      clone flags of 0xF00, and it seems to rely on the assumption
      that the child inherits a copy of the parent's GDT.
      setup_child takes care of setting that up. */
   setup_child( &ctst->arch, &ptst->arch, True );

   /* Make sys_clone appear to have returned Success(0) in the
      child. */
   ctst->arch.vex.guest_EAX = 0;

   if (esp != 0)
      ctst->arch.vex.guest_ESP = esp;

   ctst->os_state.parent = ptid;

   /* inherit signal mask */
   ctst->sig_mask     = ptst->sig_mask;
   ctst->tmp_sig_mask = ptst->sig_mask;

   /* Start the child with its threadgroup being the same as the
      parent's.  This is so that any exit_group calls that happen
      after the child is created but before it sets its
      os_state.threadgroup field for real (in thread_wrapper in
      syswrap-linux.c), really kill the new thread.  a.k.a this avoids
      a race condition in which the thread is unkillable (via
      exit_group) because its threadgroup is not set.  The race window
      is probably only a few hundred or a few thousand cycles long.
      See #226116. */
   ctst->os_state.threadgroup = ptst->os_state.threadgroup;

   ML_(guess_and_register_stack) (esp, ctst);

   /* Assume the clone will succeed, and tell any tool that wants to
      know that this thread has come into existence.  We cannot defer
      it beyond this point because sys_set_thread_area, just below,
      causes tCheck to assert by making references to the new ThreadId
      if we don't state the new thread exists prior to that point.
      If the clone fails, we'll send out a ll_exit notification for it
      at the out: label below, to clean up. */
   vg_assert(VG_(owns_BigLock_LL)(ptid));
   VG_TRACK ( pre_thread_ll_create, ptid, ctid );

   if (flags & VKI_CLONE_SETTLS) {
      /* NOTE(review): the debug print dereferences tlsinfo before
         sys_set_thread_area gets the chance to reject a NULL pointer;
         harmless while 'debug' is False. */
      if (debug)
         VG_(printf)("clone child has SETTLS: tls info at %p: idx=%u "
                     "base=%#lx limit=%x; esp=%#x fs=%x gs=%x\n",
                     tlsinfo, tlsinfo->entry_number,
                     tlsinfo->base_addr, tlsinfo->limit,
                     ptst->arch.vex.guest_ESP,
                     ctst->arch.vex.guest_FS, ctst->arch.vex.guest_GS);
      /* Install the TLS descriptor in the child's simulated GDT. */
      res = sys_set_thread_area(ctid, tlsinfo);
      if (sr_isError(res))
         goto out;
   }

   /* The TLS request has been dealt with above, so the flag is removed
      from what is passed to the real syscall (which also gets a NULL
      tls pointer below). */
   flags &= ~VKI_CLONE_SETTLS;

   /* start the thread with everything blocked */
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);

   /* Create the new thread */
   /* The new thread enters ML_(start_thread_NORETURN) with a pointer to
      its own ThreadState as the argument. */
   eax = do_syscall_clone_x86_linux(
            ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
            child_tidptr, parent_tidptr, NULL
         );
   res = VG_(mk_SysRes_x86_linux)( eax );

   /* Back in the parent: restore the signal mask saved above. */
   VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);

  out:
   if (sr_isError(res)) {
      /* clone failed */
      VG_(cleanup_thread)(&ctst->arch);
      ctst->status = VgTs_Empty;
      /* oops.  Better tell the tool the thread exited in a hurry :-) */
      VG_TRACK( pre_thread_ll_exit, ctid );
   }

   return res;
}
    325 
    326 
    327 /* ---------------------------------------------------------------------
    328    LDT/GDT simulation
    329    ------------------------------------------------------------------ */
    330 
    331 /* Details of the LDT simulation
    332    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    333 
    334    When a program runs natively, the linux kernel allows each *thread*
    335    in it to have its own LDT.  Almost all programs never do this --
    336    it's wildly unportable, after all -- and so the kernel never
    337    allocates the structure, which is just as well as an LDT occupies
    338    64k of memory (8192 entries of size 8 bytes).
    339 
    340    A thread may choose to modify its LDT entries, by doing the
    341    __NR_modify_ldt syscall.  In such a situation the kernel will then
    342    allocate an LDT structure for it.  Each LDT entry is basically a
    343    (base, limit) pair.  A virtual address in a specific segment is
    344    translated to a linear address by adding the segment's base value.
    345    In addition, the virtual address must not exceed the limit value.
    346 
    347    To use an LDT entry, a thread loads one of the segment registers
    348    (%cs, %ss, %ds, %es, %fs, %gs) with the index of the LDT entry (0
    349    .. 8191) it wants to use.  In fact, the required value is (index <<
    350    3) + 7, but that's not important right now.  Any normal instruction
    351    which includes an addressing mode can then be made relative to that
    352    LDT entry by prefixing the insn with a so-called segment-override
    353    prefix, a byte which indicates which of the 6 segment registers
    354    holds the LDT index.
    355 
    356    Now, a key constraint is that valgrind's address checks operate in
    357    terms of linear addresses.  So we have to explicitly translate
    358    virtual addrs into linear addrs, and that means doing a complete
    359    LDT simulation.
    360 
    361    Calls to modify_ldt are intercepted.  For each thread, we maintain
    362    an LDT (with the same normally-never-allocated optimisation that
    363    the kernel does).  This is updated as expected via calls to
    364    modify_ldt.
    365 
    366    When a thread does an amode calculation involving a segment
    367    override prefix, the relevant LDT entry for the thread is
    368    consulted.  It all works.
    369 
    370    There is a conceptual problem, which appears when switching back to
    371    native execution, either temporarily to pass syscalls to the
    372    kernel, or permanently, when debugging V.  Problem at such points
    373    is that it's pretty pointless to copy the simulated machine's
    374    segment registers to the real machine, because we'd also need to
    375    copy the simulated LDT into the real one, and that's prohibitively
    376    expensive.
    377 
    378    Fortunately it looks like no syscalls rely on the segment regs or
    379    LDT being correct, so we can get away with it.  Apart from that the
    380    simulation is pretty straightforward.  All 6 segment registers are
    381    tracked, although only %ds, %es, %fs and %gs are allowed as
    382    prefixes.  Perhaps it could be restricted even more than that -- I
    383    am not sure what is and isn't allowed in user-mode.
    384 */
    385 
    386 /* Translate a struct modify_ldt_ldt_s to a VexGuestX86SegDescr, using
    387    the Linux kernel's logic (cut-n-paste of code in
    388    linux/kernel/ldt.c).  */
    389 
    390 static
    391 void translate_to_hw_format ( /* IN  */ vki_modify_ldt_t* inn,
    392                               /* OUT */ VexGuestX86SegDescr* out,
    393                                         Int oldmode )
    394 {
    395    UInt entry_1, entry_2;
    396    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    397 
    398    if (0)
    399       VG_(printf)("translate_to_hw_format: base %#lx, limit %u\n",
    400                   inn->base_addr, inn->limit );
    401 
    402    /* Allow LDTs to be cleared by the user. */
    403    if (inn->base_addr == 0 && inn->limit == 0) {
    404       if (oldmode ||
    405           (inn->contents == 0      &&
    406            inn->read_exec_only == 1   &&
    407            inn->seg_32bit == 0      &&
    408            inn->limit_in_pages == 0   &&
    409            inn->seg_not_present == 1   &&
    410            inn->useable == 0 )) {
    411          entry_1 = 0;
    412          entry_2 = 0;
    413          goto install;
    414       }
    415    }
    416 
    417    entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
    418              (inn->limit & 0x0ffff);
    419    entry_2 = (inn->base_addr & 0xff000000) |
    420              ((inn->base_addr & 0x00ff0000) >> 16) |
    421              (inn->limit & 0xf0000) |
    422              ((inn->read_exec_only ^ 1) << 9) |
    423              (inn->contents << 10) |
    424              ((inn->seg_not_present ^ 1) << 15) |
    425              (inn->seg_32bit << 22) |
    426              (inn->limit_in_pages << 23) |
    427              0x7000;
    428    if (!oldmode)
    429       entry_2 |= (inn->useable << 20);
    430 
    431    /* Install the new entry ...  */
    432   install:
    433    out->LdtEnt.Words.word1 = entry_1;
    434    out->LdtEnt.Words.word2 = entry_2;
    435 }
    436 
    437 /* Create a zeroed-out GDT. */
    438 static VexGuestX86SegDescr* alloc_zeroed_x86_GDT ( void )
    439 {
    440    Int nbytes = VEX_GUEST_X86_GDT_NENT * sizeof(VexGuestX86SegDescr);
    441    return VG_(calloc)("di.syswrap-x86.azxG.1", nbytes, 1);
    442 }
    443 
    444 /* Create a zeroed-out LDT. */
    445 static VexGuestX86SegDescr* alloc_zeroed_x86_LDT ( void )
    446 {
    447    Int nbytes = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
    448    return VG_(calloc)("di.syswrap-x86.azxL.1", nbytes, 1);
    449 }
    450 
    451 /* Free up an LDT or GDT allocated by the above fns. */
/* Release a descriptor table obtained from alloc_zeroed_x86_GDT() or
   alloc_zeroed_x86_LDT().  'dt' must not be NULL; callers are expected
   to check for an unallocated table themselves. */
static void free_LDT_or_GDT ( VexGuestX86SegDescr* dt )
{
   vg_assert(dt);
   VG_(free)(dt);
}
    457 
    458 /* Copy contents between two existing LDTs. */
    459 static void copy_LDT_from_to ( VexGuestX86SegDescr* src,
    460                                VexGuestX86SegDescr* dst )
    461 {
    462    Int i;
    463    vg_assert(src);
    464    vg_assert(dst);
    465    for (i = 0; i < VEX_GUEST_X86_LDT_NENT; i++)
    466       dst[i] = src[i];
    467 }
    468 
    469 /* Copy contents between two existing GDTs. */
    470 static void copy_GDT_from_to ( VexGuestX86SegDescr* src,
    471                                VexGuestX86SegDescr* dst )
    472 {
    473    Int i;
    474    vg_assert(src);
    475    vg_assert(dst);
    476    for (i = 0; i < VEX_GUEST_X86_GDT_NENT; i++)
    477       dst[i] = src[i];
    478 }
    479 
    480 /* Free this thread's DTs, if it has any. */
    481 static void deallocate_LGDTs_for_thread ( VexGuestX86State* vex )
    482 {
    483    vg_assert(sizeof(HWord) == sizeof(void*));
    484 
    485    if (0)
    486       VG_(printf)("deallocate_LGDTs_for_thread: "
    487                   "ldt = 0x%lx, gdt = 0x%lx\n",
    488                   vex->guest_LDT, vex->guest_GDT );
    489 
    490    if (vex->guest_LDT != (HWord)NULL) {
    491       free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_LDT );
    492       vex->guest_LDT = (HWord)NULL;
    493    }
    494 
    495    if (vex->guest_GDT != (HWord)NULL) {
    496       free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_GDT );
    497       vex->guest_GDT = (HWord)NULL;
    498    }
    499 }
    500 
    501 
    502 /*
    503  * linux/kernel/ldt.c
    504  *
    505  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
    506  * Copyright (C) 1999 Ingo Molnar <mingo (at) redhat.com>
    507  */
    508 
    509 /*
    510  * read_ldt() is not really atomic - this is not a problem since
    511  * synchronization of reads and writes done to the LDT has to be
    512  * assured by user-space anyway. Writes are atomic, to protect
    513  * the security checks done on new descriptors.
    514  */
    515 static
    516 SysRes read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
    517 {
    518    SysRes res;
    519    UInt   i, size;
    520    UChar* ldt;
    521 
    522    if (0)
    523       VG_(printf)("read_ldt: tid = %u, ptr = %p, bytecount = %u\n",
    524                   tid, ptr, bytecount );
    525 
    526    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    527    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    528 
    529    ldt = (UChar*)(VG_(threads)[tid].arch.vex.guest_LDT);
    530    res = VG_(mk_SysRes_Success)( 0 );
    531    if (ldt == NULL)
    532       /* LDT not allocated, meaning all entries are null */
    533       goto out;
    534 
    535    size = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
    536    if (size > bytecount)
    537       size = bytecount;
    538 
    539    res = VG_(mk_SysRes_Success)( size );
    540    for (i = 0; i < size; i++)
    541       ptr[i] = ldt[i];
    542 
    543   out:
    544    return res;
    545 }
    546 
    547 
    548 static
    549 SysRes write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
    550 {
    551    SysRes res;
    552    VexGuestX86SegDescr* ldt;
    553    vki_modify_ldt_t* ldt_info;
    554 
    555    if (0)
    556       VG_(printf)("write_ldt: tid = %u, ptr = %p, "
    557                   "bytecount = %u, oldmode = %d\n",
    558                   tid, ptr, bytecount, oldmode );
    559 
    560    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    561    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    562 
    563    ldt      = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
    564    ldt_info = (vki_modify_ldt_t*)ptr;
    565 
    566    res = VG_(mk_SysRes_Error)( VKI_EINVAL );
    567    if (bytecount != sizeof(vki_modify_ldt_t))
    568       goto out;
    569 
    570    res = VG_(mk_SysRes_Error)( VKI_EINVAL );
    571    if (ldt_info->entry_number >= VEX_GUEST_X86_LDT_NENT)
    572       goto out;
    573    if (ldt_info->contents == 3) {
    574       if (oldmode)
    575          goto out;
    576       if (ldt_info->seg_not_present == 0)
    577          goto out;
    578    }
    579 
    580    /* If this thread doesn't have an LDT, we'd better allocate it
    581       now. */
    582    if (ldt == NULL) {
    583       ldt = alloc_zeroed_x86_LDT();
    584       VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
    585    }
    586 
    587    /* Install the new entry ...  */
    588    translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number], oldmode );
    589    res = VG_(mk_SysRes_Success)( 0 );
    590 
    591   out:
    592    return res;
    593 }
    594 
    595 
    596 static SysRes sys_modify_ldt ( ThreadId tid,
    597                                Int func, void* ptr, UInt bytecount )
    598 {
    599    SysRes ret = VG_(mk_SysRes_Error)( VKI_ENOSYS );
    600 
    601    switch (func) {
    602    case 0:
    603       ret = read_ldt(tid, ptr, bytecount);
    604       break;
    605    case 1:
    606       ret = write_ldt(tid, ptr, bytecount, 1);
    607       break;
    608    case 2:
    609       VG_(unimplemented)("sys_modify_ldt: func == 2");
    610       /* god knows what this is about */
    611       /* ret = read_default_ldt(ptr, bytecount); */
    612       /*UNREACHED*/
    613       break;
    614    case 0x11:
    615       ret = write_ldt(tid, ptr, bytecount, 0);
    616       break;
    617    }
    618    return ret;
    619 }
    620 
    621 
    622 static SysRes sys_set_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
    623 {
    624    Int                  idx;
    625    VexGuestX86SegDescr* gdt;
    626 
    627    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    628    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    629 
    630    if (info == NULL)
    631       return VG_(mk_SysRes_Error)( VKI_EFAULT );
    632 
    633    gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
    634 
    635    /* If the thread doesn't have a GDT, allocate it now. */
    636    if (!gdt) {
    637       gdt = alloc_zeroed_x86_GDT();
    638       VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
    639    }
    640 
    641    idx = info->entry_number;
    642 
    643    if (idx == -1) {
    644       /* Find and use the first free entry.  Don't allocate entry
    645          zero, because the hardware will never do that, and apparently
    646          doing so confuses some code (perhaps stuff running on
    647          Wine). */
    648       for (idx = 1; idx < VEX_GUEST_X86_GDT_NENT; idx++) {
    649          if (gdt[idx].LdtEnt.Words.word1 == 0
    650              && gdt[idx].LdtEnt.Words.word2 == 0)
    651             break;
    652       }
    653 
    654       if (idx == VEX_GUEST_X86_GDT_NENT)
    655          return VG_(mk_SysRes_Error)( VKI_ESRCH );
    656    } else if (idx < 0 || idx == 0 || idx >= VEX_GUEST_X86_GDT_NENT) {
    657       /* Similarly, reject attempts to use GDT[0]. */
    658       return VG_(mk_SysRes_Error)( VKI_EINVAL );
    659    }
    660 
    661    translate_to_hw_format(info, &gdt[idx], 0);
    662 
    663    VG_TRACK( pre_mem_write, Vg_CoreSysCall, tid,
    664              "set_thread_area(info->entry)",
    665              (Addr) & info->entry_number, sizeof(unsigned int) );
    666    info->entry_number = idx;
    667    VG_TRACK( post_mem_write, Vg_CoreSysCall, tid,
    668              (Addr) & info->entry_number, sizeof(unsigned int) );
    669 
    670    return VG_(mk_SysRes_Success)( 0 );
    671 }
    672 
    673 
    674 static SysRes sys_get_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
    675 {
    676    Int idx;
    677    VexGuestX86SegDescr* gdt;
    678 
    679    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    680    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    681 
    682    if (info == NULL)
    683       return VG_(mk_SysRes_Error)( VKI_EFAULT );
    684 
    685    idx = info->entry_number;
    686 
    687    if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
    688       return VG_(mk_SysRes_Error)( VKI_EINVAL );
    689 
    690    gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
    691 
    692    /* If the thread doesn't have a GDT, allocate it now. */
    693    if (!gdt) {
    694       gdt = alloc_zeroed_x86_GDT();
    695       VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
    696    }
    697 
    698    info->base_addr = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
    699                      ( gdt[idx].LdtEnt.Bits.BaseMid << 16 ) |
    700                      gdt[idx].LdtEnt.Bits.BaseLow;
    701    info->limit = ( gdt[idx].LdtEnt.Bits.LimitHi << 16 ) |
    702                    gdt[idx].LdtEnt.Bits.LimitLow;
    703    info->seg_32bit = gdt[idx].LdtEnt.Bits.Default_Big;
    704    info->contents = ( gdt[idx].LdtEnt.Bits.Type >> 2 ) & 0x3;
    705    info->read_exec_only = ( gdt[idx].LdtEnt.Bits.Type & 0x1 ) ^ 0x1;
    706    info->limit_in_pages = gdt[idx].LdtEnt.Bits.Granularity;
    707    info->seg_not_present = gdt[idx].LdtEnt.Bits.Pres ^ 0x1;
    708    info->useable = gdt[idx].LdtEnt.Bits.Sys;
    709    info->reserved = 0;
    710 
    711    return VG_(mk_SysRes_Success)( 0 );
    712 }
    713 
    714 /* ---------------------------------------------------------------------
    715    More thread stuff
    716    ------------------------------------------------------------------ */
    717 
    718 void VG_(cleanup_thread) ( ThreadArchState* arch )
    719 {
    720    /* Release arch-specific resources held by this thread. */
    721    /* On x86, we have to dump the LDT and GDT. */
    722    deallocate_LGDTs_for_thread( &arch->vex );
    723 }
    724 
    725 
    726 static void setup_child ( /*OUT*/ ThreadArchState *child,
    727                           /*IN*/  ThreadArchState *parent,
    728                           Bool inherit_parents_GDT )
    729 {
    730    /* We inherit our parent's guest state. */
    731    child->vex = parent->vex;
    732    child->vex_shadow1 = parent->vex_shadow1;
    733    child->vex_shadow2 = parent->vex_shadow2;
    734 
    735    /* We inherit our parent's LDT. */
    736    if (parent->vex.guest_LDT == (HWord)NULL) {
    737       /* We hope this is the common case. */
    738       child->vex.guest_LDT = (HWord)NULL;
    739    } else {
    740       /* No luck .. we have to take a copy of the parent's. */
    741       child->vex.guest_LDT = (HWord)alloc_zeroed_x86_LDT();
    742       copy_LDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_LDT,
    743                         (VexGuestX86SegDescr*)child->vex.guest_LDT );
    744    }
    745 
    746    /* Either we start with an empty GDT (the usual case) or inherit a
    747       copy of our parents' one (Quadrics Elan3 driver -style clone
    748       only). */
    749    child->vex.guest_GDT = (HWord)NULL;
    750 
    751    if (inherit_parents_GDT && parent->vex.guest_GDT != (HWord)NULL) {
    752       child->vex.guest_GDT = (HWord)alloc_zeroed_x86_GDT();
    753       copy_GDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_GDT,
    754                         (VexGuestX86SegDescr*)child->vex.guest_GDT );
    755    }
    756 }
    757 
    758 
    759 /* ---------------------------------------------------------------------
    760    PRE/POST wrappers for x86/Linux-specific syscalls
    761    ------------------------------------------------------------------ */
    762 
/* Shorthands so that the wrappers below can be introduced as PRE(name)
   and POST(name); both forward to the generic definition templates with
   the x86_linux platform tag. */
#define PRE(name)       DEFN_PRE_TEMPLATE(x86_linux, name)
#define POST(name)      DEFN_POST_TEMPLATE(x86_linux, name)

/* Add prototypes for the wrappers declared here, so that gcc doesn't
   harass us for not having prototypes.  Really this is a kludge --
   the right thing to do is to make these wrappers 'static' since they
   aren't visible outside this file, but that requires even more macro
   magic. */
/* One DECL_TEMPLATE line per x86/Linux-specific syscall wrapper. */
DECL_TEMPLATE(x86_linux, sys_stat64);
DECL_TEMPLATE(x86_linux, sys_fstatat64);
DECL_TEMPLATE(x86_linux, sys_fstat64);
DECL_TEMPLATE(x86_linux, sys_lstat64);
DECL_TEMPLATE(x86_linux, sys_clone);
DECL_TEMPLATE(x86_linux, old_mmap);
DECL_TEMPLATE(x86_linux, sys_mmap2);
DECL_TEMPLATE(x86_linux, sys_sigreturn);
DECL_TEMPLATE(x86_linux, sys_rt_sigreturn);
DECL_TEMPLATE(x86_linux, sys_modify_ldt);
DECL_TEMPLATE(x86_linux, sys_set_thread_area);
DECL_TEMPLATE(x86_linux, sys_get_thread_area);
DECL_TEMPLATE(x86_linux, sys_ptrace);
DECL_TEMPLATE(x86_linux, sys_sigsuspend);
DECL_TEMPLATE(x86_linux, old_select);
DECL_TEMPLATE(x86_linux, sys_vm86old);
DECL_TEMPLATE(x86_linux, sys_vm86);
DECL_TEMPLATE(x86_linux, sys_syscall223);
    789 
    790 PRE(old_select)
    791 {
    792    /* struct sel_arg_struct {
    793       unsigned long n;
    794       fd_set *inp, *outp, *exp;
    795       struct timeval *tvp;
    796       };
    797    */
    798    PRE_REG_READ1(long, "old_select", struct sel_arg_struct *, args);
    799    PRE_MEM_READ( "old_select(args)", ARG1, 5*sizeof(UWord) );
    800    *flags |= SfMayBlock;
    801    {
    802       UInt* arg_struct = (UInt*)ARG1;
    803       UInt a1, a2, a3, a4, a5;
    804 
    805       a1 = arg_struct[0];
    806       a2 = arg_struct[1];
    807       a3 = arg_struct[2];
    808       a4 = arg_struct[3];
    809       a5 = arg_struct[4];
    810 
    811       PRINT("old_select ( %d, %#x, %#x, %#x, %#x )", (Int)a1,a2,a3,a4,a5);
    812       if (a2 != (Addr)NULL)
    813          PRE_MEM_READ( "old_select(readfds)",   a2, a1/8 /* __FD_SETSIZE/8 */ );
    814       if (a3 != (Addr)NULL)
    815          PRE_MEM_READ( "old_select(writefds)",  a3, a1/8 /* __FD_SETSIZE/8 */ );
    816       if (a4 != (Addr)NULL)
    817          PRE_MEM_READ( "old_select(exceptfds)", a4, a1/8 /* __FD_SETSIZE/8 */ );
    818       if (a5 != (Addr)NULL)
    819          PRE_MEM_READ( "old_select(timeout)", a5, sizeof(struct vki_timeval) );
    820    }
    821 }
    822 
    823 PRE(sys_clone)
    824 {
    825    UInt cloneflags;
    826    Bool badarg = False;
    827 
    828    PRINT("sys_clone ( %lx, %#lx, %#lx, %#lx, %#lx )",ARG1,ARG2,ARG3,ARG4,ARG5);
    829    PRE_REG_READ2(int, "clone",
    830                  unsigned long, flags,
    831                  void *, child_stack);
    832 
    833    if (ARG1 & VKI_CLONE_PARENT_SETTID) {
    834       if (VG_(tdict).track_pre_reg_read) {
    835          PRA3("clone", int *, parent_tidptr);
    836       }
    837       PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
    838       if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int),
    839                                              VKI_PROT_WRITE)) {
    840          badarg = True;
    841       }
    842    }
    843    if (ARG1 & VKI_CLONE_SETTLS) {
    844       if (VG_(tdict).track_pre_reg_read) {
    845          PRA4("clone", vki_modify_ldt_t *, tlsinfo);
    846       }
    847       PRE_MEM_READ("clone(tlsinfo)", ARG4, sizeof(vki_modify_ldt_t));
    848       if (!VG_(am_is_valid_for_client)(ARG4, sizeof(vki_modify_ldt_t),
    849                                              VKI_PROT_READ)) {
    850          badarg = True;
    851       }
    852    }
    853    if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
    854       if (VG_(tdict).track_pre_reg_read) {
    855          PRA5("clone", int *, child_tidptr);
    856       }
    857       PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
    858       if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int),
    859                                              VKI_PROT_WRITE)) {
    860          badarg = True;
    861       }
    862    }
    863 
    864    if (badarg) {
    865       SET_STATUS_Failure( VKI_EFAULT );
    866       return;
    867    }
    868 
    869    cloneflags = ARG1;
    870 
    871    if (!ML_(client_signal_OK)(ARG1 & VKI_CSIGNAL)) {
    872       SET_STATUS_Failure( VKI_EINVAL );
    873       return;
    874    }
    875 
    876    /* Be ultra-paranoid and filter out any clone-variants we don't understand:
    877       - ??? specifies clone flags of 0x100011
    878       - ??? specifies clone flags of 0x1200011.
    879       - NPTL specifies clone flags of 0x7D0F00.
    880       - The Quadrics Elan3 driver specifies clone flags of 0xF00.
    881       - Newer Quadrics Elan3 drivers with NTPL support specify 0x410F00.
    882       Everything else is rejected.
    883    */
    884    if (
    885         1 ||
    886         /* 11 Nov 05: for the time being, disable this ultra-paranoia.
    887            The switch below probably does a good enough job. */
    888           (cloneflags == 0x100011 || cloneflags == 0x1200011
    889                                   || cloneflags == 0x7D0F00
    890                                   || cloneflags == 0x790F00
    891                                   || cloneflags == 0x3D0F00
    892                                   || cloneflags == 0x410F00
    893                                   || cloneflags == 0xF00
    894                                   || cloneflags == 0xF21)) {
    895      /* OK */
    896    }
    897    else {
    898       /* Nah.  We don't like it.  Go away. */
    899       goto reject;
    900    }
    901 
    902    /* Only look at the flags we really care about */
    903    switch (cloneflags & (VKI_CLONE_VM | VKI_CLONE_FS
    904                          | VKI_CLONE_FILES | VKI_CLONE_VFORK)) {
    905    case VKI_CLONE_VM | VKI_CLONE_FS | VKI_CLONE_FILES:
    906       /* thread creation */
    907       SET_STATUS_from_SysRes(
    908          do_clone(tid,
    909                   ARG1,         /* flags */
    910                   (Addr)ARG2,   /* child ESP */
    911                   (Int *)ARG3,  /* parent_tidptr */
    912                   (Int *)ARG5,  /* child_tidptr */
    913                   (vki_modify_ldt_t *)ARG4)); /* set_tls */
    914       break;
    915 
    916    case VKI_CLONE_VFORK | VKI_CLONE_VM: /* vfork */
    917       /* FALLTHROUGH - assume vfork == fork */
    918       cloneflags &= ~(VKI_CLONE_VFORK | VKI_CLONE_VM);
    919 
    920    case 0: /* plain fork */
    921       SET_STATUS_from_SysRes(
    922          ML_(do_fork_clone)(tid,
    923                        cloneflags,      /* flags */
    924                        (Int *)ARG3,     /* parent_tidptr */
    925                        (Int *)ARG5));   /* child_tidptr */
    926       break;
    927 
    928    default:
    929    reject:
    930       /* should we just ENOSYS? */
    931       VG_(message)(Vg_UserMsg, "\n");
    932       VG_(message)(Vg_UserMsg, "Unsupported clone() flags: 0x%lx\n", ARG1);
    933       VG_(message)(Vg_UserMsg, "\n");
    934       VG_(message)(Vg_UserMsg, "The only supported clone() uses are:\n");
    935       VG_(message)(Vg_UserMsg, " - via a threads library (LinuxThreads or NPTL)\n");
    936       VG_(message)(Vg_UserMsg, " - via the implementation of fork or vfork\n");
    937       VG_(message)(Vg_UserMsg, " - for the Quadrics Elan3 user-space driver\n");
    938       VG_(unimplemented)
    939          ("Valgrind does not support general clone().");
    940    }
    941 
    942    if (SUCCESS) {
    943       if (ARG1 & VKI_CLONE_PARENT_SETTID)
    944          POST_MEM_WRITE(ARG3, sizeof(Int));
    945       if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID))
    946          POST_MEM_WRITE(ARG5, sizeof(Int));
    947 
    948       /* Thread creation was successful; let the child have the chance
    949          to run */
    950       *flags |= SfYieldAfter;
    951    }
    952 }
    953 
    954 PRE(sys_sigreturn)
    955 {
    956    /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
    957       an explanation of what follows. */
    958 
    959    ThreadState* tst;
    960    PRINT("sys_sigreturn ( )");
    961 
    962    vg_assert(VG_(is_valid_tid)(tid));
    963    vg_assert(tid >= 1 && tid < VG_N_THREADS);
    964    vg_assert(VG_(is_running_thread)(tid));
    965 
    966    /* Adjust esp to point to start of frame; skip back up over
    967       sigreturn sequence's "popl %eax" and handler ret addr */
    968    tst = VG_(get_ThreadState)(tid);
    969    tst->arch.vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
    970    /* XXX why does ESP change differ from rt_sigreturn case below? */
    971 
    972    /* This is only so that the EIP is (might be) useful to report if
    973       something goes wrong in the sigreturn */
    974    ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
    975 
    976    /* Restore register state from frame and remove it */
    977    VG_(sigframe_destroy)(tid, False);
    978 
    979    /* Tell the driver not to update the guest state with the "result",
    980       and set a bogus result to keep it happy. */
    981    *flags |= SfNoWriteResult;
    982    SET_STATUS_Success(0);
    983 
    984    /* Check to see if any signals arose as a result of this. */
    985    *flags |= SfPollAfter;
    986 }
    987 
    988 PRE(sys_rt_sigreturn)
    989 {
    990    /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
    991       an explanation of what follows. */
    992 
    993    ThreadState* tst;
    994    PRINT("sys_rt_sigreturn ( )");
    995 
    996    vg_assert(VG_(is_valid_tid)(tid));
    997    vg_assert(tid >= 1 && tid < VG_N_THREADS);
    998    vg_assert(VG_(is_running_thread)(tid));
    999 
   1000    /* Adjust esp to point to start of frame; skip back up over handler
   1001       ret addr */
   1002    tst = VG_(get_ThreadState)(tid);
   1003    tst->arch.vex.guest_ESP -= sizeof(Addr);
   1004    /* XXX why does ESP change differ from sigreturn case above? */
   1005 
   1006    /* This is only so that the EIP is (might be) useful to report if
   1007       something goes wrong in the sigreturn */
   1008    ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
   1009 
   1010    /* Restore register state from frame and remove it */
   1011    VG_(sigframe_destroy)(tid, True);
   1012 
   1013    /* Tell the driver not to update the guest state with the "result",
   1014       and set a bogus result to keep it happy. */
   1015    *flags |= SfNoWriteResult;
   1016    SET_STATUS_Success(0);
   1017 
   1018    /* Check to see if any signals arose as a result of this. */
   1019    *flags |= SfPollAfter;
   1020 }
   1021 
   1022 PRE(sys_modify_ldt)
   1023 {
   1024    PRINT("sys_modify_ldt ( %ld, %#lx, %lu )", SARG1, ARG2, ARG3);
   1025    PRE_REG_READ3(int, "modify_ldt", int, func, void *, ptr,
   1026                  unsigned long, bytecount);
   1027 
   1028    if (ARG1 == 0) {
   1029       /* read the LDT into ptr */
   1030       PRE_MEM_WRITE( "modify_ldt(ptr)", ARG2, ARG3 );
   1031    }
   1032    if (ARG1 == 1 || ARG1 == 0x11) {
   1033       /* write the LDT with the entry pointed at by ptr */
   1034       PRE_MEM_READ( "modify_ldt(ptr)", ARG2, sizeof(vki_modify_ldt_t) );
   1035    }
   1036    /* "do" the syscall ourselves; the kernel never sees it */
   1037    SET_STATUS_from_SysRes( sys_modify_ldt( tid, ARG1, (void*)ARG2, ARG3 ) );
   1038 
   1039    if (ARG1 == 0 && SUCCESS && RES > 0) {
   1040       POST_MEM_WRITE( ARG2, RES );
   1041    }
   1042 }
   1043 
   1044 PRE(sys_set_thread_area)
   1045 {
   1046    PRINT("sys_set_thread_area ( %#lx )", ARG1);
   1047    PRE_REG_READ1(int, "set_thread_area", struct user_desc *, u_info)
   1048    PRE_MEM_READ( "set_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
   1049 
   1050    /* "do" the syscall ourselves; the kernel never sees it */
   1051    SET_STATUS_from_SysRes( sys_set_thread_area( tid, (void *)ARG1 ) );
   1052 }
   1053 
   1054 PRE(sys_get_thread_area)
   1055 {
   1056    PRINT("sys_get_thread_area ( %#lx )", ARG1);
   1057    PRE_REG_READ1(int, "get_thread_area", struct user_desc *, u_info)
   1058    PRE_MEM_WRITE( "get_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
   1059 
   1060    /* "do" the syscall ourselves; the kernel never sees it */
   1061    SET_STATUS_from_SysRes( sys_get_thread_area( tid, (void *)ARG1 ) );
   1062 
   1063    if (SUCCESS) {
   1064       POST_MEM_WRITE( ARG1, sizeof(vki_modify_ldt_t) );
   1065    }
   1066 }
   1067 
   1068 // Parts of this are x86-specific, but the *PEEK* cases are generic.
   1069 //
   1070 // ARG3 is only used for pointers into the traced process's address
   1071 // space and for offsets into the traced process's struct
   1072 // user_regs_struct. It is never a pointer into this process's memory
   1073 // space, and we should therefore not check anything it points to.
   1074 PRE(sys_ptrace)
   1075 {
   1076    PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", SARG1, SARG2, ARG3, ARG4);
   1077    PRE_REG_READ4(int, "ptrace",
   1078                  long, request, long, pid, unsigned long, addr,
   1079                  unsigned long, data);
   1080    switch (ARG1) {
   1081    case VKI_PTRACE_PEEKTEXT:
   1082    case VKI_PTRACE_PEEKDATA:
   1083    case VKI_PTRACE_PEEKUSR:
   1084       PRE_MEM_WRITE( "ptrace(peek)", ARG4,
   1085 		     sizeof (long));
   1086       break;
   1087    case VKI_PTRACE_GETREGS:
   1088       PRE_MEM_WRITE( "ptrace(getregs)", ARG4,
   1089 		     sizeof (struct vki_user_regs_struct));
   1090       break;
   1091    case VKI_PTRACE_GETFPREGS:
   1092       PRE_MEM_WRITE( "ptrace(getfpregs)", ARG4,
   1093 		     sizeof (struct vki_user_i387_struct));
   1094       break;
   1095    case VKI_PTRACE_GETFPXREGS:
   1096       PRE_MEM_WRITE( "ptrace(getfpxregs)", ARG4,
   1097                      sizeof(struct vki_user_fxsr_struct) );
   1098       break;
   1099    case VKI_PTRACE_GET_THREAD_AREA:
   1100       PRE_MEM_WRITE( "ptrace(get_thread_area)", ARG4,
   1101                      sizeof(struct vki_user_desc) );
   1102       break;
   1103    case VKI_PTRACE_SETREGS:
   1104       PRE_MEM_READ( "ptrace(setregs)", ARG4,
   1105 		     sizeof (struct vki_user_regs_struct));
   1106       break;
   1107    case VKI_PTRACE_SETFPREGS:
   1108       PRE_MEM_READ( "ptrace(setfpregs)", ARG4,
   1109 		     sizeof (struct vki_user_i387_struct));
   1110       break;
   1111    case VKI_PTRACE_SETFPXREGS:
   1112       PRE_MEM_READ( "ptrace(setfpxregs)", ARG4,
   1113                      sizeof(struct vki_user_fxsr_struct) );
   1114       break;
   1115    case VKI_PTRACE_SET_THREAD_AREA:
   1116       PRE_MEM_READ( "ptrace(set_thread_area)", ARG4,
   1117                      sizeof(struct vki_user_desc) );
   1118       break;
   1119    case VKI_PTRACE_GETEVENTMSG:
   1120       PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
   1121       break;
   1122    case VKI_PTRACE_GETSIGINFO:
   1123       PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
   1124       break;
   1125    case VKI_PTRACE_SETSIGINFO:
   1126       PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
   1127       break;
   1128    case VKI_PTRACE_GETREGSET:
   1129       ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
   1130       break;
   1131    case VKI_PTRACE_SETREGSET:
   1132       ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
   1133       break;
   1134    default:
   1135       break;
   1136    }
   1137 }
   1138 
   1139 POST(sys_ptrace)
   1140 {
   1141    switch (ARG1) {
   1142    case VKI_PTRACE_PEEKTEXT:
   1143    case VKI_PTRACE_PEEKDATA:
   1144    case VKI_PTRACE_PEEKUSR:
   1145       POST_MEM_WRITE( ARG4, sizeof (long));
   1146       break;
   1147    case VKI_PTRACE_GETREGS:
   1148       POST_MEM_WRITE( ARG4, sizeof (struct vki_user_regs_struct));
   1149       break;
   1150    case VKI_PTRACE_GETFPREGS:
   1151       POST_MEM_WRITE( ARG4, sizeof (struct vki_user_i387_struct));
   1152       break;
   1153    case VKI_PTRACE_GETFPXREGS:
   1154       POST_MEM_WRITE( ARG4, sizeof(struct vki_user_fxsr_struct) );
   1155       break;
   1156    case VKI_PTRACE_GET_THREAD_AREA:
   1157       POST_MEM_WRITE( ARG4, sizeof(struct vki_user_desc) );
   1158       break;
   1159    case VKI_PTRACE_GETEVENTMSG:
   1160       POST_MEM_WRITE( ARG4, sizeof(unsigned long));
   1161       break;
   1162    case VKI_PTRACE_GETSIGINFO:
   1163       /* XXX: This is a simplification. Different parts of the
   1164        * siginfo_t are valid depending on the type of signal.
   1165        */
   1166       POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
   1167       break;
   1168    case VKI_PTRACE_GETREGSET:
   1169       ML_(linux_POST_getregset)(tid, ARG3, ARG4);
   1170       break;
   1171    default:
   1172       break;
   1173    }
   1174 }
   1175 
   1176 PRE(old_mmap)
   1177 {
   1178    /* struct mmap_arg_struct {
   1179          unsigned long addr;
   1180          unsigned long len;
   1181          unsigned long prot;
   1182          unsigned long flags;
   1183          unsigned long fd;
   1184          unsigned long offset;
   1185    }; */
   1186    UWord a1, a2, a3, a4, a5, a6;
   1187    SysRes r;
   1188 
   1189    UWord* args = (UWord*)ARG1;
   1190    PRE_REG_READ1(long, "old_mmap", struct mmap_arg_struct *, args);
   1191    PRE_MEM_READ( "old_mmap(args)", (Addr)args, 6*sizeof(UWord) );
   1192 
   1193    a1 = args[1-1];
   1194    a2 = args[2-1];
   1195    a3 = args[3-1];
   1196    a4 = args[4-1];
   1197    a5 = args[5-1];
   1198    a6 = args[6-1];
   1199 
   1200    PRINT("old_mmap ( %#lx, %lu, %ld, %ld, %ld, %ld )",
   1201          a1, a2, (Word)a3, (Word)a4, (Word)a5, (Word)a6 );
   1202 
   1203    r = ML_(generic_PRE_sys_mmap)( tid, a1, a2, a3, a4, a5, (Off64T)a6 );
   1204    SET_STATUS_from_SysRes(r);
   1205 }
   1206 
   1207 PRE(sys_mmap2)
   1208 {
   1209    SysRes r;
   1210 
   1211    // Exactly like old_mmap() except:
   1212    //  - all 6 args are passed in regs, rather than in a memory-block.
   1213    //  - the file offset is specified in pagesize units rather than bytes,
   1214    //    so that it can be used for files bigger than 2^32 bytes.
   1215    // pagesize or 4K-size units in offset?  For ppc32/64-linux, this is
   1216    // 4K-sized.  Assert that the page size is 4K here for safety.
   1217    vg_assert(VKI_PAGE_SIZE == 4096);
   1218    PRINT("sys_mmap2 ( %#lx, %lu, %lu, %lu, %lu, %lu )",
   1219          ARG1, ARG2, ARG3, ARG4, ARG5, ARG6 );
   1220    PRE_REG_READ6(long, "mmap2",
   1221                  unsigned long, start, unsigned long, length,
   1222                  unsigned long, prot,  unsigned long, flags,
   1223                  unsigned long, fd,    unsigned long, offset);
   1224 
   1225    r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
   1226                                        4096 * (Off64T)ARG6 );
   1227    SET_STATUS_from_SysRes(r);
   1228 }
   1229 
   1230 // XXX: lstat64/fstat64/stat64 are generic, but not necessarily
   1231 // applicable to every architecture -- I think only to 32-bit archs.
   1232 // We're going to need something like linux/core_os32.h for such
   1233 // things, eventually, I think.  --njn
   1234 PRE(sys_lstat64)
   1235 {
   1236    PRINT("sys_lstat64 ( %#lx(%s), %#lx )", ARG1, (HChar*)ARG1, ARG2);
   1237    PRE_REG_READ2(long, "lstat64", char *, file_name, struct stat64 *, buf);
   1238    PRE_MEM_RASCIIZ( "lstat64(file_name)", ARG1 );
   1239    PRE_MEM_WRITE( "lstat64(buf)", ARG2, sizeof(struct vki_stat64) );
   1240 }
   1241 
   1242 POST(sys_lstat64)
   1243 {
   1244    vg_assert(SUCCESS);
   1245    if (RES == 0) {
   1246       POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   1247    }
   1248 }
   1249 
   1250 PRE(sys_stat64)
   1251 {
   1252    FUSE_COMPATIBLE_MAY_BLOCK();
   1253    PRINT("sys_stat64 ( %#lx(%s), %#lx )", ARG1, (HChar*)ARG1, ARG2);
   1254    PRE_REG_READ2(long, "stat64", char *, file_name, struct stat64 *, buf);
   1255    PRE_MEM_RASCIIZ( "stat64(file_name)", ARG1 );
   1256    PRE_MEM_WRITE( "stat64(buf)", ARG2, sizeof(struct vki_stat64) );
   1257 }
   1258 
   1259 POST(sys_stat64)
   1260 {
   1261    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   1262 }
   1263 
   1264 PRE(sys_fstatat64)
   1265 {
   1266    FUSE_COMPATIBLE_MAY_BLOCK();
   1267    // ARG4 =  int flags;  Flags are or'ed together, therefore writing them
   1268    // as a hex constant is more meaningful.
   1269    PRINT("sys_fstatat64 ( %ld, %#lx(%s), %#lx, %#lx )",
   1270          SARG1, ARG2, (HChar*)ARG2, ARG3, ARG4);
   1271    PRE_REG_READ4(long, "fstatat64",
   1272                  int, dfd, char *, file_name, struct stat64 *, buf, int, flags);
   1273    PRE_MEM_RASCIIZ( "fstatat64(file_name)", ARG2 );
   1274    PRE_MEM_WRITE( "fstatat64(buf)", ARG3, sizeof(struct vki_stat64) );
   1275 }
   1276 
   1277 POST(sys_fstatat64)
   1278 {
   1279    POST_MEM_WRITE( ARG3, sizeof(struct vki_stat64) );
   1280 }
   1281 
   1282 PRE(sys_fstat64)
   1283 {
   1284    PRINT("sys_fstat64 ( %lu, %#lx )", ARG1, ARG2);
   1285    PRE_REG_READ2(long, "fstat64", unsigned long, fd, struct stat64 *, buf);
   1286    PRE_MEM_WRITE( "fstat64(buf)", ARG2, sizeof(struct vki_stat64) );
   1287 }
   1288 
   1289 POST(sys_fstat64)
   1290 {
   1291    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   1292 }
   1293 
   1294 /* NB: arm-linux has a clone of this one, and ppc32-linux has an almost
   1295    identical version. */
   1296 PRE(sys_sigsuspend)
   1297 {
   1298    /* The C library interface to sigsuspend just takes a pointer to
   1299       a signal mask but this system call has three arguments - the first
   1300       two don't appear to be used by the kernel and are always passed as
   1301       zero by glibc and the third is the first word of the signal mask
   1302       so only 32 signals are supported.
   1303 
   1304       In fact glibc normally uses rt_sigsuspend if it is available as
   1305       that takes a pointer to the signal mask so supports more signals.
   1306     */
   1307    *flags |= SfMayBlock;
   1308    PRINT("sys_sigsuspend ( %ld, %ld, %lu )", SARG1, SARG2, ARG3 );
   1309    PRE_REG_READ3(int, "sigsuspend",
   1310                  int, history0, int, history1,
   1311                  vki_old_sigset_t, mask);
   1312 }
   1313 
   1314 PRE(sys_vm86old)
   1315 {
   1316    PRINT("sys_vm86old ( %#lx )", ARG1);
   1317    PRE_REG_READ1(int, "vm86old", struct vm86_struct *, info);
   1318    PRE_MEM_WRITE( "vm86old(info)", ARG1, sizeof(struct vki_vm86_struct));
   1319 }
   1320 
   1321 POST(sys_vm86old)
   1322 {
   1323    POST_MEM_WRITE( ARG1, sizeof(struct vki_vm86_struct));
   1324 }
   1325 
   1326 PRE(sys_vm86)
   1327 {
   1328    PRINT("sys_vm86 ( %lu, %#lx )", ARG1, ARG2);
   1329    PRE_REG_READ2(int, "vm86", unsigned long, fn, struct vm86plus_struct *, v86);
   1330    if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
   1331       PRE_MEM_WRITE( "vm86(v86)", ARG2, sizeof(struct vki_vm86plus_struct));
   1332 }
   1333 
   1334 POST(sys_vm86)
   1335 {
   1336    if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
   1337       POST_MEM_WRITE( ARG2, sizeof(struct vki_vm86plus_struct));
   1338 }
   1339 
   1340 
   1341 /* ---------------------------------------------------------------
   1342    PRE/POST wrappers for x86/Linux-variant specific syscalls
   1343    ------------------------------------------------------------ */
   1344 
   1345 PRE(sys_syscall223)
   1346 {
   1347    Int err;
   1348 
   1349    /* 223 is used by sys_bproc.  If we're not on a declared bproc
   1350       variant, fail in the usual way. */
   1351 
   1352    if (!KernelVariantiS(KernelVariant_bproc, VG_(clo_kernel_variant))) {
   1353       PRINT("non-existent syscall! (syscall 223)");
   1354       PRE_REG_READ0(long, "ni_syscall(223)");
   1355       SET_STATUS_Failure( VKI_ENOSYS );
   1356       return;
   1357    }
   1358 
   1359    err = ML_(linux_variant_PRE_sys_bproc)( ARG1, ARG2, ARG3,
   1360                                            ARG4, ARG5, ARG6 );
   1361    if (err) {
   1362       SET_STATUS_Failure( err );
   1363       return;
   1364    }
   1365    /* Let it go through. */
   1366    *flags |= SfMayBlock; /* who knows?  play safe. */
   1367 }
   1368 
   1369 POST(sys_syscall223)
   1370 {
   1371    ML_(linux_variant_POST_sys_bproc)( ARG1, ARG2, ARG3,
   1372                                       ARG4, ARG5, ARG6 );
   1373 }
   1374 
/* All wrappers are defined above, so the PRE/POST shorthands are dropped
   here. */
#undef PRE
#undef POST
   1377 
   1378 
   1379 /* ---------------------------------------------------------------------
   1380    The x86/Linux syscall table
   1381    ------------------------------------------------------------------ */
   1382 
/* Add an x86-linux specific wrapper to a syscall table.  Both macros
   forward to the WRAPPER_ENTRY_* macros with the x86_linux tag filled in.
   In the table below, PLAX_ is used for wrappers that define only PRE()
   (e.g. old_select, old_mmap) and PLAXY for wrappers that define both
   PRE() and POST() (e.g. sys_ptrace, sys_vm86old). */
#define PLAX_(sysno, name)    WRAPPER_ENTRY_X_(x86_linux, sysno, name)
#define PLAXY(sysno, name)    WRAPPER_ENTRY_XY(x86_linux, sysno, name)
   1386 
   1387 
   1388 // This table maps from __NR_xxx syscall numbers (from
   1389 // linux/include/asm-i386/unistd.h) to the appropriate PRE/POST sys_foo()
   1390 // wrappers on x86 (as per sys_call_table in linux/arch/i386/kernel/entry.S).
   1391 //
// For those syscalls not handled by Valgrind, the annotation indicates the
// arch/OS combination they apply to, e.g. */* (generic), */Linux (Linux
// only), ?/? (unknown).
   1395 
   1396 static SyscallTableEntry syscall_table[] = {
   1397 //zz    //   (restart_syscall)                             // 0
   1398    GENX_(__NR_exit,              sys_exit),           // 1
   1399    GENX_(__NR_fork,              sys_fork),           // 2
   1400    GENXY(__NR_read,              sys_read),           // 3
   1401    GENX_(__NR_write,             sys_write),          // 4
   1402 
   1403    GENXY(__NR_open,              sys_open),           // 5
   1404    GENXY(__NR_close,             sys_close),          // 6
   1405    GENXY(__NR_waitpid,           sys_waitpid),        // 7
   1406    GENXY(__NR_creat,             sys_creat),          // 8
   1407    GENX_(__NR_link,              sys_link),           // 9
   1408 
   1409    GENX_(__NR_unlink,            sys_unlink),         // 10
   1410    GENX_(__NR_execve,            sys_execve),         // 11
   1411    GENX_(__NR_chdir,             sys_chdir),          // 12
   1412    GENXY(__NR_time,              sys_time),           // 13
   1413    GENX_(__NR_mknod,             sys_mknod),          // 14
   1414 
   1415    GENX_(__NR_chmod,             sys_chmod),          // 15
   1416 //zz    LINX_(__NR_lchown,            sys_lchown16),       // 16
   1417    GENX_(__NR_break,             sys_ni_syscall),     // 17
   1418 //zz    //   (__NR_oldstat,           sys_stat),           // 18 (obsolete)
   1419    LINX_(__NR_lseek,             sys_lseek),          // 19
   1420 
   1421    GENX_(__NR_getpid,            sys_getpid),         // 20
   1422    LINX_(__NR_mount,             sys_mount),          // 21
   1423    LINX_(__NR_umount,            sys_oldumount),      // 22
   1424    LINX_(__NR_setuid,            sys_setuid16),       // 23 ## P
   1425    LINX_(__NR_getuid,            sys_getuid16),       // 24 ## P
   1426 
   1427    LINX_(__NR_stime,             sys_stime),          // 25 * (SVr4,SVID,X/OPEN)
   1428    PLAXY(__NR_ptrace,            sys_ptrace),         // 26
   1429    GENX_(__NR_alarm,             sys_alarm),          // 27
   1430 //zz    //   (__NR_oldfstat,          sys_fstat),          // 28 * L -- obsolete
   1431    GENX_(__NR_pause,             sys_pause),          // 29
   1432 
   1433    LINX_(__NR_utime,             sys_utime),          // 30
   1434    GENX_(__NR_stty,              sys_ni_syscall),     // 31
   1435    GENX_(__NR_gtty,              sys_ni_syscall),     // 32
   1436    GENX_(__NR_access,            sys_access),         // 33
   1437    GENX_(__NR_nice,              sys_nice),           // 34
   1438 
   1439    GENX_(__NR_ftime,             sys_ni_syscall),     // 35
   1440    GENX_(__NR_sync,              sys_sync),           // 36
   1441    GENX_(__NR_kill,              sys_kill),           // 37
   1442    GENX_(__NR_rename,            sys_rename),         // 38
   1443    GENX_(__NR_mkdir,             sys_mkdir),          // 39
   1444 
   1445    GENX_(__NR_rmdir,             sys_rmdir),          // 40
   1446    GENXY(__NR_dup,               sys_dup),            // 41
   1447    LINXY(__NR_pipe,              sys_pipe),           // 42
   1448    GENXY(__NR_times,             sys_times),          // 43
   1449    GENX_(__NR_prof,              sys_ni_syscall),     // 44
   1450 //zz
   1451    GENX_(__NR_brk,               sys_brk),            // 45
   1452    LINX_(__NR_setgid,            sys_setgid16),       // 46
   1453    LINX_(__NR_getgid,            sys_getgid16),       // 47
   1454 //zz    //   (__NR_signal,            sys_signal),         // 48 */* (ANSI C)
   1455    LINX_(__NR_geteuid,           sys_geteuid16),      // 49
   1456 
   1457    LINX_(__NR_getegid,           sys_getegid16),      // 50
   1458    GENX_(__NR_acct,              sys_acct),           // 51
   1459    LINX_(__NR_umount2,           sys_umount),         // 52
   1460    GENX_(__NR_lock,              sys_ni_syscall),     // 53
   1461    LINXY(__NR_ioctl,             sys_ioctl),          // 54
   1462 
   1463    LINXY(__NR_fcntl,             sys_fcntl),          // 55
   1464    GENX_(__NR_mpx,               sys_ni_syscall),     // 56
   1465    GENX_(__NR_setpgid,           sys_setpgid),        // 57
   1466    GENX_(__NR_ulimit,            sys_ni_syscall),     // 58
   1467 //zz    //   (__NR_oldolduname,       sys_olduname),       // 59 Linux -- obsolete
   1468 //zz
   1469    GENX_(__NR_umask,             sys_umask),          // 60
   1470    GENX_(__NR_chroot,            sys_chroot),         // 61
   1471 //zz    //   (__NR_ustat,             sys_ustat)           // 62 SVr4 -- deprecated
   1472    GENXY(__NR_dup2,              sys_dup2),           // 63
   1473    GENX_(__NR_getppid,           sys_getppid),        // 64
   1474 
   1475    GENX_(__NR_getpgrp,           sys_getpgrp),        // 65
   1476    GENX_(__NR_setsid,            sys_setsid),         // 66
   1477    LINXY(__NR_sigaction,         sys_sigaction),      // 67
   1478 //zz    //   (__NR_sgetmask,          sys_sgetmask),       // 68 */* (ANSI C)
   1479 //zz    //   (__NR_ssetmask,          sys_ssetmask),       // 69 */* (ANSI C)
   1480 //zz
   1481    LINX_(__NR_setreuid,          sys_setreuid16),     // 70
   1482    LINX_(__NR_setregid,          sys_setregid16),     // 71
   1483    PLAX_(__NR_sigsuspend,        sys_sigsuspend),     // 72
   1484    LINXY(__NR_sigpending,        sys_sigpending),     // 73
   1485    GENX_(__NR_sethostname,       sys_sethostname),    // 74
   1486 //zz
   1487    GENX_(__NR_setrlimit,         sys_setrlimit),      // 75
   1488    GENXY(__NR_getrlimit,         sys_old_getrlimit),  // 76
   1489    GENXY(__NR_getrusage,         sys_getrusage),      // 77
   1490    GENXY(__NR_gettimeofday,      sys_gettimeofday),   // 78
   1491    GENX_(__NR_settimeofday,      sys_settimeofday),   // 79
   1492 
   1493    LINXY(__NR_getgroups,         sys_getgroups16),    // 80
   1494    LINX_(__NR_setgroups,         sys_setgroups16),    // 81
   1495    PLAX_(__NR_select,            old_select),         // 82
   1496    GENX_(__NR_symlink,           sys_symlink),        // 83
   1497 //zz    //   (__NR_oldlstat,          sys_lstat),          // 84 -- obsolete
   1498 //zz
   1499    GENX_(__NR_readlink,          sys_readlink),       // 85
   1500 //zz    //   (__NR_uselib,            sys_uselib),         // 86 */Linux
   1501 //zz    //   (__NR_swapon,            sys_swapon),         // 87 */Linux
   1502 //zz    //   (__NR_reboot,            sys_reboot),         // 88 */Linux
   1503 //zz    //   (__NR_readdir,           old_readdir),        // 89 -- superseded
   1504 //zz
   1505    PLAX_(__NR_mmap,              old_mmap),           // 90
   1506    GENXY(__NR_munmap,            sys_munmap),         // 91
   1507    GENX_(__NR_truncate,          sys_truncate),       // 92
   1508    GENX_(__NR_ftruncate,         sys_ftruncate),      // 93
   1509    GENX_(__NR_fchmod,            sys_fchmod),         // 94
   1510 
   1511    LINX_(__NR_fchown,            sys_fchown16),       // 95
   1512    GENX_(__NR_getpriority,       sys_getpriority),    // 96
   1513    GENX_(__NR_setpriority,       sys_setpriority),    // 97
   1514    GENX_(__NR_profil,            sys_ni_syscall),     // 98
   1515    GENXY(__NR_statfs,            sys_statfs),         // 99
   1516 
   1517    GENXY(__NR_fstatfs,           sys_fstatfs),        // 100
   1518    LINX_(__NR_ioperm,            sys_ioperm),         // 101
   1519    LINXY(__NR_socketcall,        sys_socketcall),     // 102 x86/Linux-only
   1520    LINXY(__NR_syslog,            sys_syslog),         // 103
   1521    GENXY(__NR_setitimer,         sys_setitimer),      // 104
   1522 
   1523    GENXY(__NR_getitimer,         sys_getitimer),      // 105
   1524    GENXY(__NR_stat,              sys_newstat),        // 106
   1525    GENXY(__NR_lstat,             sys_newlstat),       // 107
   1526    GENXY(__NR_fstat,             sys_newfstat),       // 108
   1527 //zz    //   (__NR_olduname,          sys_uname),          // 109 -- obsolete
   1528 //zz
   1529    GENX_(__NR_iopl,              sys_iopl),           // 110
   1530    LINX_(__NR_vhangup,           sys_vhangup),        // 111
   1531    GENX_(__NR_idle,              sys_ni_syscall),     // 112
   1532    PLAXY(__NR_vm86old,           sys_vm86old),        // 113 x86/Linux-only
   1533    GENXY(__NR_wait4,             sys_wait4),          // 114
   1534 //zz
   1535 //zz    //   (__NR_swapoff,           sys_swapoff),        // 115 */Linux
   1536    LINXY(__NR_sysinfo,           sys_sysinfo),        // 116
   1537    LINXY(__NR_ipc,               sys_ipc),            // 117
   1538    GENX_(__NR_fsync,             sys_fsync),          // 118
   1539    PLAX_(__NR_sigreturn,         sys_sigreturn),      // 119 ?/Linux
   1540 
   1541    PLAX_(__NR_clone,             sys_clone),          // 120
   1542 //zz    //   (__NR_setdomainname,     sys_setdomainname),  // 121 */*(?)
   1543    GENXY(__NR_uname,             sys_newuname),       // 122
   1544    PLAX_(__NR_modify_ldt,        sys_modify_ldt),     // 123
   1545    LINXY(__NR_adjtimex,          sys_adjtimex),       // 124
   1546 
   1547    GENXY(__NR_mprotect,          sys_mprotect),       // 125
   1548    LINXY(__NR_sigprocmask,       sys_sigprocmask),    // 126
   1549 //zz    // Nb: create_module() was removed 2.4-->2.6
   1550    GENX_(__NR_create_module,     sys_ni_syscall),     // 127
   1551    LINX_(__NR_init_module,       sys_init_module),    // 128
   1552    LINX_(__NR_delete_module,     sys_delete_module),  // 129
   1553 //zz
   1554 //zz    // Nb: get_kernel_syms() was removed 2.4-->2.6
   1555    GENX_(__NR_get_kernel_syms,   sys_ni_syscall),     // 130
   1556    LINX_(__NR_quotactl,          sys_quotactl),       // 131
   1557    GENX_(__NR_getpgid,           sys_getpgid),        // 132
   1558    GENX_(__NR_fchdir,            sys_fchdir),         // 133
   1559 //zz    //   (__NR_bdflush,           sys_bdflush),        // 134 */Linux
   1560 //zz
   1561 //zz    //   (__NR_sysfs,             sys_sysfs),          // 135 SVr4
   1562    LINX_(__NR_personality,       sys_personality),    // 136
   1563    GENX_(__NR_afs_syscall,       sys_ni_syscall),     // 137
   1564    LINX_(__NR_setfsuid,          sys_setfsuid16),     // 138
   1565    LINX_(__NR_setfsgid,          sys_setfsgid16),     // 139
   1566 
   1567    LINXY(__NR__llseek,           sys_llseek),         // 140
   1568    GENXY(__NR_getdents,          sys_getdents),       // 141
   1569    GENX_(__NR__newselect,        sys_select),         // 142
   1570    GENX_(__NR_flock,             sys_flock),          // 143
   1571    GENX_(__NR_msync,             sys_msync),          // 144
   1572 
   1573    GENXY(__NR_readv,             sys_readv),          // 145
   1574    GENX_(__NR_writev,            sys_writev),         // 146
   1575    GENX_(__NR_getsid,            sys_getsid),         // 147
   1576    GENX_(__NR_fdatasync,         sys_fdatasync),      // 148
   1577    LINXY(__NR__sysctl,           sys_sysctl),         // 149
   1578 
   1579    GENX_(__NR_mlock,             sys_mlock),          // 150
   1580    GENX_(__NR_munlock,           sys_munlock),        // 151
   1581    GENX_(__NR_mlockall,          sys_mlockall),       // 152
   1582    LINX_(__NR_munlockall,        sys_munlockall),     // 153
   1583    LINXY(__NR_sched_setparam,    sys_sched_setparam), // 154
   1584 
   1585    LINXY(__NR_sched_getparam,         sys_sched_getparam),        // 155
   1586    LINX_(__NR_sched_setscheduler,     sys_sched_setscheduler),    // 156
   1587    LINX_(__NR_sched_getscheduler,     sys_sched_getscheduler),    // 157
   1588    LINX_(__NR_sched_yield,            sys_sched_yield),           // 158
   1589    LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max),// 159
   1590 
   1591    LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min),// 160
   1592    LINXY(__NR_sched_rr_get_interval,  sys_sched_rr_get_interval), // 161
   1593    GENXY(__NR_nanosleep,         sys_nanosleep),      // 162
   1594    GENX_(__NR_mremap,            sys_mremap),         // 163
   1595    LINX_(__NR_setresuid,         sys_setresuid16),    // 164
   1596 
   1597    LINXY(__NR_getresuid,         sys_getresuid16),    // 165
   1598    PLAXY(__NR_vm86,              sys_vm86),           // 166 x86/Linux-only
   1599    GENX_(__NR_query_module,      sys_ni_syscall),     // 167
   1600    GENXY(__NR_poll,              sys_poll),           // 168
   1601 //zz    //   (__NR_nfsservctl,        sys_nfsservctl),     // 169 */Linux
   1602 //zz
   1603    LINX_(__NR_setresgid,         sys_setresgid16),    // 170
   1604    LINXY(__NR_getresgid,         sys_getresgid16),    // 171
   1605    LINXY(__NR_prctl,             sys_prctl),          // 172
   1606    PLAX_(__NR_rt_sigreturn,      sys_rt_sigreturn),   // 173 x86/Linux only?
   1607    LINXY(__NR_rt_sigaction,      sys_rt_sigaction),   // 174
   1608 
   1609    LINXY(__NR_rt_sigprocmask,    sys_rt_sigprocmask), // 175
   1610    LINXY(__NR_rt_sigpending,     sys_rt_sigpending),  // 176
   1611    LINXY(__NR_rt_sigtimedwait,   sys_rt_sigtimedwait),// 177
   1612    LINXY(__NR_rt_sigqueueinfo,   sys_rt_sigqueueinfo),// 178
   1613    LINX_(__NR_rt_sigsuspend,     sys_rt_sigsuspend),  // 179
   1614 
   1615    GENXY(__NR_pread64,           sys_pread64),        // 180
   1616    GENX_(__NR_pwrite64,          sys_pwrite64),       // 181
   1617    LINX_(__NR_chown,             sys_chown16),        // 182
   1618    GENXY(__NR_getcwd,            sys_getcwd),         // 183
   1619    LINXY(__NR_capget,            sys_capget),         // 184
   1620 
   1621    LINX_(__NR_capset,            sys_capset),         // 185
   1622    GENXY(__NR_sigaltstack,       sys_sigaltstack),    // 186
   1623    LINXY(__NR_sendfile,          sys_sendfile),       // 187
   1624    GENXY(__NR_getpmsg,           sys_getpmsg),        // 188
   1625    GENX_(__NR_putpmsg,           sys_putpmsg),        // 189
   1626 
   1627    // Nb: we treat vfork as fork
   1628    GENX_(__NR_vfork,             sys_fork),           // 190
   1629    GENXY(__NR_ugetrlimit,        sys_getrlimit),      // 191
   1630    PLAX_(__NR_mmap2,             sys_mmap2),          // 192
   1631    GENX_(__NR_truncate64,        sys_truncate64),     // 193
   1632    GENX_(__NR_ftruncate64,       sys_ftruncate64),    // 194
   1633 
   1634    PLAXY(__NR_stat64,            sys_stat64),         // 195
   1635    PLAXY(__NR_lstat64,           sys_lstat64),        // 196
   1636    PLAXY(__NR_fstat64,           sys_fstat64),        // 197
   1637    GENX_(__NR_lchown32,          sys_lchown),         // 198
   1638    GENX_(__NR_getuid32,          sys_getuid),         // 199
   1639 
   1640    GENX_(__NR_getgid32,          sys_getgid),         // 200
   1641    GENX_(__NR_geteuid32,         sys_geteuid),        // 201
   1642    GENX_(__NR_getegid32,         sys_getegid),        // 202
   1643    GENX_(__NR_setreuid32,        sys_setreuid),       // 203
   1644    GENX_(__NR_setregid32,        sys_setregid),       // 204
   1645 
   1646    GENXY(__NR_getgroups32,       sys_getgroups),      // 205
   1647    GENX_(__NR_setgroups32,       sys_setgroups),      // 206
   1648    GENX_(__NR_fchown32,          sys_fchown),         // 207
   1649    LINX_(__NR_setresuid32,       sys_setresuid),      // 208
   1650    LINXY(__NR_getresuid32,       sys_getresuid),      // 209
   1651 
   1652    LINX_(__NR_setresgid32,       sys_setresgid),      // 210
   1653    LINXY(__NR_getresgid32,       sys_getresgid),      // 211
   1654    GENX_(__NR_chown32,           sys_chown),          // 212
   1655    GENX_(__NR_setuid32,          sys_setuid),         // 213
   1656    GENX_(__NR_setgid32,          sys_setgid),         // 214
   1657 
   1658    LINX_(__NR_setfsuid32,        sys_setfsuid),       // 215
   1659    LINX_(__NR_setfsgid32,        sys_setfsgid),       // 216
   1660    LINX_(__NR_pivot_root,        sys_pivot_root),     // 217
   1661    GENXY(__NR_mincore,           sys_mincore),        // 218
   1662    GENX_(__NR_madvise,           sys_madvise),        // 219
   1663 
   1664    GENXY(__NR_getdents64,        sys_getdents64),     // 220
   1665    LINXY(__NR_fcntl64,           sys_fcntl64),        // 221
   1666    GENX_(222,                    sys_ni_syscall),     // 222
   1667    PLAXY(223,                    sys_syscall223),     // 223 // sys_bproc?
   1668    LINX_(__NR_gettid,            sys_gettid),         // 224
   1669 
   1670    LINX_(__NR_readahead,         sys_readahead),      // 225 */Linux
   1671    LINX_(__NR_setxattr,          sys_setxattr),       // 226
   1672    LINX_(__NR_lsetxattr,         sys_lsetxattr),      // 227
   1673    LINX_(__NR_fsetxattr,         sys_fsetxattr),      // 228
   1674    LINXY(__NR_getxattr,          sys_getxattr),       // 229
   1675 
   1676    LINXY(__NR_lgetxattr,         sys_lgetxattr),      // 230
   1677    LINXY(__NR_fgetxattr,         sys_fgetxattr),      // 231
   1678    LINXY(__NR_listxattr,         sys_listxattr),      // 232
   1679    LINXY(__NR_llistxattr,        sys_llistxattr),     // 233
   1680    LINXY(__NR_flistxattr,        sys_flistxattr),     // 234
   1681 
   1682    LINX_(__NR_removexattr,       sys_removexattr),    // 235
   1683    LINX_(__NR_lremovexattr,      sys_lremovexattr),   // 236
   1684    LINX_(__NR_fremovexattr,      sys_fremovexattr),   // 237
   1685    LINXY(__NR_tkill,             sys_tkill),          // 238 */Linux
   1686    LINXY(__NR_sendfile64,        sys_sendfile64),     // 239
   1687 
   1688    LINXY(__NR_futex,             sys_futex),             // 240
   1689    LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 241
   1690    LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 242
   1691    PLAX_(__NR_set_thread_area,   sys_set_thread_area),   // 243
   1692    PLAX_(__NR_get_thread_area,   sys_get_thread_area),   // 244
   1693 
   1694    LINXY(__NR_io_setup,          sys_io_setup),       // 245
   1695    LINX_(__NR_io_destroy,        sys_io_destroy),     // 246
   1696    LINXY(__NR_io_getevents,      sys_io_getevents),   // 247
   1697    LINX_(__NR_io_submit,         sys_io_submit),      // 248
   1698    LINXY(__NR_io_cancel,         sys_io_cancel),      // 249
   1699 
   1700    LINX_(__NR_fadvise64,         sys_fadvise64),      // 250 */(Linux?)
   1701    GENX_(251,                    sys_ni_syscall),     // 251
   1702    LINX_(__NR_exit_group,        sys_exit_group),     // 252
   1703    LINXY(__NR_lookup_dcookie,    sys_lookup_dcookie), // 253
   1704    LINXY(__NR_epoll_create,      sys_epoll_create),   // 254
   1705 
   1706    LINX_(__NR_epoll_ctl,         sys_epoll_ctl),         // 255
   1707    LINXY(__NR_epoll_wait,        sys_epoll_wait),        // 256
   1708 //zz    //   (__NR_remap_file_pages,  sys_remap_file_pages),  // 257 */Linux
   1709    LINX_(__NR_set_tid_address,   sys_set_tid_address),   // 258
   1710    LINXY(__NR_timer_create,      sys_timer_create),      // 259
   1711 
   1712    LINXY(__NR_timer_settime,     sys_timer_settime),  // (timer_create+1)
   1713    LINXY(__NR_timer_gettime,     sys_timer_gettime),  // (timer_create+2)
   1714    LINX_(__NR_timer_getoverrun,  sys_timer_getoverrun),//(timer_create+3)
   1715    LINX_(__NR_timer_delete,      sys_timer_delete),   // (timer_create+4)
   1716    LINX_(__NR_clock_settime,     sys_clock_settime),  // (timer_create+5)
   1717 
   1718    LINXY(__NR_clock_gettime,     sys_clock_gettime),  // (timer_create+6)
   1719    LINXY(__NR_clock_getres,      sys_clock_getres),   // (timer_create+7)
   1720    LINXY(__NR_clock_nanosleep,   sys_clock_nanosleep),// (timer_create+8) */*
   1721    GENXY(__NR_statfs64,          sys_statfs64),       // 268
   1722    GENXY(__NR_fstatfs64,         sys_fstatfs64),      // 269
   1723 
   1724    LINX_(__NR_tgkill,            sys_tgkill),         // 270 */Linux
   1725    GENX_(__NR_utimes,            sys_utimes),         // 271
   1726    LINX_(__NR_fadvise64_64,      sys_fadvise64_64),   // 272 */(Linux?)
   1727    GENX_(__NR_vserver,           sys_ni_syscall),     // 273
   1728    LINX_(__NR_mbind,             sys_mbind),          // 274 ?/?
   1729 
   1730    LINXY(__NR_get_mempolicy,     sys_get_mempolicy),  // 275 ?/?
   1731    LINX_(__NR_set_mempolicy,     sys_set_mempolicy),  // 276 ?/?
   1732    LINXY(__NR_mq_open,           sys_mq_open),        // 277
   1733    LINX_(__NR_mq_unlink,         sys_mq_unlink),      // (mq_open+1)
   1734    LINX_(__NR_mq_timedsend,      sys_mq_timedsend),   // (mq_open+2)
   1735 
   1736    LINXY(__NR_mq_timedreceive,   sys_mq_timedreceive),// (mq_open+3)
   1737    LINX_(__NR_mq_notify,         sys_mq_notify),      // (mq_open+4)
   1738    LINXY(__NR_mq_getsetattr,     sys_mq_getsetattr),  // (mq_open+5)
   1739    GENX_(__NR_sys_kexec_load,    sys_ni_syscall),     // 283
   1740    LINXY(__NR_waitid,            sys_waitid),         // 284
   1741 
   1742    GENX_(285,                    sys_ni_syscall),     // 285
   1743    LINX_(__NR_add_key,           sys_add_key),        // 286
   1744    LINX_(__NR_request_key,       sys_request_key),    // 287
   1745    LINXY(__NR_keyctl,            sys_keyctl),         // 288
   1746    LINX_(__NR_ioprio_set,        sys_ioprio_set),     // 289
   1747 
   1748    LINX_(__NR_ioprio_get,        sys_ioprio_get),     // 290
   1749    LINX_(__NR_inotify_init,	 sys_inotify_init),   // 291
   1750    LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 292
   1751    LINX_(__NR_inotify_rm_watch,	 sys_inotify_rm_watch), // 293
   1752 //   LINX_(__NR_migrate_pages,	 sys_migrate_pages),    // 294
   1753 
   1754    LINXY(__NR_openat,		 sys_openat),           // 295
   1755    LINX_(__NR_mkdirat,		 sys_mkdirat),          // 296
   1756    LINX_(__NR_mknodat,		 sys_mknodat),          // 297
   1757    LINX_(__NR_fchownat,		 sys_fchownat),         // 298
   1758    LINX_(__NR_futimesat,	 sys_futimesat),        // 299
   1759 
   1760    PLAXY(__NR_fstatat64,	 sys_fstatat64),        // 300
   1761    LINX_(__NR_unlinkat,		 sys_unlinkat),         // 301
   1762    LINX_(__NR_renameat,		 sys_renameat),         // 302
   1763    LINX_(__NR_linkat,		 sys_linkat),           // 303
   1764    LINX_(__NR_symlinkat,	 sys_symlinkat),        // 304
   1765 
   1766    LINX_(__NR_readlinkat,	 sys_readlinkat),       // 305
   1767    LINX_(__NR_fchmodat,		 sys_fchmodat),         // 306
   1768    LINX_(__NR_faccessat,	 sys_faccessat),        // 307
   1769    LINX_(__NR_pselect6,		 sys_pselect6),         // 308
   1770    LINXY(__NR_ppoll,		 sys_ppoll),            // 309
   1771 
   1772    LINX_(__NR_unshare,		 sys_unshare),          // 310
   1773    LINX_(__NR_set_robust_list,	 sys_set_robust_list),  // 311
   1774    LINXY(__NR_get_robust_list,	 sys_get_robust_list),  // 312
   1775    LINX_(__NR_splice,            sys_splice),           // 313
   1776    LINX_(__NR_sync_file_range,   sys_sync_file_range),  // 314
   1777 
   1778    LINX_(__NR_tee,               sys_tee),              // 315
   1779    LINXY(__NR_vmsplice,          sys_vmsplice),         // 316
   1780    LINXY(__NR_move_pages,        sys_move_pages),       // 317
   1781    LINXY(__NR_getcpu,            sys_getcpu),           // 318
   1782    LINXY(__NR_epoll_pwait,       sys_epoll_pwait),      // 319
   1783 
   1784    LINX_(__NR_utimensat,         sys_utimensat),        // 320
   1785    LINXY(__NR_signalfd,          sys_signalfd),         // 321
   1786    LINXY(__NR_timerfd_create,    sys_timerfd_create),   // 322
   1787    LINXY(__NR_eventfd,           sys_eventfd),          // 323
   1788    LINX_(__NR_fallocate,         sys_fallocate),        // 324
   1789 
   1790    LINXY(__NR_timerfd_settime,   sys_timerfd_settime),  // 325
   1791    LINXY(__NR_timerfd_gettime,   sys_timerfd_gettime),  // 326
   1792    LINXY(__NR_signalfd4,         sys_signalfd4),        // 327
   1793    LINXY(__NR_eventfd2,          sys_eventfd2),         // 328
   1794    LINXY(__NR_epoll_create1,     sys_epoll_create1),     // 329
   1795 
   1796    LINXY(__NR_dup3,              sys_dup3),             // 330
   1797    LINXY(__NR_pipe2,             sys_pipe2),            // 331
   1798    LINXY(__NR_inotify_init1,     sys_inotify_init1),    // 332
   1799    LINXY(__NR_preadv,            sys_preadv),           // 333
   1800    LINX_(__NR_pwritev,           sys_pwritev),          // 334
   1801 
   1802    LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo),// 335
   1803    LINXY(__NR_perf_event_open,   sys_perf_event_open),  // 336
   1804    LINXY(__NR_recvmmsg,          sys_recvmmsg),         // 337
   1805    LINXY(__NR_fanotify_init,     sys_fanotify_init),    // 338
   1806    LINX_(__NR_fanotify_mark,     sys_fanotify_mark),    // 339
   1807 
   1808    LINXY(__NR_prlimit64,         sys_prlimit64),        // 340
   1809    LINXY(__NR_name_to_handle_at, sys_name_to_handle_at),// 341
   1810    LINXY(__NR_open_by_handle_at, sys_open_by_handle_at),// 342
   1811    LINXY(__NR_clock_adjtime,     sys_clock_adjtime),    // 343
   1812    LINX_(__NR_syncfs,            sys_syncfs),           // 344
   1813 
   1814    LINXY(__NR_sendmmsg,          sys_sendmmsg),         // 345
   1815 //   LINX_(__NR_setns,             sys_ni_syscall),       // 346
   1816    LINXY(__NR_process_vm_readv,  sys_process_vm_readv), // 347
   1817    LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 348
   1818    LINX_(__NR_kcmp,              sys_kcmp),             // 349
   1819 
   1820 //   LIN__(__NR_finit_module,      sys_ni_syscall),       // 350
   1821 //   LIN__(__NR_sched_setattr,     sys_ni_syscall),       // 351
   1822 //   LIN__(__NR_sched_getattr,     sys_ni_syscall),       // 352
   1823 //   LIN__(__NR_renameat2,         sys_ni_syscall),       // 353
   1824 //   LIN__(__NR_seccomp,           sys_ni_syscall),       // 354
   1825 
   1826    LINXY(__NR_getrandom,         sys_getrandom),        // 355
   1827    LINXY(__NR_memfd_create,      sys_memfd_create)      // 356
   1828 //   LIN__(__NR_bpf,               sys_ni_syscall)        // 357
   1829 };
   1830 
   1831 SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
   1832 {
   1833    const UInt syscall_table_size
   1834       = sizeof(syscall_table) / sizeof(syscall_table[0]);
   1835 
   1836    /* Is it in the contiguous initial section of the table? */
   1837    if (sysno < syscall_table_size) {
   1838       SyscallTableEntry* sys = &syscall_table[sysno];
   1839       if (sys->before == NULL)
   1840          return NULL; /* no entry */
   1841       else
   1842          return sys;
   1843    }
   1844 
   1845    /* Can't find a wrapper */
   1846    return NULL;
   1847 }
   1848 
   1849 #endif // defined(VGP_x86_linux)
   1850 
   1851 /*--------------------------------------------------------------------*/
   1852 /*--- end                                                          ---*/
   1853 /*--------------------------------------------------------------------*/
   1854