Home | History | Annotate | Download | only in m_syswrap
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Platform-specific syscalls stuff.        syswrap-x86-linux.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2013 Nicholas Nethercote
     11       njn (at) valgrind.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #if defined(VGP_x86_linux)
     32 
     33 /* TODO/FIXME jrs 20050207: assignments to the syscall return result
     34    in interrupted_syscall() need to be reviewed.  They don't seem
     35    to assign the shadow state.
     36 */
     37 
     38 #include "pub_core_basics.h"
     39 #include "pub_core_vki.h"
     40 #include "pub_core_vkiscnums.h"
     41 #include "pub_core_threadstate.h"
     42 #include "pub_core_aspacemgr.h"
     43 #include "pub_core_debuglog.h"
     44 #include "pub_core_libcbase.h"
     45 #include "pub_core_libcassert.h"
     46 #include "pub_core_libcprint.h"
     47 #include "pub_core_libcproc.h"
     48 #include "pub_core_libcsignal.h"
     49 #include "pub_core_mallocfree.h"
     50 #include "pub_core_options.h"
     51 #include "pub_core_scheduler.h"
     52 #include "pub_core_sigframe.h"      // For VG_(sigframe_destroy)()
     53 #include "pub_core_signals.h"
     54 #include "pub_core_syscall.h"
     55 #include "pub_core_syswrap.h"
     56 #include "pub_core_tooliface.h"
     57 
     58 #include "priv_types_n_macros.h"
     59 #include "priv_syswrap-generic.h"    /* for decls of generic wrappers */
     60 #include "priv_syswrap-linux.h"      /* for decls of linux-ish wrappers */
     61 #include "priv_syswrap-linux-variants.h" /* decls of linux variant wrappers */
     62 #include "priv_syswrap-main.h"
     63 
     64 
     65 /* ---------------------------------------------------------------------
     66    clone() handling
     67    ------------------------------------------------------------------ */
     68 
     69 /* Call f(arg1), but first switch stacks, using 'stack' as the new
     70    stack, and use 'retaddr' as f's return-to address.  Also, clear all
     71    the integer registers before entering f.*/
     72 __attribute__((noreturn))
     73 void ML_(call_on_new_stack_0_1) ( Addr stack,
     74 			          Addr retaddr,
     75 			          void (*f)(Word),
     76                                   Word arg1 );
     77 //  4(%esp) == stack
     78 //  8(%esp) == retaddr
     79 // 12(%esp) == f
     80 // 16(%esp) == arg1
     81 asm(
     82 ".text\n"
     83 ".globl vgModuleLocal_call_on_new_stack_0_1\n"
     84 "vgModuleLocal_call_on_new_stack_0_1:\n"
     85 "   movl %esp, %esi\n"     // remember old stack pointer
     86 "   movl 4(%esi), %esp\n"  // set stack
     87 "   pushl 16(%esi)\n"      // arg1 to stack
     88 "   pushl  8(%esi)\n"      // retaddr to stack
     89 "   pushl 12(%esi)\n"      // f to stack
     90 "   movl $0, %eax\n"       // zero all GP regs
     91 "   movl $0, %ebx\n"
     92 "   movl $0, %ecx\n"
     93 "   movl $0, %edx\n"
     94 "   movl $0, %esi\n"
     95 "   movl $0, %edi\n"
     96 "   movl $0, %ebp\n"
     97 "   ret\n"                 // jump to f
     98 "   ud2\n"                 // should never get here
     99 ".previous\n"
    100 );
    101 
    102 
    103 /*
    104         Perform a clone system call.  clone is strange because it has
    105         fork()-like return-twice semantics, so it needs special
    106         handling here.
    107 
    108         Upon entry, we have:
    109 
    110             int (fn)(void*)     in  0+FSZ(%esp)
    111             void* child_stack   in  4+FSZ(%esp)
    112             int flags           in  8+FSZ(%esp)
    113             void* arg           in 12+FSZ(%esp)
    114             pid_t* child_tid    in 16+FSZ(%esp)
    115             pid_t* parent_tid   in 20+FSZ(%esp)
    116             void* tls_ptr       in 24+FSZ(%esp)
    117 
    118         System call requires:
    119 
    120             int    $__NR_clone  in %eax
    121             int    flags        in %ebx
    122             void*  child_stack  in %ecx
    123             pid_t* parent_tid   in %edx
    124             pid_t* child_tid    in %edi
    125             void*  tls_ptr      in %esi
    126 
    127 	Returns an Int encoded in the linux-x86 way, not a SysRes.
    128  */
    129 #define FSZ               "4+4+4+4" /* frame size = retaddr+ebx+edi+esi */
    130 #define __NR_CLONE        VG_STRINGIFY(__NR_clone)
    131 #define __NR_EXIT         VG_STRINGIFY(__NR_exit)
    132 
    133 extern
    134 Int do_syscall_clone_x86_linux ( Word (*fn)(void *),
    135                                  void* stack,
    136                                  Int   flags,
    137                                  void* arg,
    138                                  Int*  child_tid,
    139                                  Int*  parent_tid,
    140                                  vki_modify_ldt_t * );
    141 asm(
    142 ".text\n"
    143 ".globl do_syscall_clone_x86_linux\n"
    144 "do_syscall_clone_x86_linux:\n"
    145 "        push    %ebx\n"
    146 "        push    %edi\n"
    147 "        push    %esi\n"
    148 
    149          /* set up child stack with function and arg */
    150 "        movl     4+"FSZ"(%esp), %ecx\n"    /* syscall arg2: child stack */
    151 "        movl    12+"FSZ"(%esp), %ebx\n"    /* fn arg */
    152 "        movl     0+"FSZ"(%esp), %eax\n"    /* fn */
    153 "        lea     -8(%ecx), %ecx\n"          /* make space on stack */
    154 "        movl    %ebx, 4(%ecx)\n"           /*   fn arg */
    155 "        movl    %eax, 0(%ecx)\n"           /*   fn */
    156 
    157          /* get other args to clone */
    158 "        movl     8+"FSZ"(%esp), %ebx\n"    /* syscall arg1: flags */
    159 "        movl    20+"FSZ"(%esp), %edx\n"    /* syscall arg3: parent tid * */
    160 "        movl    16+"FSZ"(%esp), %edi\n"    /* syscall arg5: child tid * */
    161 "        movl    24+"FSZ"(%esp), %esi\n"    /* syscall arg4: tls_ptr * */
    162 "        movl    $"__NR_CLONE", %eax\n"
    163 "        int     $0x80\n"                   /* clone() */
    164 "        testl   %eax, %eax\n"              /* child if retval == 0 */
    165 "        jnz     1f\n"
    166 
    167          /* CHILD - call thread function */
    168 "        popl    %eax\n"
    169 "        call    *%eax\n"                   /* call fn */
    170 
    171          /* exit with result */
    172 "        movl    %eax, %ebx\n"              /* arg1: return value from fn */
    173 "        movl    $"__NR_EXIT", %eax\n"
    174 "        int     $0x80\n"
    175 
    176          /* Hm, exit returned */
    177 "        ud2\n"
    178 
    179 "1:\n"   /* PARENT or ERROR */
    180 "        pop     %esi\n"
    181 "        pop     %edi\n"
    182 "        pop     %ebx\n"
    183 "        ret\n"
    184 ".previous\n"
    185 );
    186 
    187 #undef FSZ
    188 #undef __NR_CLONE
    189 #undef __NR_EXIT
    190 
    191 
    192 // forward declarations
    193 static void setup_child ( ThreadArchState*, ThreadArchState*, Bool );
    194 static SysRes sys_set_thread_area ( ThreadId, vki_modify_ldt_t* );
    195 
    196 /*
    197    When a client clones, we need to keep track of the new thread.  This means:
     198    1. allocate a ThreadId+ThreadState+stack for the thread
    199 
    200    2. initialize the thread's new VCPU state
    201 
    202    3. create the thread using the same args as the client requested,
    203    but using the scheduler entrypoint for EIP, and a separate stack
    204    for ESP.
    205  */
    206 static SysRes do_clone ( ThreadId ptid,
    207                          UInt flags, Addr esp,
    208                          Int* parent_tidptr,
    209                          Int* child_tidptr,
    210                          vki_modify_ldt_t *tlsinfo)
    211 {
    212    static const Bool debug = False;
    213 
    214    ThreadId     ctid = VG_(alloc_ThreadState)();
    215    ThreadState* ptst = VG_(get_ThreadState)(ptid);
    216    ThreadState* ctst = VG_(get_ThreadState)(ctid);
    217    UWord*       stack;
    218    SysRes       res;
    219    Int          eax;
    220    vki_sigset_t blockall, savedmask;
    221 
    222    VG_(sigfillset)(&blockall);
    223 
    224    vg_assert(VG_(is_running_thread)(ptid));
    225    vg_assert(VG_(is_valid_tid)(ctid));
    226 
    227    stack = (UWord*)ML_(allocstack)(ctid);
    228    if (stack == NULL) {
    229       res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
    230       goto out;
    231    }
    232 
    233    /* Copy register state
    234 
    235       Both parent and child return to the same place, and the code
    236       following the clone syscall works out which is which, so we
    237       don't need to worry about it.
    238 
    239       The parent gets the child's new tid returned from clone, but the
    240       child gets 0.
    241 
    242       If the clone call specifies a NULL esp for the new thread, then
    243       it actually gets a copy of the parent's esp.
    244    */
    245    /* Note: the clone call done by the Quadrics Elan3 driver specifies
    246       clone flags of 0xF00, and it seems to rely on the assumption
    247       that the child inherits a copy of the parent's GDT.
    248       setup_child takes care of setting that up. */
    249    setup_child( &ctst->arch, &ptst->arch, True );
    250 
    251    /* Make sys_clone appear to have returned Success(0) in the
    252       child. */
    253    ctst->arch.vex.guest_EAX = 0;
    254 
    255    if (esp != 0)
    256       ctst->arch.vex.guest_ESP = esp;
    257 
    258    ctst->os_state.parent = ptid;
    259 
    260    /* inherit signal mask */
    261    ctst->sig_mask     = ptst->sig_mask;
    262    ctst->tmp_sig_mask = ptst->sig_mask;
    263 
    264    /* Start the child with its threadgroup being the same as the
    265       parent's.  This is so that any exit_group calls that happen
    266       after the child is created but before it sets its
    267       os_state.threadgroup field for real (in thread_wrapper in
    268       syswrap-linux.c), really kill the new thread.  a.k.a this avoids
    269       a race condition in which the thread is unkillable (via
    270       exit_group) because its threadgroup is not set.  The race window
    271       is probably only a few hundred or a few thousand cycles long.
    272       See #226116. */
    273    ctst->os_state.threadgroup = ptst->os_state.threadgroup;
    274 
    275    ML_(guess_and_register_stack) (esp, ctst);
    276 
    277    /* Assume the clone will succeed, and tell any tool that wants to
    278       know that this thread has come into existence.  We cannot defer
    279       it beyond this point because sys_set_thread_area, just below,
    280       causes tCheck to assert by making references to the new ThreadId
    281       if we don't state the new thread exists prior to that point.
    282       If the clone fails, we'll send out a ll_exit notification for it
    283       at the out: label below, to clean up. */
    284    vg_assert(VG_(owns_BigLock_LL)(ptid));
    285    VG_TRACK ( pre_thread_ll_create, ptid, ctid );
    286 
    287    if (flags & VKI_CLONE_SETTLS) {
    288       if (debug)
    289 	 VG_(printf)("clone child has SETTLS: tls info at %p: idx=%d "
    290                      "base=%#lx limit=%x; esp=%#x fs=%x gs=%x\n",
    291 		     tlsinfo, tlsinfo->entry_number,
    292                      tlsinfo->base_addr, tlsinfo->limit,
    293 		     ptst->arch.vex.guest_ESP,
    294 		     ctst->arch.vex.guest_FS, ctst->arch.vex.guest_GS);
    295       res = sys_set_thread_area(ctid, tlsinfo);
    296       if (sr_isError(res))
    297 	 goto out;
    298    }
    299 
    300    flags &= ~VKI_CLONE_SETTLS;
    301 
    302    /* start the thread with everything blocked */
    303    VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);
    304 
    305    /* Create the new thread */
    306    eax = do_syscall_clone_x86_linux(
    307             ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
    308             child_tidptr, parent_tidptr, NULL
    309          );
    310    res = VG_(mk_SysRes_x86_linux)( eax );
    311 
    312    VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);
    313 
    314   out:
    315    if (sr_isError(res)) {
    316       /* clone failed */
    317       VG_(cleanup_thread)(&ctst->arch);
    318       ctst->status = VgTs_Empty;
    319       /* oops.  Better tell the tool the thread exited in a hurry :-) */
    320       VG_TRACK( pre_thread_ll_exit, ctid );
    321    }
    322 
    323    return res;
    324 }
    325 
    326 
    327 /* ---------------------------------------------------------------------
    328    LDT/GDT simulation
    329    ------------------------------------------------------------------ */
    330 
    331 /* Details of the LDT simulation
    332    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    333 
    334    When a program runs natively, the linux kernel allows each *thread*
    335    in it to have its own LDT.  Almost all programs never do this --
    336    it's wildly unportable, after all -- and so the kernel never
    337    allocates the structure, which is just as well as an LDT occupies
    338    64k of memory (8192 entries of size 8 bytes).
    339 
    340    A thread may choose to modify its LDT entries, by doing the
    341    __NR_modify_ldt syscall.  In such a situation the kernel will then
    342    allocate an LDT structure for it.  Each LDT entry is basically a
    343    (base, limit) pair.  A virtual address in a specific segment is
    344    translated to a linear address by adding the segment's base value.
    345    In addition, the virtual address must not exceed the limit value.
    346 
    347    To use an LDT entry, a thread loads one of the segment registers
    348    (%cs, %ss, %ds, %es, %fs, %gs) with the index of the LDT entry (0
    349    .. 8191) it wants to use.  In fact, the required value is (index <<
    350    3) + 7, but that's not important right now.  Any normal instruction
    351    which includes an addressing mode can then be made relative to that
    352    LDT entry by prefixing the insn with a so-called segment-override
    353    prefix, a byte which indicates which of the 6 segment registers
    354    holds the LDT index.
    355 
    356    Now, a key constraint is that valgrind's address checks operate in
    357    terms of linear addresses.  So we have to explicitly translate
    358    virtual addrs into linear addrs, and that means doing a complete
    359    LDT simulation.
    360 
    361    Calls to modify_ldt are intercepted.  For each thread, we maintain
    362    an LDT (with the same normally-never-allocated optimisation that
    363    the kernel does).  This is updated as expected via calls to
    364    modify_ldt.
    365 
    366    When a thread does an amode calculation involving a segment
    367    override prefix, the relevant LDT entry for the thread is
    368    consulted.  It all works.
    369 
    370    There is a conceptual problem, which appears when switching back to
    371    native execution, either temporarily to pass syscalls to the
    372    kernel, or permanently, when debugging V.  Problem at such points
    373    is that it's pretty pointless to copy the simulated machine's
    374    segment registers to the real machine, because we'd also need to
    375    copy the simulated LDT into the real one, and that's prohibitively
    376    expensive.
    377 
    378    Fortunately it looks like no syscalls rely on the segment regs or
    379    LDT being correct, so we can get away with it.  Apart from that the
    380    simulation is pretty straightforward.  All 6 segment registers are
    381    tracked, although only %ds, %es, %fs and %gs are allowed as
    382    prefixes.  Perhaps it could be restricted even more than that -- I
    383    am not sure what is and isn't allowed in user-mode.
    384 */
    385 
    386 /* Translate a struct modify_ldt_ldt_s to a VexGuestX86SegDescr, using
    387    the Linux kernel's logic (cut-n-paste of code in
    388    linux/kernel/ldt.c).  */
    389 
    390 static
    391 void translate_to_hw_format ( /* IN  */ vki_modify_ldt_t* inn,
    392                               /* OUT */ VexGuestX86SegDescr* out,
    393                                         Int oldmode )
    394 {
    395    UInt entry_1, entry_2;
    396    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    397 
    398    if (0)
    399       VG_(printf)("translate_to_hw_format: base %#lx, limit %d\n",
    400                   inn->base_addr, inn->limit );
    401 
    402    /* Allow LDTs to be cleared by the user. */
    403    if (inn->base_addr == 0 && inn->limit == 0) {
    404       if (oldmode ||
    405           (inn->contents == 0      &&
    406            inn->read_exec_only == 1   &&
    407            inn->seg_32bit == 0      &&
    408            inn->limit_in_pages == 0   &&
    409            inn->seg_not_present == 1   &&
    410            inn->useable == 0 )) {
    411          entry_1 = 0;
    412          entry_2 = 0;
    413          goto install;
    414       }
    415    }
    416 
    417    entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
    418              (inn->limit & 0x0ffff);
    419    entry_2 = (inn->base_addr & 0xff000000) |
    420              ((inn->base_addr & 0x00ff0000) >> 16) |
    421              (inn->limit & 0xf0000) |
    422              ((inn->read_exec_only ^ 1) << 9) |
    423              (inn->contents << 10) |
    424              ((inn->seg_not_present ^ 1) << 15) |
    425              (inn->seg_32bit << 22) |
    426              (inn->limit_in_pages << 23) |
    427              0x7000;
    428    if (!oldmode)
    429       entry_2 |= (inn->useable << 20);
    430 
    431    /* Install the new entry ...  */
    432   install:
    433    out->LdtEnt.Words.word1 = entry_1;
    434    out->LdtEnt.Words.word2 = entry_2;
    435 }
    436 
    437 /* Create a zeroed-out GDT. */
    438 static VexGuestX86SegDescr* alloc_zeroed_x86_GDT ( void )
    439 {
    440    Int nbytes = VEX_GUEST_X86_GDT_NENT * sizeof(VexGuestX86SegDescr);
    441    return VG_(calloc)("di.syswrap-x86.azxG.1", nbytes, 1);
    442 }
    443 
    444 /* Create a zeroed-out LDT. */
    445 static VexGuestX86SegDescr* alloc_zeroed_x86_LDT ( void )
    446 {
    447    Int nbytes = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
    448    return VG_(calloc)("di.syswrap-x86.azxL.1", nbytes, 1);
    449 }
    450 
    451 /* Free up an LDT or GDT allocated by the above fns. */
    452 static void free_LDT_or_GDT ( VexGuestX86SegDescr* dt )
    453 {
    454    vg_assert(dt);
    455    VG_(free)(dt);
    456 }
    457 
    458 /* Copy contents between two existing LDTs. */
    459 static void copy_LDT_from_to ( VexGuestX86SegDescr* src,
    460                                VexGuestX86SegDescr* dst )
    461 {
    462    Int i;
    463    vg_assert(src);
    464    vg_assert(dst);
    465    for (i = 0; i < VEX_GUEST_X86_LDT_NENT; i++)
    466       dst[i] = src[i];
    467 }
    468 
    469 /* Copy contents between two existing GDTs. */
    470 static void copy_GDT_from_to ( VexGuestX86SegDescr* src,
    471                                VexGuestX86SegDescr* dst )
    472 {
    473    Int i;
    474    vg_assert(src);
    475    vg_assert(dst);
    476    for (i = 0; i < VEX_GUEST_X86_GDT_NENT; i++)
    477       dst[i] = src[i];
    478 }
    479 
    480 /* Free this thread's DTs, if it has any. */
    481 static void deallocate_LGDTs_for_thread ( VexGuestX86State* vex )
    482 {
    483    vg_assert(sizeof(HWord) == sizeof(void*));
    484 
    485    if (0)
    486       VG_(printf)("deallocate_LGDTs_for_thread: "
    487                   "ldt = 0x%lx, gdt = 0x%lx\n",
    488                   vex->guest_LDT, vex->guest_GDT );
    489 
    490    if (vex->guest_LDT != (HWord)NULL) {
    491       free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_LDT );
    492       vex->guest_LDT = (HWord)NULL;
    493    }
    494 
    495    if (vex->guest_GDT != (HWord)NULL) {
    496       free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_GDT );
    497       vex->guest_GDT = (HWord)NULL;
    498    }
    499 }
    500 
    501 
    502 /*
    503  * linux/kernel/ldt.c
    504  *
    505  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
    506  * Copyright (C) 1999 Ingo Molnar <mingo (at) redhat.com>
    507  */
    508 
    509 /*
    510  * read_ldt() is not really atomic - this is not a problem since
    511  * synchronization of reads and writes done to the LDT has to be
    512  * assured by user-space anyway. Writes are atomic, to protect
    513  * the security checks done on new descriptors.
    514  */
    515 static
    516 SysRes read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
    517 {
    518    SysRes res;
    519    UInt   i, size;
    520    UChar* ldt;
    521 
    522    if (0)
    523       VG_(printf)("read_ldt: tid = %d, ptr = %p, bytecount = %d\n",
    524                   tid, ptr, bytecount );
    525 
    526    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    527    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    528 
    529    ldt = (UChar*)(VG_(threads)[tid].arch.vex.guest_LDT);
    530    res = VG_(mk_SysRes_Success)( 0 );
    531    if (ldt == NULL)
    532       /* LDT not allocated, meaning all entries are null */
    533       goto out;
    534 
    535    size = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
    536    if (size > bytecount)
    537       size = bytecount;
    538 
    539    res = VG_(mk_SysRes_Success)( size );
    540    for (i = 0; i < size; i++)
    541       ptr[i] = ldt[i];
    542 
    543   out:
    544    return res;
    545 }
    546 
    547 
    548 static
    549 SysRes write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
    550 {
    551    SysRes res;
    552    VexGuestX86SegDescr* ldt;
    553    vki_modify_ldt_t* ldt_info;
    554 
    555    if (0)
    556       VG_(printf)("write_ldt: tid = %d, ptr = %p, "
    557                   "bytecount = %d, oldmode = %d\n",
    558                   tid, ptr, bytecount, oldmode );
    559 
    560    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    561    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    562 
    563    ldt      = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
    564    ldt_info = (vki_modify_ldt_t*)ptr;
    565 
    566    res = VG_(mk_SysRes_Error)( VKI_EINVAL );
    567    if (bytecount != sizeof(vki_modify_ldt_t))
    568       goto out;
    569 
    570    res = VG_(mk_SysRes_Error)( VKI_EINVAL );
    571    if (ldt_info->entry_number >= VEX_GUEST_X86_LDT_NENT)
    572       goto out;
    573    if (ldt_info->contents == 3) {
    574       if (oldmode)
    575          goto out;
    576       if (ldt_info->seg_not_present == 0)
    577          goto out;
    578    }
    579 
    580    /* If this thread doesn't have an LDT, we'd better allocate it
    581       now. */
    582    if (ldt == NULL) {
    583       ldt = alloc_zeroed_x86_LDT();
    584       VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
    585    }
    586 
    587    /* Install the new entry ...  */
    588    translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number], oldmode );
    589    res = VG_(mk_SysRes_Success)( 0 );
    590 
    591   out:
    592    return res;
    593 }
    594 
    595 
    596 static SysRes sys_modify_ldt ( ThreadId tid,
    597                                Int func, void* ptr, UInt bytecount )
    598 {
    599    SysRes ret = VG_(mk_SysRes_Error)( VKI_ENOSYS );
    600 
    601    switch (func) {
    602    case 0:
    603       ret = read_ldt(tid, ptr, bytecount);
    604       break;
    605    case 1:
    606       ret = write_ldt(tid, ptr, bytecount, 1);
    607       break;
    608    case 2:
    609       VG_(unimplemented)("sys_modify_ldt: func == 2");
    610       /* god knows what this is about */
    611       /* ret = read_default_ldt(ptr, bytecount); */
    612       /*UNREACHED*/
    613       break;
    614    case 0x11:
    615       ret = write_ldt(tid, ptr, bytecount, 0);
    616       break;
    617    }
    618    return ret;
    619 }
    620 
    621 
    622 static SysRes sys_set_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
    623 {
    624    Int                  idx;
    625    VexGuestX86SegDescr* gdt;
    626 
    627    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    628    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    629 
    630    if (info == NULL)
    631       return VG_(mk_SysRes_Error)( VKI_EFAULT );
    632 
    633    gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
    634 
    635    /* If the thread doesn't have a GDT, allocate it now. */
    636    if (!gdt) {
    637       gdt = alloc_zeroed_x86_GDT();
    638       VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
    639    }
    640 
    641    idx = info->entry_number;
    642 
    643    if (idx == -1) {
    644       /* Find and use the first free entry.  Don't allocate entry
    645          zero, because the hardware will never do that, and apparently
    646          doing so confuses some code (perhaps stuff running on
    647          Wine). */
    648       for (idx = 1; idx < VEX_GUEST_X86_GDT_NENT; idx++) {
    649          if (gdt[idx].LdtEnt.Words.word1 == 0
    650              && gdt[idx].LdtEnt.Words.word2 == 0)
    651             break;
    652       }
    653 
    654       if (idx == VEX_GUEST_X86_GDT_NENT)
    655          return VG_(mk_SysRes_Error)( VKI_ESRCH );
    656    } else if (idx < 0 || idx == 0 || idx >= VEX_GUEST_X86_GDT_NENT) {
    657       /* Similarly, reject attempts to use GDT[0]. */
    658       return VG_(mk_SysRes_Error)( VKI_EINVAL );
    659    }
    660 
    661    translate_to_hw_format(info, &gdt[idx], 0);
    662 
    663    VG_TRACK( pre_mem_write, Vg_CoreSysCall, tid,
    664              "set_thread_area(info->entry)",
    665              (Addr) & info->entry_number, sizeof(unsigned int) );
    666    info->entry_number = idx;
    667    VG_TRACK( post_mem_write, Vg_CoreSysCall, tid,
    668              (Addr) & info->entry_number, sizeof(unsigned int) );
    669 
    670    return VG_(mk_SysRes_Success)( 0 );
    671 }
    672 
    673 
    674 static SysRes sys_get_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
    675 {
    676    Int idx;
    677    VexGuestX86SegDescr* gdt;
    678 
    679    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    680    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    681 
    682    if (info == NULL)
    683       return VG_(mk_SysRes_Error)( VKI_EFAULT );
    684 
    685    idx = info->entry_number;
    686 
    687    if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
    688       return VG_(mk_SysRes_Error)( VKI_EINVAL );
    689 
    690    gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
    691 
    692    /* If the thread doesn't have a GDT, allocate it now. */
    693    if (!gdt) {
    694       gdt = alloc_zeroed_x86_GDT();
    695       VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
    696    }
    697 
    698    info->base_addr = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
    699                      ( gdt[idx].LdtEnt.Bits.BaseMid << 16 ) |
    700                      gdt[idx].LdtEnt.Bits.BaseLow;
    701    info->limit = ( gdt[idx].LdtEnt.Bits.LimitHi << 16 ) |
    702                    gdt[idx].LdtEnt.Bits.LimitLow;
    703    info->seg_32bit = gdt[idx].LdtEnt.Bits.Default_Big;
    704    info->contents = ( gdt[idx].LdtEnt.Bits.Type >> 2 ) & 0x3;
    705    info->read_exec_only = ( gdt[idx].LdtEnt.Bits.Type & 0x1 ) ^ 0x1;
    706    info->limit_in_pages = gdt[idx].LdtEnt.Bits.Granularity;
    707    info->seg_not_present = gdt[idx].LdtEnt.Bits.Pres ^ 0x1;
    708    info->useable = gdt[idx].LdtEnt.Bits.Sys;
    709    info->reserved = 0;
    710 
    711    return VG_(mk_SysRes_Success)( 0 );
    712 }
    713 
    714 /* ---------------------------------------------------------------------
    715    More thread stuff
    716    ------------------------------------------------------------------ */
    717 
    718 void VG_(cleanup_thread) ( ThreadArchState* arch )
    719 {
    720    /* Release arch-specific resources held by this thread. */
    721    /* On x86, we have to dump the LDT and GDT. */
    722    deallocate_LGDTs_for_thread( &arch->vex );
    723 }
    724 
    725 
    726 static void setup_child ( /*OUT*/ ThreadArchState *child,
    727                           /*IN*/  ThreadArchState *parent,
    728                           Bool inherit_parents_GDT )
    729 {
    730    /* We inherit our parent's guest state. */
    731    child->vex = parent->vex;
    732    child->vex_shadow1 = parent->vex_shadow1;
    733    child->vex_shadow2 = parent->vex_shadow2;
    734 
    735    /* We inherit our parent's LDT. */
    736    if (parent->vex.guest_LDT == (HWord)NULL) {
    737       /* We hope this is the common case. */
    738       child->vex.guest_LDT = (HWord)NULL;
    739    } else {
    740       /* No luck .. we have to take a copy of the parent's. */
    741       child->vex.guest_LDT = (HWord)alloc_zeroed_x86_LDT();
    742       copy_LDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_LDT,
    743                         (VexGuestX86SegDescr*)child->vex.guest_LDT );
    744    }
    745 
    746    /* Either we start with an empty GDT (the usual case) or inherit a
    747       copy of our parents' one (Quadrics Elan3 driver -style clone
    748       only). */
    749    child->vex.guest_GDT = (HWord)NULL;
    750 
    751    if (inherit_parents_GDT && parent->vex.guest_GDT != (HWord)NULL) {
    752       child->vex.guest_GDT = (HWord)alloc_zeroed_x86_GDT();
    753       copy_GDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_GDT,
    754                         (VexGuestX86SegDescr*)child->vex.guest_GDT );
    755    }
    756 }
    757 
    758 
    759 /* ---------------------------------------------------------------------
    760    PRE/POST wrappers for x86/Linux-specific syscalls
    761    ------------------------------------------------------------------ */
    762 
    763 #define PRE(name)       DEFN_PRE_TEMPLATE(x86_linux, name)
    764 #define POST(name)      DEFN_POST_TEMPLATE(x86_linux, name)
    765 
    766 /* Add prototypes for the wrappers declared here, so that gcc doesn't
    767    harass us for not having prototypes.  Really this is a kludge --
    768    the right thing to do is to make these wrappers 'static' since they
    769    aren't visible outside this file, but that requires even more macro
    770    magic. */
    771 DECL_TEMPLATE(x86_linux, sys_stat64);
    772 DECL_TEMPLATE(x86_linux, sys_fstatat64);
    773 DECL_TEMPLATE(x86_linux, sys_fstat64);
    774 DECL_TEMPLATE(x86_linux, sys_lstat64);
    775 DECL_TEMPLATE(x86_linux, sys_clone);
    776 DECL_TEMPLATE(x86_linux, old_mmap);
    777 DECL_TEMPLATE(x86_linux, sys_mmap2);
    778 DECL_TEMPLATE(x86_linux, sys_sigreturn);
    779 DECL_TEMPLATE(x86_linux, sys_rt_sigreturn);
    780 DECL_TEMPLATE(x86_linux, sys_modify_ldt);
    781 DECL_TEMPLATE(x86_linux, sys_set_thread_area);
    782 DECL_TEMPLATE(x86_linux, sys_get_thread_area);
    783 DECL_TEMPLATE(x86_linux, sys_ptrace);
    784 DECL_TEMPLATE(x86_linux, sys_sigsuspend);
    785 DECL_TEMPLATE(x86_linux, old_select);
    786 DECL_TEMPLATE(x86_linux, sys_vm86old);
    787 DECL_TEMPLATE(x86_linux, sys_vm86);
    788 DECL_TEMPLATE(x86_linux, sys_syscall223);
    789 
    790 PRE(old_select)
    791 {
    792    /* struct sel_arg_struct {
    793       unsigned long n;
    794       fd_set *inp, *outp, *exp;
    795       struct timeval *tvp;
    796       };
    797    */
    798    PRE_REG_READ1(long, "old_select", struct sel_arg_struct *, args);
    799    PRE_MEM_READ( "old_select(args)", ARG1, 5*sizeof(UWord) );
    800    *flags |= SfMayBlock;
    801    {
    802       UInt* arg_struct = (UInt*)ARG1;
    803       UInt a1, a2, a3, a4, a5;
    804 
    805       a1 = arg_struct[0];
    806       a2 = arg_struct[1];
    807       a3 = arg_struct[2];
    808       a4 = arg_struct[3];
    809       a5 = arg_struct[4];
    810 
    811       PRINT("old_select ( %d, %#x, %#x, %#x, %#x )", a1,a2,a3,a4,a5);
    812       if (a2 != (Addr)NULL)
    813          PRE_MEM_READ( "old_select(readfds)",   a2, a1/8 /* __FD_SETSIZE/8 */ );
    814       if (a3 != (Addr)NULL)
    815          PRE_MEM_READ( "old_select(writefds)",  a3, a1/8 /* __FD_SETSIZE/8 */ );
    816       if (a4 != (Addr)NULL)
    817          PRE_MEM_READ( "old_select(exceptfds)", a4, a1/8 /* __FD_SETSIZE/8 */ );
    818       if (a5 != (Addr)NULL)
    819          PRE_MEM_READ( "old_select(timeout)", a5, sizeof(struct vki_timeval) );
    820    }
    821 }
    822 
/* Wrapper for clone().  As used below: ARG1 = flags, ARG2 = child stack
   pointer, ARG3 = parent_tidptr, ARG4 = tlsinfo, ARG5 = child_tidptr.

   The optional pointer arguments selected by the flag bits are checked
   first; any that is not valid for the client fails the call with
   EFAULT.  The call is then dispatched on the VM/FS/FILES/VFORK bits:
   thread creation goes to do_clone(), fork and vfork go to
   ML_(do_fork_clone)(), and any other combination is reported as
   unsupported. */
PRE(sys_clone)
{
   UInt cloneflags;
   Bool badarg = False;

   PRINT("sys_clone ( %lx, %#lx, %#lx, %#lx, %#lx )",ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ2(int, "clone",
                 unsigned long, flags,
                 void *, child_stack);

   /* ARG3..ARG5 are only examined when the corresponding flag says they
      are in use. */
   if (ARG1 & VKI_CLONE_PARENT_SETTID) {
      if (VG_(tdict).track_pre_reg_read) {
         PRA3("clone", int *, parent_tidptr);
      }
      PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
      if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int),
                                             VKI_PROT_WRITE)) {
         badarg = True;
      }
   }
   if (ARG1 & VKI_CLONE_SETTLS) {
      if (VG_(tdict).track_pre_reg_read) {
         PRA4("clone", vki_modify_ldt_t *, tlsinfo);
      }
      PRE_MEM_READ("clone(tlsinfo)", ARG4, sizeof(vki_modify_ldt_t));
      if (!VG_(am_is_valid_for_client)(ARG4, sizeof(vki_modify_ldt_t),
                                             VKI_PROT_READ)) {
         badarg = True;
      }
   }
   if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
      if (VG_(tdict).track_pre_reg_read) {
         PRA5("clone", int *, child_tidptr);
      }
      PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
      if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int),
                                             VKI_PROT_WRITE)) {
         badarg = True;
      }
   }

   /* All the pointer checks above are run before failing, so every bad
      argument gets reported, not just the first. */
   if (badarg) {
      SET_STATUS_Failure( VKI_EFAULT );
      return;
   }

   cloneflags = ARG1;

   /* The VKI_CSIGNAL bits of the flags must name a signal the client is
      allowed to use. */
   if (!ML_(client_signal_OK)(ARG1 & VKI_CSIGNAL)) {
      SET_STATUS_Failure( VKI_EINVAL );
      return;
   }

   /* Be ultra-paranoid and filter out any clone-variants we don't understand:
      - ??? specifies clone flags of 0x100011
      - ??? specifies clone flags of 0x1200011.
      - NPTL specifies clone flags of 0x7D0F00.
      - The Quadrics Elan3 driver specifies clone flags of 0xF00.
      - Newer Quadrics Elan3 drivers with NPTL support specify 0x410F00.
      Everything else is rejected.
   */
   /* NB: the leading "1 ||" makes the condition below always true, so
      the whitelist is currently inert and the else-branch (and its
      goto) is never taken. */
   if (
        1 ||
        /* 11 Nov 05: for the time being, disable this ultra-paranoia.
           The switch below probably does a good enough job. */
          (cloneflags == 0x100011 || cloneflags == 0x1200011
                                  || cloneflags == 0x7D0F00
                                  || cloneflags == 0x790F00
                                  || cloneflags == 0x3D0F00
                                  || cloneflags == 0x410F00
                                  || cloneflags == 0xF00
                                  || cloneflags == 0xF21)) {
     /* OK */
   }
   else {
      /* Nah.  We don't like it.  Go away. */
      goto reject;
   }

   /* Only look at the flags we really care about */
   switch (cloneflags & (VKI_CLONE_VM | VKI_CLONE_FS
                         | VKI_CLONE_FILES | VKI_CLONE_VFORK)) {
   case VKI_CLONE_VM | VKI_CLONE_FS | VKI_CLONE_FILES:
      /* thread creation */
      SET_STATUS_from_SysRes(
         do_clone(tid,
                  ARG1,         /* flags */
                  (Addr)ARG2,   /* child ESP */
                  (Int *)ARG3,  /* parent_tidptr */
                  (Int *)ARG5,  /* child_tidptr */
                  (vki_modify_ldt_t *)ARG4)); /* set_tls */
      break;

   case VKI_CLONE_VFORK | VKI_CLONE_VM: /* vfork */
      /* FALLTHROUGH - assume vfork == fork */
      /* Strip VFORK and VM so that the request below is an ordinary
         fork. */
      cloneflags &= ~(VKI_CLONE_VFORK | VKI_CLONE_VM);

   case 0: /* plain fork */
      SET_STATUS_from_SysRes(
         ML_(do_fork_clone)(tid,
                       cloneflags,      /* flags */
                       (Int *)ARG3,     /* parent_tidptr */
                       (Int *)ARG5));   /* child_tidptr */
      break;

   default:
   reject:
      /* Reached for any unhandled flag combination, and also the
         target of the (currently dead) goto above. */
      /* should we just ENOSYS? */
      VG_(message)(Vg_UserMsg, "\n");
      VG_(message)(Vg_UserMsg, "Unsupported clone() flags: 0x%lx\n", ARG1);
      VG_(message)(Vg_UserMsg, "\n");
      VG_(message)(Vg_UserMsg, "The only supported clone() uses are:\n");
      VG_(message)(Vg_UserMsg, " - via a threads library (LinuxThreads or NPTL)\n");
      VG_(message)(Vg_UserMsg, " - via the implementation of fork or vfork\n");
      VG_(message)(Vg_UserMsg, " - for the Quadrics Elan3 user-space driver\n");
      /* NOTE(review): presumably VG_(unimplemented) does not return --
         no status is set on this path; confirm. */
      VG_(unimplemented)
         ("Valgrind does not support general clone().");
   }

   /* On success, mark whichever tid words were requested as written. */
   if (SUCCESS) {
      if (ARG1 & VKI_CLONE_PARENT_SETTID)
         POST_MEM_WRITE(ARG3, sizeof(Int));
      if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID))
         POST_MEM_WRITE(ARG5, sizeof(Int));

      /* Thread creation was successful; let the child have the chance
         to run */
      *flags |= SfYieldAfter;
   }
}
    953 
    954 PRE(sys_sigreturn)
    955 {
    956    /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
    957       an explanation of what follows. */
    958 
    959    ThreadState* tst;
    960    PRINT("sys_sigreturn ( )");
    961 
    962    vg_assert(VG_(is_valid_tid)(tid));
    963    vg_assert(tid >= 1 && tid < VG_N_THREADS);
    964    vg_assert(VG_(is_running_thread)(tid));
    965 
    966    /* Adjust esp to point to start of frame; skip back up over
    967       sigreturn sequence's "popl %eax" and handler ret addr */
    968    tst = VG_(get_ThreadState)(tid);
    969    tst->arch.vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
    970    /* XXX why does ESP change differ from rt_sigreturn case below? */
    971 
    972    /* This is only so that the EIP is (might be) useful to report if
    973       something goes wrong in the sigreturn */
    974    ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
    975 
    976    /* Restore register state from frame and remove it */
    977    VG_(sigframe_destroy)(tid, False);
    978 
    979    /* Tell the driver not to update the guest state with the "result",
    980       and set a bogus result to keep it happy. */
    981    *flags |= SfNoWriteResult;
    982    SET_STATUS_Success(0);
    983 
    984    /* Check to see if any signals arose as a result of this. */
    985    *flags |= SfPollAfter;
    986 }
    987 
    988 PRE(sys_rt_sigreturn)
    989 {
    990    /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
    991       an explanation of what follows. */
    992 
    993    ThreadState* tst;
    994    PRINT("sys_rt_sigreturn ( )");
    995 
    996    vg_assert(VG_(is_valid_tid)(tid));
    997    vg_assert(tid >= 1 && tid < VG_N_THREADS);
    998    vg_assert(VG_(is_running_thread)(tid));
    999 
   1000    /* Adjust esp to point to start of frame; skip back up over handler
   1001       ret addr */
   1002    tst = VG_(get_ThreadState)(tid);
   1003    tst->arch.vex.guest_ESP -= sizeof(Addr);
   1004    /* XXX why does ESP change differ from sigreturn case above? */
   1005 
   1006    /* This is only so that the EIP is (might be) useful to report if
   1007       something goes wrong in the sigreturn */
   1008    ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
   1009 
   1010    /* Restore register state from frame and remove it */
   1011    VG_(sigframe_destroy)(tid, True);
   1012 
   1013    /* Tell the driver not to update the guest state with the "result",
   1014       and set a bogus result to keep it happy. */
   1015    *flags |= SfNoWriteResult;
   1016    SET_STATUS_Success(0);
   1017 
   1018    /* Check to see if any signals arose as a result of this. */
   1019    *flags |= SfPollAfter;
   1020 }
   1021 
   1022 PRE(sys_modify_ldt)
   1023 {
   1024    PRINT("sys_modify_ldt ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   1025    PRE_REG_READ3(int, "modify_ldt", int, func, void *, ptr,
   1026                  unsigned long, bytecount);
   1027 
   1028    if (ARG1 == 0) {
   1029       /* read the LDT into ptr */
   1030       PRE_MEM_WRITE( "modify_ldt(ptr)", ARG2, ARG3 );
   1031    }
   1032    if (ARG1 == 1 || ARG1 == 0x11) {
   1033       /* write the LDT with the entry pointed at by ptr */
   1034       PRE_MEM_READ( "modify_ldt(ptr)", ARG2, sizeof(vki_modify_ldt_t) );
   1035    }
   1036    /* "do" the syscall ourselves; the kernel never sees it */
   1037    SET_STATUS_from_SysRes( sys_modify_ldt( tid, ARG1, (void*)ARG2, ARG3 ) );
   1038 
   1039    if (ARG1 == 0 && SUCCESS && RES > 0) {
   1040       POST_MEM_WRITE( ARG2, RES );
   1041    }
   1042 }
   1043 
   1044 PRE(sys_set_thread_area)
   1045 {
   1046    PRINT("sys_set_thread_area ( %#lx )", ARG1);
   1047    PRE_REG_READ1(int, "set_thread_area", struct user_desc *, u_info)
   1048    PRE_MEM_READ( "set_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
   1049 
   1050    /* "do" the syscall ourselves; the kernel never sees it */
   1051    SET_STATUS_from_SysRes( sys_set_thread_area( tid, (void *)ARG1 ) );
   1052 }
   1053 
   1054 PRE(sys_get_thread_area)
   1055 {
   1056    PRINT("sys_get_thread_area ( %#lx )", ARG1);
   1057    PRE_REG_READ1(int, "get_thread_area", struct user_desc *, u_info)
   1058    PRE_MEM_WRITE( "get_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
   1059 
   1060    /* "do" the syscall ourselves; the kernel never sees it */
   1061    SET_STATUS_from_SysRes( sys_get_thread_area( tid, (void *)ARG1 ) );
   1062 
   1063    if (SUCCESS) {
   1064       POST_MEM_WRITE( ARG1, sizeof(vki_modify_ldt_t) );
   1065    }
   1066 }
   1067 
   1068 // Parts of this are x86-specific, but the *PEEK* cases are generic.
   1069 //
   1070 // ARG3 is only used for pointers into the traced process's address
   1071 // space and for offsets into the traced process's struct
   1072 // user_regs_struct. It is never a pointer into this process's memory
   1073 // space, and we should therefore not check anything it points to.
   1074 PRE(sys_ptrace)
   1075 {
   1076    PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4);
   1077    PRE_REG_READ4(int, "ptrace",
   1078                  long, request, long, pid, long, addr, long, data);
   1079    switch (ARG1) {
   1080    case VKI_PTRACE_PEEKTEXT:
   1081    case VKI_PTRACE_PEEKDATA:
   1082    case VKI_PTRACE_PEEKUSR:
   1083       PRE_MEM_WRITE( "ptrace(peek)", ARG4,
   1084 		     sizeof (long));
   1085       break;
   1086    case VKI_PTRACE_GETREGS:
   1087       PRE_MEM_WRITE( "ptrace(getregs)", ARG4,
   1088 		     sizeof (struct vki_user_regs_struct));
   1089       break;
   1090    case VKI_PTRACE_GETFPREGS:
   1091       PRE_MEM_WRITE( "ptrace(getfpregs)", ARG4,
   1092 		     sizeof (struct vki_user_i387_struct));
   1093       break;
   1094    case VKI_PTRACE_GETFPXREGS:
   1095       PRE_MEM_WRITE( "ptrace(getfpxregs)", ARG4,
   1096                      sizeof(struct vki_user_fxsr_struct) );
   1097       break;
   1098    case VKI_PTRACE_GET_THREAD_AREA:
   1099       PRE_MEM_WRITE( "ptrace(get_thread_area)", ARG4,
   1100                      sizeof(struct vki_user_desc) );
   1101       break;
   1102    case VKI_PTRACE_SETREGS:
   1103       PRE_MEM_READ( "ptrace(setregs)", ARG4,
   1104 		     sizeof (struct vki_user_regs_struct));
   1105       break;
   1106    case VKI_PTRACE_SETFPREGS:
   1107       PRE_MEM_READ( "ptrace(setfpregs)", ARG4,
   1108 		     sizeof (struct vki_user_i387_struct));
   1109       break;
   1110    case VKI_PTRACE_SETFPXREGS:
   1111       PRE_MEM_READ( "ptrace(setfpxregs)", ARG4,
   1112                      sizeof(struct vki_user_fxsr_struct) );
   1113       break;
   1114    case VKI_PTRACE_SET_THREAD_AREA:
   1115       PRE_MEM_READ( "ptrace(set_thread_area)", ARG4,
   1116                      sizeof(struct vki_user_desc) );
   1117       break;
   1118    case VKI_PTRACE_GETEVENTMSG:
   1119       PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
   1120       break;
   1121    case VKI_PTRACE_GETSIGINFO:
   1122       PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
   1123       break;
   1124    case VKI_PTRACE_SETSIGINFO:
   1125       PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
   1126       break;
   1127    case VKI_PTRACE_GETREGSET:
   1128       ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
   1129       break;
   1130    case VKI_PTRACE_SETREGSET:
   1131       ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
   1132       break;
   1133    default:
   1134       break;
   1135    }
   1136 }
   1137 
   1138 POST(sys_ptrace)
   1139 {
   1140    switch (ARG1) {
   1141    case VKI_PTRACE_PEEKTEXT:
   1142    case VKI_PTRACE_PEEKDATA:
   1143    case VKI_PTRACE_PEEKUSR:
   1144       POST_MEM_WRITE( ARG4, sizeof (long));
   1145       break;
   1146    case VKI_PTRACE_GETREGS:
   1147       POST_MEM_WRITE( ARG4, sizeof (struct vki_user_regs_struct));
   1148       break;
   1149    case VKI_PTRACE_GETFPREGS:
   1150       POST_MEM_WRITE( ARG4, sizeof (struct vki_user_i387_struct));
   1151       break;
   1152    case VKI_PTRACE_GETFPXREGS:
   1153       POST_MEM_WRITE( ARG4, sizeof(struct vki_user_fxsr_struct) );
   1154       break;
   1155    case VKI_PTRACE_GET_THREAD_AREA:
   1156       POST_MEM_WRITE( ARG4, sizeof(struct vki_user_desc) );
   1157       break;
   1158    case VKI_PTRACE_GETEVENTMSG:
   1159       POST_MEM_WRITE( ARG4, sizeof(unsigned long));
   1160       break;
   1161    case VKI_PTRACE_GETSIGINFO:
   1162       /* XXX: This is a simplification. Different parts of the
   1163        * siginfo_t are valid depending on the type of signal.
   1164        */
   1165       POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
   1166       break;
   1167    case VKI_PTRACE_GETREGSET:
   1168       ML_(linux_POST_getregset)(tid, ARG3, ARG4);
   1169       break;
   1170    default:
   1171       break;
   1172    }
   1173 }
   1174 
   1175 PRE(old_mmap)
   1176 {
   1177    /* struct mmap_arg_struct {
   1178          unsigned long addr;
   1179          unsigned long len;
   1180          unsigned long prot;
   1181          unsigned long flags;
   1182          unsigned long fd;
   1183          unsigned long offset;
   1184    }; */
   1185    UWord a1, a2, a3, a4, a5, a6;
   1186    SysRes r;
   1187 
   1188    UWord* args = (UWord*)ARG1;
   1189    PRE_REG_READ1(long, "old_mmap", struct mmap_arg_struct *, args);
   1190    PRE_MEM_READ( "old_mmap(args)", (Addr)args, 6*sizeof(UWord) );
   1191 
   1192    a1 = args[1-1];
   1193    a2 = args[2-1];
   1194    a3 = args[3-1];
   1195    a4 = args[4-1];
   1196    a5 = args[5-1];
   1197    a6 = args[6-1];
   1198 
   1199    PRINT("old_mmap ( %#lx, %llu, %ld, %ld, %ld, %ld )",
   1200          a1, (ULong)a2, a3, a4, a5, a6 );
   1201 
   1202    r = ML_(generic_PRE_sys_mmap)( tid, a1, a2, a3, a4, a5, (Off64T)a6 );
   1203    SET_STATUS_from_SysRes(r);
   1204 }
   1205 
   1206 PRE(sys_mmap2)
   1207 {
   1208    SysRes r;
   1209 
   1210    // Exactly like old_mmap() except:
   1211    //  - all 6 args are passed in regs, rather than in a memory-block.
   1212    //  - the file offset is specified in pagesize units rather than bytes,
   1213    //    so that it can be used for files bigger than 2^32 bytes.
   1214    // pagesize or 4K-size units in offset?  For ppc32/64-linux, this is
   1215    // 4K-sized.  Assert that the page size is 4K here for safety.
   1216    vg_assert(VKI_PAGE_SIZE == 4096);
   1217    PRINT("sys_mmap2 ( %#lx, %llu, %ld, %ld, %ld, %ld )",
   1218          ARG1, (ULong)ARG2, ARG3, ARG4, ARG5, ARG6 );
   1219    PRE_REG_READ6(long, "mmap2",
   1220                  unsigned long, start, unsigned long, length,
   1221                  unsigned long, prot,  unsigned long, flags,
   1222                  unsigned long, fd,    unsigned long, offset);
   1223 
   1224    r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
   1225                                        4096 * (Off64T)ARG6 );
   1226    SET_STATUS_from_SysRes(r);
   1227 }
   1228 
   1229 // XXX: lstat64/fstat64/stat64 are generic, but not necessarily
   1230 // applicable to every architecture -- I think only to 32-bit archs.
   1231 // We're going to need something like linux/core_os32.h for such
   1232 // things, eventually, I think.  --njn
   1233 PRE(sys_lstat64)
   1234 {
   1235    PRINT("sys_lstat64 ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   1236    PRE_REG_READ2(long, "lstat64", char *, file_name, struct stat64 *, buf);
   1237    PRE_MEM_RASCIIZ( "lstat64(file_name)", ARG1 );
   1238    PRE_MEM_WRITE( "lstat64(buf)", ARG2, sizeof(struct vki_stat64) );
   1239 }
   1240 
   1241 POST(sys_lstat64)
   1242 {
   1243    vg_assert(SUCCESS);
   1244    if (RES == 0) {
   1245       POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   1246    }
   1247 }
   1248 
   1249 PRE(sys_stat64)
   1250 {
   1251    FUSE_COMPATIBLE_MAY_BLOCK();
   1252    PRINT("sys_stat64 ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   1253    PRE_REG_READ2(long, "stat64", char *, file_name, struct stat64 *, buf);
   1254    PRE_MEM_RASCIIZ( "stat64(file_name)", ARG1 );
   1255    PRE_MEM_WRITE( "stat64(buf)", ARG2, sizeof(struct vki_stat64) );
   1256 }
   1257 
   1258 POST(sys_stat64)
   1259 {
   1260    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   1261 }
   1262 
   1263 PRE(sys_fstatat64)
   1264 {
   1265    FUSE_COMPATIBLE_MAY_BLOCK();
   1266    PRINT("sys_fstatat64 ( %ld, %#lx(%s), %#lx )",ARG1,ARG2,(char*)ARG2,ARG3);
   1267    PRE_REG_READ3(long, "fstatat64",
   1268                  int, dfd, char *, file_name, struct stat64 *, buf);
   1269    PRE_MEM_RASCIIZ( "fstatat64(file_name)", ARG2 );
   1270    PRE_MEM_WRITE( "fstatat64(buf)", ARG3, sizeof(struct vki_stat64) );
   1271 }
   1272 
   1273 POST(sys_fstatat64)
   1274 {
   1275    POST_MEM_WRITE( ARG3, sizeof(struct vki_stat64) );
   1276 }
   1277 
   1278 PRE(sys_fstat64)
   1279 {
   1280    PRINT("sys_fstat64 ( %ld, %#lx )",ARG1,ARG2);
   1281    PRE_REG_READ2(long, "fstat64", unsigned long, fd, struct stat64 *, buf);
   1282    PRE_MEM_WRITE( "fstat64(buf)", ARG2, sizeof(struct vki_stat64) );
   1283 }
   1284 
   1285 POST(sys_fstat64)
   1286 {
   1287    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   1288 }
   1289 
   1290 /* NB: arm-linux has a clone of this one, and ppc32-linux has an almost
   1291    identical version. */
   1292 PRE(sys_sigsuspend)
   1293 {
   1294    /* The C library interface to sigsuspend just takes a pointer to
   1295       a signal mask but this system call has three arguments - the first
   1296       two don't appear to be used by the kernel and are always passed as
   1297       zero by glibc and the third is the first word of the signal mask
   1298       so only 32 signals are supported.
   1299 
   1300       In fact glibc normally uses rt_sigsuspend if it is available as
   1301       that takes a pointer to the signal mask so supports more signals.
   1302     */
   1303    *flags |= SfMayBlock;
   1304    PRINT("sys_sigsuspend ( %ld, %ld, %ld )", ARG1,ARG2,ARG3 );
   1305    PRE_REG_READ3(int, "sigsuspend",
   1306                  int, history0, int, history1,
   1307                  vki_old_sigset_t, mask);
   1308 }
   1309 
   1310 PRE(sys_vm86old)
   1311 {
   1312    PRINT("sys_vm86old ( %#lx )", ARG1);
   1313    PRE_REG_READ1(int, "vm86old", struct vm86_struct *, info);
   1314    PRE_MEM_WRITE( "vm86old(info)", ARG1, sizeof(struct vki_vm86_struct));
   1315 }
   1316 
   1317 POST(sys_vm86old)
   1318 {
   1319    POST_MEM_WRITE( ARG1, sizeof(struct vki_vm86_struct));
   1320 }
   1321 
   1322 PRE(sys_vm86)
   1323 {
   1324    PRINT("sys_vm86 ( %ld, %#lx )", ARG1,ARG2);
   1325    PRE_REG_READ2(int, "vm86", unsigned long, fn, struct vm86plus_struct *, v86);
   1326    if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
   1327       PRE_MEM_WRITE( "vm86(v86)", ARG2, sizeof(struct vki_vm86plus_struct));
   1328 }
   1329 
   1330 POST(sys_vm86)
   1331 {
   1332    if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
   1333       POST_MEM_WRITE( ARG2, sizeof(struct vki_vm86plus_struct));
   1334 }
   1335 
   1336 
   1337 /* ---------------------------------------------------------------
   1338    PRE/POST wrappers for x86/Linux-variant specific syscalls
   1339    ------------------------------------------------------------ */
   1340 
   1341 PRE(sys_syscall223)
   1342 {
   1343    Int err;
   1344 
   1345    /* 223 is used by sys_bproc.  If we're not on a declared bproc
   1346       variant, fail in the usual way. */
   1347 
   1348    if (!KernelVariantiS(KernelVariant_bproc, VG_(clo_kernel_variant))) {
   1349       PRINT("non-existent syscall! (syscall 223)");
   1350       PRE_REG_READ0(long, "ni_syscall(223)");
   1351       SET_STATUS_Failure( VKI_ENOSYS );
   1352       return;
   1353    }
   1354 
   1355    err = ML_(linux_variant_PRE_sys_bproc)( ARG1, ARG2, ARG3,
   1356                                            ARG4, ARG5, ARG6 );
   1357    if (err) {
   1358       SET_STATUS_Failure( err );
   1359       return;
   1360    }
   1361    /* Let it go through. */
   1362    *flags |= SfMayBlock; /* who knows?  play safe. */
   1363 }
   1364 
   1365 POST(sys_syscall223)
   1366 {
   1367    ML_(linux_variant_POST_sys_bproc)( ARG1, ARG2, ARG3,
   1368                                       ARG4, ARG5, ARG6 );
   1369 }
   1370 
/* No more wrapper definitions follow: drop the PRE/POST shorthands. */
#undef PRE
#undef POST
   1373 
   1374 
   1375 /* ---------------------------------------------------------------------
   1376    The x86/Linux syscall table
   1377    ------------------------------------------------------------------ */
   1378 
/* Add an x86-linux specific wrapper to a syscall table.  PLAX_ expands
   to WRAPPER_ENTRY_X_ and PLAXY to WRAPPER_ENTRY_XY.  In the table
   below, PLAX_ is used for wrappers that have only a PRE handler in
   this file (e.g. sys_sigsuspend) and PLAXY for wrappers that have both
   PRE and POST handlers (e.g. sys_ptrace). */
#define PLAX_(sysno, name)    WRAPPER_ENTRY_X_(x86_linux, sysno, name)
#define PLAXY(sysno, name)    WRAPPER_ENTRY_XY(x86_linux, sysno, name)
   1382 
   1383 
   1384 // This table maps from __NR_xxx syscall numbers (from
   1385 // linux/include/asm-i386/unistd.h) to the appropriate PRE/POST sys_foo()
   1386 // wrappers on x86 (as per sys_call_table in linux/arch/i386/kernel/entry.S).
   1387 //
// For those syscalls not handled by Valgrind, the annotation indicates the
// arch/OS combination the syscall belongs to, e.g. */* (generic), */Linux
// (Linux only), ?/? (unknown).
   1391 
   1392 static SyscallTableEntry syscall_table[] = {
   1393 //zz    //   (restart_syscall)                             // 0
   1394    GENX_(__NR_exit,              sys_exit),           // 1
   1395    GENX_(__NR_fork,              sys_fork),           // 2
   1396    GENXY(__NR_read,              sys_read),           // 3
   1397    GENX_(__NR_write,             sys_write),          // 4
   1398 
   1399    GENXY(__NR_open,              sys_open),           // 5
   1400    GENXY(__NR_close,             sys_close),          // 6
   1401    GENXY(__NR_waitpid,           sys_waitpid),        // 7
   1402    GENXY(__NR_creat,             sys_creat),          // 8
   1403    GENX_(__NR_link,              sys_link),           // 9
   1404 
   1405    GENX_(__NR_unlink,            sys_unlink),         // 10
   1406    GENX_(__NR_execve,            sys_execve),         // 11
   1407    GENX_(__NR_chdir,             sys_chdir),          // 12
   1408    GENXY(__NR_time,              sys_time),           // 13
   1409    GENX_(__NR_mknod,             sys_mknod),          // 14
   1410 
   1411    GENX_(__NR_chmod,             sys_chmod),          // 15
   1412 //zz    LINX_(__NR_lchown,            sys_lchown16),       // 16
   1413    GENX_(__NR_break,             sys_ni_syscall),     // 17
   1414 //zz    //   (__NR_oldstat,           sys_stat),           // 18 (obsolete)
   1415    LINX_(__NR_lseek,             sys_lseek),          // 19
   1416 
   1417    GENX_(__NR_getpid,            sys_getpid),         // 20
   1418    LINX_(__NR_mount,             sys_mount),          // 21
   1419    LINX_(__NR_umount,            sys_oldumount),      // 22
   1420    LINX_(__NR_setuid,            sys_setuid16),       // 23 ## P
   1421    LINX_(__NR_getuid,            sys_getuid16),       // 24 ## P
   1422 
   1423    LINX_(__NR_stime,             sys_stime),          // 25 * (SVr4,SVID,X/OPEN)
   1424    PLAXY(__NR_ptrace,            sys_ptrace),         // 26
   1425    GENX_(__NR_alarm,             sys_alarm),          // 27
   1426 //zz    //   (__NR_oldfstat,          sys_fstat),          // 28 * L -- obsolete
   1427    GENX_(__NR_pause,             sys_pause),          // 29
   1428 
   1429    LINX_(__NR_utime,             sys_utime),          // 30
   1430    GENX_(__NR_stty,              sys_ni_syscall),     // 31
   1431    GENX_(__NR_gtty,              sys_ni_syscall),     // 32
   1432    GENX_(__NR_access,            sys_access),         // 33
   1433    GENX_(__NR_nice,              sys_nice),           // 34
   1434 
   1435    GENX_(__NR_ftime,             sys_ni_syscall),     // 35
   1436    GENX_(__NR_sync,              sys_sync),           // 36
   1437    GENX_(__NR_kill,              sys_kill),           // 37
   1438    GENX_(__NR_rename,            sys_rename),         // 38
   1439    GENX_(__NR_mkdir,             sys_mkdir),          // 39
   1440 
   1441    GENX_(__NR_rmdir,             sys_rmdir),          // 40
   1442    GENXY(__NR_dup,               sys_dup),            // 41
   1443    LINXY(__NR_pipe,              sys_pipe),           // 42
   1444    GENXY(__NR_times,             sys_times),          // 43
   1445    GENX_(__NR_prof,              sys_ni_syscall),     // 44
   1446 //zz
   1447    GENX_(__NR_brk,               sys_brk),            // 45
   1448    LINX_(__NR_setgid,            sys_setgid16),       // 46
   1449    LINX_(__NR_getgid,            sys_getgid16),       // 47
   1450 //zz    //   (__NR_signal,            sys_signal),         // 48 */* (ANSI C)
   1451    LINX_(__NR_geteuid,           sys_geteuid16),      // 49
   1452 
   1453    LINX_(__NR_getegid,           sys_getegid16),      // 50
   1454    GENX_(__NR_acct,              sys_acct),           // 51
   1455    LINX_(__NR_umount2,           sys_umount),         // 52
   1456    GENX_(__NR_lock,              sys_ni_syscall),     // 53
   1457    LINXY(__NR_ioctl,             sys_ioctl),          // 54
   1458 
   1459    LINXY(__NR_fcntl,             sys_fcntl),          // 55
   1460    GENX_(__NR_mpx,               sys_ni_syscall),     // 56
   1461    GENX_(__NR_setpgid,           sys_setpgid),        // 57
   1462    GENX_(__NR_ulimit,            sys_ni_syscall),     // 58
   1463 //zz    //   (__NR_oldolduname,       sys_olduname),       // 59 Linux -- obsolete
   1464 //zz
   1465    GENX_(__NR_umask,             sys_umask),          // 60
   1466    GENX_(__NR_chroot,            sys_chroot),         // 61
   1467 //zz    //   (__NR_ustat,             sys_ustat)           // 62 SVr4 -- deprecated
   1468    GENXY(__NR_dup2,              sys_dup2),           // 63
   1469    GENX_(__NR_getppid,           sys_getppid),        // 64
   1470 
   1471    GENX_(__NR_getpgrp,           sys_getpgrp),        // 65
   1472    GENX_(__NR_setsid,            sys_setsid),         // 66
   1473    LINXY(__NR_sigaction,         sys_sigaction),      // 67
   1474 //zz    //   (__NR_sgetmask,          sys_sgetmask),       // 68 */* (ANSI C)
   1475 //zz    //   (__NR_ssetmask,          sys_ssetmask),       // 69 */* (ANSI C)
   1476 //zz
   1477    LINX_(__NR_setreuid,          sys_setreuid16),     // 70
   1478    LINX_(__NR_setregid,          sys_setregid16),     // 71
   1479    PLAX_(__NR_sigsuspend,        sys_sigsuspend),     // 72
   1480    LINXY(__NR_sigpending,        sys_sigpending),     // 73
   1481    GENX_(__NR_sethostname,       sys_sethostname),    // 74
   1482 //zz
   1483    GENX_(__NR_setrlimit,         sys_setrlimit),      // 75
   1484    GENXY(__NR_getrlimit,         sys_old_getrlimit),  // 76
   1485    GENXY(__NR_getrusage,         sys_getrusage),      // 77
   1486    GENXY(__NR_gettimeofday,      sys_gettimeofday),   // 78
   1487    GENX_(__NR_settimeofday,      sys_settimeofday),   // 79
   1488 
   1489    LINXY(__NR_getgroups,         sys_getgroups16),    // 80
   1490    LINX_(__NR_setgroups,         sys_setgroups16),    // 81
   1491    PLAX_(__NR_select,            old_select),         // 82
   1492    GENX_(__NR_symlink,           sys_symlink),        // 83
   1493 //zz    //   (__NR_oldlstat,          sys_lstat),          // 84 -- obsolete
   1494 //zz
   1495    GENX_(__NR_readlink,          sys_readlink),       // 85
   1496 //zz    //   (__NR_uselib,            sys_uselib),         // 86 */Linux
   1497 //zz    //   (__NR_swapon,            sys_swapon),         // 87 */Linux
   1498 //zz    //   (__NR_reboot,            sys_reboot),         // 88 */Linux
   1499 //zz    //   (__NR_readdir,           old_readdir),        // 89 -- superseded
   1500 //zz
   1501    PLAX_(__NR_mmap,              old_mmap),           // 90
   1502    GENXY(__NR_munmap,            sys_munmap),         // 91
   1503    GENX_(__NR_truncate,          sys_truncate),       // 92
   1504    GENX_(__NR_ftruncate,         sys_ftruncate),      // 93
   1505    GENX_(__NR_fchmod,            sys_fchmod),         // 94
   1506 
   1507    LINX_(__NR_fchown,            sys_fchown16),       // 95
   1508    GENX_(__NR_getpriority,       sys_getpriority),    // 96
   1509    GENX_(__NR_setpriority,       sys_setpriority),    // 97
   1510    GENX_(__NR_profil,            sys_ni_syscall),     // 98
   1511    GENXY(__NR_statfs,            sys_statfs),         // 99
   1512 
   1513    GENXY(__NR_fstatfs,           sys_fstatfs),        // 100
   1514    LINX_(__NR_ioperm,            sys_ioperm),         // 101
   1515    LINXY(__NR_socketcall,        sys_socketcall),     // 102 x86/Linux-only
   1516    LINXY(__NR_syslog,            sys_syslog),         // 103
   1517    GENXY(__NR_setitimer,         sys_setitimer),      // 104
   1518 
   1519    GENXY(__NR_getitimer,         sys_getitimer),      // 105
   1520    GENXY(__NR_stat,              sys_newstat),        // 106
   1521    GENXY(__NR_lstat,             sys_newlstat),       // 107
   1522    GENXY(__NR_fstat,             sys_newfstat),       // 108
   1523 //zz    //   (__NR_olduname,          sys_uname),          // 109 -- obsolete
   1524 //zz
   1525    GENX_(__NR_iopl,              sys_iopl),           // 110
   1526    LINX_(__NR_vhangup,           sys_vhangup),        // 111
   1527    GENX_(__NR_idle,              sys_ni_syscall),     // 112
   1528    PLAXY(__NR_vm86old,           sys_vm86old),        // 113 x86/Linux-only
   1529    GENXY(__NR_wait4,             sys_wait4),          // 114
   1530 //zz
   1531 //zz    //   (__NR_swapoff,           sys_swapoff),        // 115 */Linux
   1532    LINXY(__NR_sysinfo,           sys_sysinfo),        // 116
   1533    LINXY(__NR_ipc,               sys_ipc),            // 117
   1534    GENX_(__NR_fsync,             sys_fsync),          // 118
   1535    PLAX_(__NR_sigreturn,         sys_sigreturn),      // 119 ?/Linux
   1536 
   1537    PLAX_(__NR_clone,             sys_clone),          // 120
   1538 //zz    //   (__NR_setdomainname,     sys_setdomainname),  // 121 */*(?)
   1539    GENXY(__NR_uname,             sys_newuname),       // 122
   1540    PLAX_(__NR_modify_ldt,        sys_modify_ldt),     // 123
   1541    LINXY(__NR_adjtimex,          sys_adjtimex),       // 124
   1542 
   1543    GENXY(__NR_mprotect,          sys_mprotect),       // 125
   1544    LINXY(__NR_sigprocmask,       sys_sigprocmask),    // 126
   1545 //zz    // Nb: create_module() was removed 2.4-->2.6
   1546    GENX_(__NR_create_module,     sys_ni_syscall),     // 127
   1547    LINX_(__NR_init_module,       sys_init_module),    // 128
   1548    LINX_(__NR_delete_module,     sys_delete_module),  // 129
   1549 //zz
   1550 //zz    // Nb: get_kernel_syms() was removed 2.4-->2.6
   1551    GENX_(__NR_get_kernel_syms,   sys_ni_syscall),     // 130
   1552    LINX_(__NR_quotactl,          sys_quotactl),       // 131
   1553    GENX_(__NR_getpgid,           sys_getpgid),        // 132
   1554    GENX_(__NR_fchdir,            sys_fchdir),         // 133
   1555 //zz    //   (__NR_bdflush,           sys_bdflush),        // 134 */Linux
   1556 //zz
   1557 //zz    //   (__NR_sysfs,             sys_sysfs),          // 135 SVr4
   1558    LINX_(__NR_personality,       sys_personality),    // 136
   1559    GENX_(__NR_afs_syscall,       sys_ni_syscall),     // 137
   1560    LINX_(__NR_setfsuid,          sys_setfsuid16),     // 138
   1561    LINX_(__NR_setfsgid,          sys_setfsgid16),     // 139
   1562 
   1563    LINXY(__NR__llseek,           sys_llseek),         // 140
   1564    GENXY(__NR_getdents,          sys_getdents),       // 141
   1565    GENX_(__NR__newselect,        sys_select),         // 142
   1566    GENX_(__NR_flock,             sys_flock),          // 143
   1567    GENX_(__NR_msync,             sys_msync),          // 144
   1568 
   1569    GENXY(__NR_readv,             sys_readv),          // 145
   1570    GENX_(__NR_writev,            sys_writev),         // 146
   1571    GENX_(__NR_getsid,            sys_getsid),         // 147
   1572    GENX_(__NR_fdatasync,         sys_fdatasync),      // 148
   1573    LINXY(__NR__sysctl,           sys_sysctl),         // 149
   1574 
   1575    GENX_(__NR_mlock,             sys_mlock),          // 150
   1576    GENX_(__NR_munlock,           sys_munlock),        // 151
   1577    GENX_(__NR_mlockall,          sys_mlockall),       // 152
   1578    LINX_(__NR_munlockall,        sys_munlockall),     // 153
   1579    LINXY(__NR_sched_setparam,    sys_sched_setparam), // 154
   1580 
   1581    LINXY(__NR_sched_getparam,         sys_sched_getparam),        // 155
   1582    LINX_(__NR_sched_setscheduler,     sys_sched_setscheduler),    // 156
   1583    LINX_(__NR_sched_getscheduler,     sys_sched_getscheduler),    // 157
   1584    LINX_(__NR_sched_yield,            sys_sched_yield),           // 158
   1585    LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max),// 159
   1586 
   1587    LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min),// 160
   1588    LINXY(__NR_sched_rr_get_interval,  sys_sched_rr_get_interval), // 161
   1589    GENXY(__NR_nanosleep,         sys_nanosleep),      // 162
   1590    GENX_(__NR_mremap,            sys_mremap),         // 163
   1591    LINX_(__NR_setresuid,         sys_setresuid16),    // 164
   1592 
   1593    LINXY(__NR_getresuid,         sys_getresuid16),    // 165
   1594    PLAXY(__NR_vm86,              sys_vm86),           // 166 x86/Linux-only
   1595    GENX_(__NR_query_module,      sys_ni_syscall),     // 167
   1596    GENXY(__NR_poll,              sys_poll),           // 168
   1597 //zz    //   (__NR_nfsservctl,        sys_nfsservctl),     // 169 */Linux
   1598 //zz
   1599    LINX_(__NR_setresgid,         sys_setresgid16),    // 170
   1600    LINXY(__NR_getresgid,         sys_getresgid16),    // 171
   1601    LINXY(__NR_prctl,             sys_prctl),          // 172
   1602    PLAX_(__NR_rt_sigreturn,      sys_rt_sigreturn),   // 173 x86/Linux only?
   1603    LINXY(__NR_rt_sigaction,      sys_rt_sigaction),   // 174
   1604 
   1605    LINXY(__NR_rt_sigprocmask,    sys_rt_sigprocmask), // 175
   1606    LINXY(__NR_rt_sigpending,     sys_rt_sigpending),  // 176
   1607    LINXY(__NR_rt_sigtimedwait,   sys_rt_sigtimedwait),// 177
   1608    LINXY(__NR_rt_sigqueueinfo,   sys_rt_sigqueueinfo),// 178
   1609    LINX_(__NR_rt_sigsuspend,     sys_rt_sigsuspend),  // 179
   1610 
   1611    GENXY(__NR_pread64,           sys_pread64),        // 180
   1612    GENX_(__NR_pwrite64,          sys_pwrite64),       // 181
   1613    LINX_(__NR_chown,             sys_chown16),        // 182
   1614    GENXY(__NR_getcwd,            sys_getcwd),         // 183
   1615    LINXY(__NR_capget,            sys_capget),         // 184
   1616 
   1617    LINX_(__NR_capset,            sys_capset),         // 185
   1618    GENXY(__NR_sigaltstack,       sys_sigaltstack),    // 186
   1619    LINXY(__NR_sendfile,          sys_sendfile),       // 187
   1620    GENXY(__NR_getpmsg,           sys_getpmsg),        // 188
   1621    GENX_(__NR_putpmsg,           sys_putpmsg),        // 189
   1622 
   1623    // Nb: we treat vfork as fork
   1624    GENX_(__NR_vfork,             sys_fork),           // 190
   1625    GENXY(__NR_ugetrlimit,        sys_getrlimit),      // 191
   1626    PLAX_(__NR_mmap2,             sys_mmap2),          // 192
   1627    GENX_(__NR_truncate64,        sys_truncate64),     // 193
   1628    GENX_(__NR_ftruncate64,       sys_ftruncate64),    // 194
   1629 
   1630    PLAXY(__NR_stat64,            sys_stat64),         // 195
   1631    PLAXY(__NR_lstat64,           sys_lstat64),        // 196
   1632    PLAXY(__NR_fstat64,           sys_fstat64),        // 197
   1633    GENX_(__NR_lchown32,          sys_lchown),         // 198
   1634    GENX_(__NR_getuid32,          sys_getuid),         // 199
   1635 
   1636    GENX_(__NR_getgid32,          sys_getgid),         // 200
   1637    GENX_(__NR_geteuid32,         sys_geteuid),        // 201
   1638    GENX_(__NR_getegid32,         sys_getegid),        // 202
   1639    GENX_(__NR_setreuid32,        sys_setreuid),       // 203
   1640    GENX_(__NR_setregid32,        sys_setregid),       // 204
   1641 
   1642    GENXY(__NR_getgroups32,       sys_getgroups),      // 205
   1643    GENX_(__NR_setgroups32,       sys_setgroups),      // 206
   1644    GENX_(__NR_fchown32,          sys_fchown),         // 207
   1645    LINX_(__NR_setresuid32,       sys_setresuid),      // 208
   1646    LINXY(__NR_getresuid32,       sys_getresuid),      // 209
   1647 
   1648    LINX_(__NR_setresgid32,       sys_setresgid),      // 210
   1649    LINXY(__NR_getresgid32,       sys_getresgid),      // 211
   1650    GENX_(__NR_chown32,           sys_chown),          // 212
   1651    GENX_(__NR_setuid32,          sys_setuid),         // 213
   1652    GENX_(__NR_setgid32,          sys_setgid),         // 214
   1653 
   1654    LINX_(__NR_setfsuid32,        sys_setfsuid),       // 215
   1655    LINX_(__NR_setfsgid32,        sys_setfsgid),       // 216
   1656    LINX_(__NR_pivot_root,        sys_pivot_root),     // 217
   1657    GENXY(__NR_mincore,           sys_mincore),        // 218
   1658    GENX_(__NR_madvise,           sys_madvise),        // 219
   1659 
   1660    GENXY(__NR_getdents64,        sys_getdents64),     // 220
   1661    LINXY(__NR_fcntl64,           sys_fcntl64),        // 221
   1662    GENX_(222,                    sys_ni_syscall),     // 222
   1663    PLAXY(223,                    sys_syscall223),     // 223 // sys_bproc?
   1664    LINX_(__NR_gettid,            sys_gettid),         // 224
   1665 
   1666    LINX_(__NR_readahead,         sys_readahead),      // 225 */Linux
   1667    LINX_(__NR_setxattr,          sys_setxattr),       // 226
   1668    LINX_(__NR_lsetxattr,         sys_lsetxattr),      // 227
   1669    LINX_(__NR_fsetxattr,         sys_fsetxattr),      // 228
   1670    LINXY(__NR_getxattr,          sys_getxattr),       // 229
   1671 
   1672    LINXY(__NR_lgetxattr,         sys_lgetxattr),      // 230
   1673    LINXY(__NR_fgetxattr,         sys_fgetxattr),      // 231
   1674    LINXY(__NR_listxattr,         sys_listxattr),      // 232
   1675    LINXY(__NR_llistxattr,        sys_llistxattr),     // 233
   1676    LINXY(__NR_flistxattr,        sys_flistxattr),     // 234
   1677 
   1678    LINX_(__NR_removexattr,       sys_removexattr),    // 235
   1679    LINX_(__NR_lremovexattr,      sys_lremovexattr),   // 236
   1680    LINX_(__NR_fremovexattr,      sys_fremovexattr),   // 237
   1681    LINXY(__NR_tkill,             sys_tkill),          // 238 */Linux
   1682    LINXY(__NR_sendfile64,        sys_sendfile64),     // 239
   1683 
   1684    LINXY(__NR_futex,             sys_futex),             // 240
   1685    LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 241
   1686    LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 242
   1687    PLAX_(__NR_set_thread_area,   sys_set_thread_area),   // 243
   1688    PLAX_(__NR_get_thread_area,   sys_get_thread_area),   // 244
   1689 
   1690    LINXY(__NR_io_setup,          sys_io_setup),       // 245
   1691    LINX_(__NR_io_destroy,        sys_io_destroy),     // 246
   1692    LINXY(__NR_io_getevents,      sys_io_getevents),   // 247
   1693    LINX_(__NR_io_submit,         sys_io_submit),      // 248
   1694    LINXY(__NR_io_cancel,         sys_io_cancel),      // 249
   1695 
   1696    LINX_(__NR_fadvise64,         sys_fadvise64),      // 250 */(Linux?)
   1697    GENX_(251,                    sys_ni_syscall),     // 251
   1698    LINX_(__NR_exit_group,        sys_exit_group),     // 252
   1699    LINXY(__NR_lookup_dcookie,    sys_lookup_dcookie), // 253
   1700    LINXY(__NR_epoll_create,      sys_epoll_create),   // 254
   1701 
   1702    LINX_(__NR_epoll_ctl,         sys_epoll_ctl),         // 255
   1703    LINXY(__NR_epoll_wait,        sys_epoll_wait),        // 256
   1704 //zz    //   (__NR_remap_file_pages,  sys_remap_file_pages),  // 257 */Linux
   1705    LINX_(__NR_set_tid_address,   sys_set_tid_address),   // 258
   1706    LINXY(__NR_timer_create,      sys_timer_create),      // 259
   1707 
   1708    LINXY(__NR_timer_settime,     sys_timer_settime),  // (timer_create+1)
   1709    LINXY(__NR_timer_gettime,     sys_timer_gettime),  // (timer_create+2)
   1710    LINX_(__NR_timer_getoverrun,  sys_timer_getoverrun),//(timer_create+3)
   1711    LINX_(__NR_timer_delete,      sys_timer_delete),   // (timer_create+4)
   1712    LINX_(__NR_clock_settime,     sys_clock_settime),  // (timer_create+5)
   1713 
   1714    LINXY(__NR_clock_gettime,     sys_clock_gettime),  // (timer_create+6)
   1715    LINXY(__NR_clock_getres,      sys_clock_getres),   // (timer_create+7)
   1716    LINXY(__NR_clock_nanosleep,   sys_clock_nanosleep),// (timer_create+8) */*
   1717    GENXY(__NR_statfs64,          sys_statfs64),       // 268
   1718    GENXY(__NR_fstatfs64,         sys_fstatfs64),      // 269
   1719 
   1720    LINX_(__NR_tgkill,            sys_tgkill),         // 270 */Linux
   1721    GENX_(__NR_utimes,            sys_utimes),         // 271
   1722    LINX_(__NR_fadvise64_64,      sys_fadvise64_64),   // 272 */(Linux?)
   1723    GENX_(__NR_vserver,           sys_ni_syscall),     // 273
   1724    LINX_(__NR_mbind,             sys_mbind),          // 274 ?/?
   1725 
   1726    LINXY(__NR_get_mempolicy,     sys_get_mempolicy),  // 275 ?/?
   1727    LINX_(__NR_set_mempolicy,     sys_set_mempolicy),  // 276 ?/?
   1728    LINXY(__NR_mq_open,           sys_mq_open),        // 277
   1729    LINX_(__NR_mq_unlink,         sys_mq_unlink),      // (mq_open+1)
   1730    LINX_(__NR_mq_timedsend,      sys_mq_timedsend),   // (mq_open+2)
   1731 
   1732    LINXY(__NR_mq_timedreceive,   sys_mq_timedreceive),// (mq_open+3)
   1733    LINX_(__NR_mq_notify,         sys_mq_notify),      // (mq_open+4)
   1734    LINXY(__NR_mq_getsetattr,     sys_mq_getsetattr),  // (mq_open+5)
   1735    GENX_(__NR_sys_kexec_load,    sys_ni_syscall),     // 283
   1736    LINXY(__NR_waitid,            sys_waitid),         // 284
   1737 
   1738    GENX_(285,                    sys_ni_syscall),     // 285
   1739    LINX_(__NR_add_key,           sys_add_key),        // 286
   1740    LINX_(__NR_request_key,       sys_request_key),    // 287
   1741    LINXY(__NR_keyctl,            sys_keyctl),         // 288
   1742    LINX_(__NR_ioprio_set,        sys_ioprio_set),     // 289
   1743 
   1744    LINX_(__NR_ioprio_get,        sys_ioprio_get),     // 290
   1745    LINX_(__NR_inotify_init,	 sys_inotify_init),   // 291
   1746    LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 292
   1747    LINX_(__NR_inotify_rm_watch,	 sys_inotify_rm_watch), // 293
   1748 //   LINX_(__NR_migrate_pages,	 sys_migrate_pages),    // 294
   1749 
   1750    LINXY(__NR_openat,		 sys_openat),           // 295
   1751    LINX_(__NR_mkdirat,		 sys_mkdirat),          // 296
   1752    LINX_(__NR_mknodat,		 sys_mknodat),          // 297
   1753    LINX_(__NR_fchownat,		 sys_fchownat),         // 298
   1754    LINX_(__NR_futimesat,	 sys_futimesat),        // 299
   1755 
   1756    PLAXY(__NR_fstatat64,	 sys_fstatat64),        // 300
   1757    LINX_(__NR_unlinkat,		 sys_unlinkat),         // 301
   1758    LINX_(__NR_renameat,		 sys_renameat),         // 302
   1759    LINX_(__NR_linkat,		 sys_linkat),           // 303
   1760    LINX_(__NR_symlinkat,	 sys_symlinkat),        // 304
   1761 
   1762    LINX_(__NR_readlinkat,	 sys_readlinkat),       // 305
   1763    LINX_(__NR_fchmodat,		 sys_fchmodat),         // 306
   1764    LINX_(__NR_faccessat,	 sys_faccessat),        // 307
   1765    LINX_(__NR_pselect6,		 sys_pselect6),         // 308
   1766    LINXY(__NR_ppoll,		 sys_ppoll),            // 309
   1767 
   1768    LINX_(__NR_unshare,		 sys_unshare),          // 310
   1769    LINX_(__NR_set_robust_list,	 sys_set_robust_list),  // 311
   1770    LINXY(__NR_get_robust_list,	 sys_get_robust_list),  // 312
   1771    LINX_(__NR_splice,            sys_splice),           // 313
   1772    LINX_(__NR_sync_file_range,   sys_sync_file_range),  // 314
   1773 
   1774    LINX_(__NR_tee,               sys_tee),              // 315
   1775    LINXY(__NR_vmsplice,          sys_vmsplice),         // 316
   1776    LINXY(__NR_move_pages,        sys_move_pages),       // 317
   1777    LINXY(__NR_getcpu,            sys_getcpu),           // 318
   1778    LINXY(__NR_epoll_pwait,       sys_epoll_pwait),      // 319
   1779 
   1780    LINX_(__NR_utimensat,         sys_utimensat),        // 320
   1781    LINXY(__NR_signalfd,          sys_signalfd),         // 321
   1782    LINXY(__NR_timerfd_create,    sys_timerfd_create),   // 322
   1783    LINXY(__NR_eventfd,           sys_eventfd),          // 323
   1784    LINX_(__NR_fallocate,         sys_fallocate),        // 324
   1785 
   1786    LINXY(__NR_timerfd_settime,   sys_timerfd_settime),  // 325
   1787    LINXY(__NR_timerfd_gettime,   sys_timerfd_gettime),  // 326
   1788    LINXY(__NR_signalfd4,         sys_signalfd4),        // 327
   1789    LINXY(__NR_eventfd2,          sys_eventfd2),         // 328
   1790    LINXY(__NR_epoll_create1,     sys_epoll_create1),     // 329
   1791 
   1792    LINXY(__NR_dup3,              sys_dup3),             // 330
   1793    LINXY(__NR_pipe2,             sys_pipe2),            // 331
   1794    LINXY(__NR_inotify_init1,     sys_inotify_init1),    // 332
   1795    LINXY(__NR_preadv,            sys_preadv),           // 333
   1796    LINX_(__NR_pwritev,           sys_pwritev),          // 334
   1797 
   1798    LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo),// 335
   1799    LINXY(__NR_perf_event_open,   sys_perf_event_open),  // 336
   1800    LINXY(__NR_recvmmsg,          sys_recvmmsg),         // 337
   1801    LINXY(__NR_fanotify_init,     sys_fanotify_init),    // 338
   1802    LINX_(__NR_fanotify_mark,     sys_fanotify_mark),    // 339
   1803 
   1804    LINXY(__NR_prlimit64,         sys_prlimit64),        // 340
   1805    LINXY(__NR_name_to_handle_at, sys_name_to_handle_at),// 341
   1806    LINXY(__NR_open_by_handle_at, sys_open_by_handle_at),// 342
   1807    LINXY(__NR_clock_adjtime,     sys_clock_adjtime),    // 343
   1808    LINX_(__NR_syncfs,            sys_syncfs),           // 344
   1809 
   1810    LINXY(__NR_sendmmsg,          sys_sendmmsg),         // 345
   1811 //   LINX_(__NR_setns,             sys_ni_syscall),       // 346
   1812    LINXY(__NR_process_vm_readv,  sys_process_vm_readv), // 347
   1813    LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 348
   1814    LINX_(__NR_kcmp,              sys_kcmp),             // 349
   1815 
   1816 //   LIN__(__NR_finit_module,      sys_ni_syscall),       // 350
   1817 //   LIN__(__NR_sched_setattr,     sys_ni_syscall),       // 351
   1818 //   LIN__(__NR_sched_getattr,     sys_ni_syscall),       // 352
   1819 //   LIN__(__NR_renameat2,         sys_ni_syscall),       // 353
   1820 //   LIN__(__NR_seccomp,           sys_ni_syscall),       // 354
   1821 
   1822    LINXY(__NR_getrandom,         sys_getrandom),        // 355
   1823    LINXY(__NR_memfd_create,      sys_memfd_create)      // 356
   1824 //   LIN__(__NR_bpf,               sys_ni_syscall)        // 357
   1825 };
   1826 
   1827 SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
   1828 {
   1829    const UInt syscall_table_size
   1830       = sizeof(syscall_table) / sizeof(syscall_table[0]);
   1831 
   1832    /* Is it in the contiguous initial section of the table? */
   1833    if (sysno < syscall_table_size) {
   1834       SyscallTableEntry* sys = &syscall_table[sysno];
   1835       if (sys->before == NULL)
   1836          return NULL; /* no entry */
   1837       else
   1838          return sys;
   1839    }
   1840 
   1841    /* Can't find a wrapper */
   1842    return NULL;
   1843 }
   1844 
   1845 #endif // defined(VGP_x86_linux)
   1846 
   1847 /*--------------------------------------------------------------------*/
   1848 /*--- end                                                          ---*/
   1849 /*--------------------------------------------------------------------*/
   1850