Home | History | Annotate | Download | only in m_syswrap
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Platform-specific syscalls stuff.        syswrap-x86-linux.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2013 Nicholas Nethercote
     11       njn (at) valgrind.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #if defined(VGP_x86_linux)
     32 
     33 /* TODO/FIXME jrs 20050207: assignments to the syscall return result
     34    in interrupted_syscall() need to be reviewed.  They don't seem
     35    to assign the shadow state.
     36 */
     37 
     38 #include "pub_core_basics.h"
     39 #include "pub_core_vki.h"
     40 #include "pub_core_vkiscnums.h"
     41 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
     42 #include "pub_core_threadstate.h"
     43 #include "pub_core_aspacemgr.h"
     44 #include "pub_core_debuglog.h"
     45 #include "pub_core_libcbase.h"
     46 #include "pub_core_libcassert.h"
     47 #include "pub_core_libcprint.h"
     48 #include "pub_core_libcproc.h"
     49 #include "pub_core_libcsignal.h"
     50 #include "pub_core_mallocfree.h"
     51 #include "pub_core_options.h"
     52 #include "pub_core_scheduler.h"
     53 #include "pub_core_sigframe.h"      // For VG_(sigframe_destroy)()
     54 #include "pub_core_signals.h"
     55 #include "pub_core_syscall.h"
     56 #include "pub_core_syswrap.h"
     57 #include "pub_core_tooliface.h"
     58 #include "pub_core_stacks.h"        // VG_(register_stack)
     59 
     60 #include "priv_types_n_macros.h"
     61 #include "priv_syswrap-generic.h"    /* for decls of generic wrappers */
     62 #include "priv_syswrap-linux.h"      /* for decls of linux-ish wrappers */
     63 #include "priv_syswrap-linux-variants.h" /* decls of linux variant wrappers */
     64 #include "priv_syswrap-main.h"
     65 
     66 
     67 /* ---------------------------------------------------------------------
     68    clone() handling
     69    ------------------------------------------------------------------ */
     70 
     71 /* Call f(arg1), but first switch stacks, using 'stack' as the new
     72    stack, and use 'retaddr' as f's return-to address.  Also, clear all
     73    the integer registers before entering f.*/
     74 __attribute__((noreturn))
     75 void ML_(call_on_new_stack_0_1) ( Addr stack,
     76 			          Addr retaddr,
     77 			          void (*f)(Word),
     78                                   Word arg1 );
     79 //  4(%esp) == stack
     80 //  8(%esp) == retaddr
     81 // 12(%esp) == f
     82 // 16(%esp) == arg1
     83 asm(
     84 ".text\n"
     85 ".globl vgModuleLocal_call_on_new_stack_0_1\n"
     86 "vgModuleLocal_call_on_new_stack_0_1:\n"
     87 "   movl %esp, %esi\n"     // remember old stack pointer
     88 "   movl 4(%esi), %esp\n"  // set stack
     89 "   pushl 16(%esi)\n"      // arg1 to stack
     90 "   pushl  8(%esi)\n"      // retaddr to stack
     91 "   pushl 12(%esi)\n"      // f to stack
     92 "   movl $0, %eax\n"       // zero all GP regs
     93 "   movl $0, %ebx\n"
     94 "   movl $0, %ecx\n"
     95 "   movl $0, %edx\n"
     96 "   movl $0, %esi\n"
     97 "   movl $0, %edi\n"
     98 "   movl $0, %ebp\n"
     99 "   ret\n"                 // jump to f
    100 "   ud2\n"                 // should never get here
    101 ".previous\n"
    102 );
    103 
    104 
    105 /*
    106         Perform a clone system call.  clone is strange because it has
    107         fork()-like return-twice semantics, so it needs special
    108         handling here.
    109 
    110         Upon entry, we have:
    111 
    112             int (fn)(void*)     in  0+FSZ(%esp)
    113             void* child_stack   in  4+FSZ(%esp)
    114             int flags           in  8+FSZ(%esp)
    115             void* arg           in 12+FSZ(%esp)
    116             pid_t* child_tid    in 16+FSZ(%esp)
    117             pid_t* parent_tid   in 20+FSZ(%esp)
    118             void* tls_ptr       in 24+FSZ(%esp)
    119 
    120         System call requires:
    121 
    122             int    $__NR_clone  in %eax
    123             int    flags        in %ebx
    124             void*  child_stack  in %ecx
    125             pid_t* parent_tid   in %edx
    126             pid_t* child_tid    in %edi
    127             void*  tls_ptr      in %esi
    128 
    129 	Returns an Int encoded in the linux-x86 way, not a SysRes.
    130  */
    131 #define FSZ               "4+4+4+4" /* frame size = retaddr+ebx+edi+esi */
    132 #define __NR_CLONE        VG_STRINGIFY(__NR_clone)
    133 #define __NR_EXIT         VG_STRINGIFY(__NR_exit)
    134 
    135 extern
    136 Int do_syscall_clone_x86_linux ( Word (*fn)(void *),
    137                                  void* stack,
    138                                  Int   flags,
    139                                  void* arg,
    140                                  Int*  child_tid,
    141                                  Int*  parent_tid,
    142                                  vki_modify_ldt_t * );
    143 asm(
    144 ".text\n"
    145 ".globl do_syscall_clone_x86_linux\n"
    146 "do_syscall_clone_x86_linux:\n"
    147 "        push    %ebx\n"
    148 "        push    %edi\n"
    149 "        push    %esi\n"
    150 
    151          /* set up child stack with function and arg */
    152 "        movl     4+"FSZ"(%esp), %ecx\n"    /* syscall arg2: child stack */
    153 "        movl    12+"FSZ"(%esp), %ebx\n"    /* fn arg */
    154 "        movl     0+"FSZ"(%esp), %eax\n"    /* fn */
    155 "        lea     -8(%ecx), %ecx\n"          /* make space on stack */
    156 "        movl    %ebx, 4(%ecx)\n"           /*   fn arg */
    157 "        movl    %eax, 0(%ecx)\n"           /*   fn */
    158 
    159          /* get other args to clone */
    160 "        movl     8+"FSZ"(%esp), %ebx\n"    /* syscall arg1: flags */
    161 "        movl    20+"FSZ"(%esp), %edx\n"    /* syscall arg3: parent tid * */
    162 "        movl    16+"FSZ"(%esp), %edi\n"    /* syscall arg5: child tid * */
    163 "        movl    24+"FSZ"(%esp), %esi\n"    /* syscall arg4: tls_ptr * */
    164 "        movl    $"__NR_CLONE", %eax\n"
    165 "        int     $0x80\n"                   /* clone() */
    166 "        testl   %eax, %eax\n"              /* child if retval == 0 */
    167 "        jnz     1f\n"
    168 
    169          /* CHILD - call thread function */
    170 "        popl    %eax\n"
    171 "        call    *%eax\n"                   /* call fn */
    172 
    173          /* exit with result */
    174 "        movl    %eax, %ebx\n"              /* arg1: return value from fn */
    175 "        movl    $"__NR_EXIT", %eax\n"
    176 "        int     $0x80\n"
    177 
    178          /* Hm, exit returned */
    179 "        ud2\n"
    180 
    181 "1:\n"   /* PARENT or ERROR */
    182 "        pop     %esi\n"
    183 "        pop     %edi\n"
    184 "        pop     %ebx\n"
    185 "        ret\n"
    186 ".previous\n"
    187 );
    188 
    189 #undef FSZ
    190 #undef __NR_CLONE
    191 #undef __NR_EXIT
    192 
    193 
    194 // forward declarations
    195 static void setup_child ( ThreadArchState*, ThreadArchState*, Bool );
    196 static SysRes sys_set_thread_area ( ThreadId, vki_modify_ldt_t* );
    197 
    198 /*
    199    When a client clones, we need to keep track of the new thread.  This means:
    200    1. allocate a ThreadId+ThreadState+stack for the the thread
    201 
    202    2. initialize the thread's new VCPU state
    203 
    204    3. create the thread using the same args as the client requested,
    205    but using the scheduler entrypoint for EIP, and a separate stack
    206    for ESP.
    207  */
    208 static SysRes do_clone ( ThreadId ptid,
    209                          UInt flags, Addr esp,
    210                          Int* parent_tidptr,
    211                          Int* child_tidptr,
    212                          vki_modify_ldt_t *tlsinfo)
    213 {
    214    static const Bool debug = False;
    215 
    216    ThreadId     ctid = VG_(alloc_ThreadState)();
    217    ThreadState* ptst = VG_(get_ThreadState)(ptid);
    218    ThreadState* ctst = VG_(get_ThreadState)(ctid);
    219    UWord*       stack;
    220    NSegment const* seg;
    221    SysRes       res;
    222    Int          eax;
    223    vki_sigset_t blockall, savedmask;
    224 
    225    VG_(sigfillset)(&blockall);
    226 
    227    vg_assert(VG_(is_running_thread)(ptid));
    228    vg_assert(VG_(is_valid_tid)(ctid));
    229 
    230    stack = (UWord*)ML_(allocstack)(ctid);
    231    if (stack == NULL) {
    232       res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
    233       goto out;
    234    }
    235 
    236    /* Copy register state
    237 
    238       Both parent and child return to the same place, and the code
    239       following the clone syscall works out which is which, so we
    240       don't need to worry about it.
    241 
    242       The parent gets the child's new tid returned from clone, but the
    243       child gets 0.
    244 
    245       If the clone call specifies a NULL esp for the new thread, then
    246       it actually gets a copy of the parent's esp.
    247    */
    248    /* Note: the clone call done by the Quadrics Elan3 driver specifies
    249       clone flags of 0xF00, and it seems to rely on the assumption
    250       that the child inherits a copy of the parent's GDT.
    251       setup_child takes care of setting that up. */
    252    setup_child( &ctst->arch, &ptst->arch, True );
    253 
    254    /* Make sys_clone appear to have returned Success(0) in the
    255       child. */
    256    ctst->arch.vex.guest_EAX = 0;
    257 
    258    if (esp != 0)
    259       ctst->arch.vex.guest_ESP = esp;
    260 
    261    ctst->os_state.parent = ptid;
    262 
    263    /* inherit signal mask */
    264    ctst->sig_mask     = ptst->sig_mask;
    265    ctst->tmp_sig_mask = ptst->sig_mask;
    266 
    267    /* Start the child with its threadgroup being the same as the
    268       parent's.  This is so that any exit_group calls that happen
    269       after the child is created but before it sets its
    270       os_state.threadgroup field for real (in thread_wrapper in
    271       syswrap-linux.c), really kill the new thread.  a.k.a this avoids
    272       a race condition in which the thread is unkillable (via
    273       exit_group) because its threadgroup is not set.  The race window
    274       is probably only a few hundred or a few thousand cycles long.
    275       See #226116. */
    276    ctst->os_state.threadgroup = ptst->os_state.threadgroup;
    277 
    278    /* We don't really know where the client stack is, because its
    279       allocated by the client.  The best we can do is look at the
    280       memory mappings and try to derive some useful information.  We
    281       assume that esp starts near its highest possible value, and can
    282       only go down to the start of the mmaped segment. */
    283    seg = VG_(am_find_nsegment)((Addr)esp);
    284    if (seg && seg->kind != SkResvn) {
    285       ctst->client_stack_highest_word = (Addr)VG_PGROUNDUP(esp);
    286       ctst->client_stack_szB = ctst->client_stack_highest_word - seg->start;
    287 
    288       VG_(register_stack)(seg->start, ctst->client_stack_highest_word);
    289 
    290       if (debug)
    291 	 VG_(printf)("tid %d: guessed client stack range %#lx-%#lx\n",
    292 		     ctid, seg->start, VG_PGROUNDUP(esp));
    293    } else {
    294       VG_(message)(Vg_UserMsg,
    295                    "!? New thread %d starts with ESP(%#lx) unmapped\n",
    296 		   ctid, esp);
    297       ctst->client_stack_szB  = 0;
    298    }
    299 
    300    /* Assume the clone will succeed, and tell any tool that wants to
    301       know that this thread has come into existence.  We cannot defer
    302       it beyond this point because sys_set_thread_area, just below,
    303       causes tCheck to assert by making references to the new ThreadId
    304       if we don't state the new thread exists prior to that point.
    305       If the clone fails, we'll send out a ll_exit notification for it
    306       at the out: label below, to clean up. */
    307    vg_assert(VG_(owns_BigLock_LL)(ptid));
    308    VG_TRACK ( pre_thread_ll_create, ptid, ctid );
    309 
    310    if (flags & VKI_CLONE_SETTLS) {
    311       if (debug)
    312 	 VG_(printf)("clone child has SETTLS: tls info at %p: idx=%d "
    313                      "base=%#lx limit=%x; esp=%#x fs=%x gs=%x\n",
    314 		     tlsinfo, tlsinfo->entry_number,
    315                      tlsinfo->base_addr, tlsinfo->limit,
    316 		     ptst->arch.vex.guest_ESP,
    317 		     ctst->arch.vex.guest_FS, ctst->arch.vex.guest_GS);
    318       res = sys_set_thread_area(ctid, tlsinfo);
    319       if (sr_isError(res))
    320 	 goto out;
    321    }
    322 
    323    flags &= ~VKI_CLONE_SETTLS;
    324 
    325    /* start the thread with everything blocked */
    326    VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);
    327 
    328    /* Create the new thread */
    329    eax = do_syscall_clone_x86_linux(
    330             ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
    331             child_tidptr, parent_tidptr, NULL
    332          );
    333    res = VG_(mk_SysRes_x86_linux)( eax );
    334 
    335    VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);
    336 
    337   out:
    338    if (sr_isError(res)) {
    339       /* clone failed */
    340       VG_(cleanup_thread)(&ctst->arch);
    341       ctst->status = VgTs_Empty;
    342       /* oops.  Better tell the tool the thread exited in a hurry :-) */
    343       VG_TRACK( pre_thread_ll_exit, ctid );
    344    }
    345 
    346    return res;
    347 }
    348 
    349 
    350 /* ---------------------------------------------------------------------
    351    LDT/GDT simulation
    352    ------------------------------------------------------------------ */
    353 
    354 /* Details of the LDT simulation
    355    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    356 
    357    When a program runs natively, the linux kernel allows each *thread*
    358    in it to have its own LDT.  Almost all programs never do this --
    359    it's wildly unportable, after all -- and so the kernel never
    360    allocates the structure, which is just as well as an LDT occupies
    361    64k of memory (8192 entries of size 8 bytes).
    362 
    363    A thread may choose to modify its LDT entries, by doing the
    364    __NR_modify_ldt syscall.  In such a situation the kernel will then
    365    allocate an LDT structure for it.  Each LDT entry is basically a
    366    (base, limit) pair.  A virtual address in a specific segment is
    367    translated to a linear address by adding the segment's base value.
    368    In addition, the virtual address must not exceed the limit value.
    369 
    370    To use an LDT entry, a thread loads one of the segment registers
    371    (%cs, %ss, %ds, %es, %fs, %gs) with the index of the LDT entry (0
    372    .. 8191) it wants to use.  In fact, the required value is (index <<
    373    3) + 7, but that's not important right now.  Any normal instruction
    374    which includes an addressing mode can then be made relative to that
    375    LDT entry by prefixing the insn with a so-called segment-override
    376    prefix, a byte which indicates which of the 6 segment registers
    377    holds the LDT index.
    378 
    379    Now, a key constraint is that valgrind's address checks operate in
    380    terms of linear addresses.  So we have to explicitly translate
    381    virtual addrs into linear addrs, and that means doing a complete
    382    LDT simulation.
    383 
    384    Calls to modify_ldt are intercepted.  For each thread, we maintain
    385    an LDT (with the same normally-never-allocated optimisation that
    386    the kernel does).  This is updated as expected via calls to
    387    modify_ldt.
    388 
    389    When a thread does an amode calculation involving a segment
    390    override prefix, the relevant LDT entry for the thread is
    391    consulted.  It all works.
    392 
    393    There is a conceptual problem, which appears when switching back to
    394    native execution, either temporarily to pass syscalls to the
    395    kernel, or permanently, when debugging V.  Problem at such points
    396    is that it's pretty pointless to copy the simulated machine's
    397    segment registers to the real machine, because we'd also need to
    398    copy the simulated LDT into the real one, and that's prohibitively
    399    expensive.
    400 
    401    Fortunately it looks like no syscalls rely on the segment regs or
    402    LDT being correct, so we can get away with it.  Apart from that the
    403    simulation is pretty straightforward.  All 6 segment registers are
    404    tracked, although only %ds, %es, %fs and %gs are allowed as
    405    prefixes.  Perhaps it could be restricted even more than that -- I
    406    am not sure what is and isn't allowed in user-mode.
    407 */
    408 
    409 /* Translate a struct modify_ldt_ldt_s to a VexGuestX86SegDescr, using
    410    the Linux kernel's logic (cut-n-paste of code in
    411    linux/kernel/ldt.c).  */
    412 
    413 static
    414 void translate_to_hw_format ( /* IN  */ vki_modify_ldt_t* inn,
    415                               /* OUT */ VexGuestX86SegDescr* out,
    416                                         Int oldmode )
    417 {
    418    UInt entry_1, entry_2;
    419    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    420 
    421    if (0)
    422       VG_(printf)("translate_to_hw_format: base %#lx, limit %d\n",
    423                   inn->base_addr, inn->limit );
    424 
    425    /* Allow LDTs to be cleared by the user. */
    426    if (inn->base_addr == 0 && inn->limit == 0) {
    427       if (oldmode ||
    428           (inn->contents == 0      &&
    429            inn->read_exec_only == 1   &&
    430            inn->seg_32bit == 0      &&
    431            inn->limit_in_pages == 0   &&
    432            inn->seg_not_present == 1   &&
    433            inn->useable == 0 )) {
    434          entry_1 = 0;
    435          entry_2 = 0;
    436          goto install;
    437       }
    438    }
    439 
    440    entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
    441              (inn->limit & 0x0ffff);
    442    entry_2 = (inn->base_addr & 0xff000000) |
    443              ((inn->base_addr & 0x00ff0000) >> 16) |
    444              (inn->limit & 0xf0000) |
    445              ((inn->read_exec_only ^ 1) << 9) |
    446              (inn->contents << 10) |
    447              ((inn->seg_not_present ^ 1) << 15) |
    448              (inn->seg_32bit << 22) |
    449              (inn->limit_in_pages << 23) |
    450              0x7000;
    451    if (!oldmode)
    452       entry_2 |= (inn->useable << 20);
    453 
    454    /* Install the new entry ...  */
    455   install:
    456    out->LdtEnt.Words.word1 = entry_1;
    457    out->LdtEnt.Words.word2 = entry_2;
    458 }
    459 
    460 /* Create a zeroed-out GDT. */
    461 static VexGuestX86SegDescr* alloc_zeroed_x86_GDT ( void )
    462 {
    463    Int nbytes = VEX_GUEST_X86_GDT_NENT * sizeof(VexGuestX86SegDescr);
    464    return VG_(arena_calloc)(VG_AR_CORE, "di.syswrap-x86.azxG.1", nbytes, 1);
    465 }
    466 
    467 /* Create a zeroed-out LDT. */
    468 static VexGuestX86SegDescr* alloc_zeroed_x86_LDT ( void )
    469 {
    470    Int nbytes = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
    471    return VG_(arena_calloc)(VG_AR_CORE, "di.syswrap-x86.azxL.1", nbytes, 1);
    472 }
    473 
    474 /* Free up an LDT or GDT allocated by the above fns. */
    475 static void free_LDT_or_GDT ( VexGuestX86SegDescr* dt )
    476 {
    477    vg_assert(dt);
    478    VG_(arena_free)(VG_AR_CORE, (void*)dt);
    479 }
    480 
    481 /* Copy contents between two existing LDTs. */
    482 static void copy_LDT_from_to ( VexGuestX86SegDescr* src,
    483                                VexGuestX86SegDescr* dst )
    484 {
    485    Int i;
    486    vg_assert(src);
    487    vg_assert(dst);
    488    for (i = 0; i < VEX_GUEST_X86_LDT_NENT; i++)
    489       dst[i] = src[i];
    490 }
    491 
    492 /* Copy contents between two existing GDTs. */
    493 static void copy_GDT_from_to ( VexGuestX86SegDescr* src,
    494                                VexGuestX86SegDescr* dst )
    495 {
    496    Int i;
    497    vg_assert(src);
    498    vg_assert(dst);
    499    for (i = 0; i < VEX_GUEST_X86_GDT_NENT; i++)
    500       dst[i] = src[i];
    501 }
    502 
    503 /* Free this thread's DTs, if it has any. */
    504 static void deallocate_LGDTs_for_thread ( VexGuestX86State* vex )
    505 {
    506    vg_assert(sizeof(HWord) == sizeof(void*));
    507 
    508    if (0)
    509       VG_(printf)("deallocate_LGDTs_for_thread: "
    510                   "ldt = 0x%lx, gdt = 0x%lx\n",
    511                   vex->guest_LDT, vex->guest_GDT );
    512 
    513    if (vex->guest_LDT != (HWord)NULL) {
    514       free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_LDT );
    515       vex->guest_LDT = (HWord)NULL;
    516    }
    517 
    518    if (vex->guest_GDT != (HWord)NULL) {
    519       free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_GDT );
    520       vex->guest_GDT = (HWord)NULL;
    521    }
    522 }
    523 
    524 
    525 /*
    526  * linux/kernel/ldt.c
    527  *
    528  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
    529  * Copyright (C) 1999 Ingo Molnar <mingo (at) redhat.com>
    530  */
    531 
    532 /*
    533  * read_ldt() is not really atomic - this is not a problem since
    534  * synchronization of reads and writes done to the LDT has to be
    535  * assured by user-space anyway. Writes are atomic, to protect
    536  * the security checks done on new descriptors.
    537  */
    538 static
    539 SysRes read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
    540 {
    541    SysRes res;
    542    UInt   i, size;
    543    UChar* ldt;
    544 
    545    if (0)
    546       VG_(printf)("read_ldt: tid = %d, ptr = %p, bytecount = %d\n",
    547                   tid, ptr, bytecount );
    548 
    549    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    550    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    551 
    552    ldt = (UChar*)(VG_(threads)[tid].arch.vex.guest_LDT);
    553    res = VG_(mk_SysRes_Success)( 0 );
    554    if (ldt == NULL)
    555       /* LDT not allocated, meaning all entries are null */
    556       goto out;
    557 
    558    size = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
    559    if (size > bytecount)
    560       size = bytecount;
    561 
    562    res = VG_(mk_SysRes_Success)( size );
    563    for (i = 0; i < size; i++)
    564       ptr[i] = ldt[i];
    565 
    566   out:
    567    return res;
    568 }
    569 
    570 
    571 static
    572 SysRes write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
    573 {
    574    SysRes res;
    575    VexGuestX86SegDescr* ldt;
    576    vki_modify_ldt_t* ldt_info;
    577 
    578    if (0)
    579       VG_(printf)("write_ldt: tid = %d, ptr = %p, "
    580                   "bytecount = %d, oldmode = %d\n",
    581                   tid, ptr, bytecount, oldmode );
    582 
    583    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    584    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    585 
    586    ldt      = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
    587    ldt_info = (vki_modify_ldt_t*)ptr;
    588 
    589    res = VG_(mk_SysRes_Error)( VKI_EINVAL );
    590    if (bytecount != sizeof(vki_modify_ldt_t))
    591       goto out;
    592 
    593    res = VG_(mk_SysRes_Error)( VKI_EINVAL );
    594    if (ldt_info->entry_number >= VEX_GUEST_X86_LDT_NENT)
    595       goto out;
    596    if (ldt_info->contents == 3) {
    597       if (oldmode)
    598          goto out;
    599       if (ldt_info->seg_not_present == 0)
    600          goto out;
    601    }
    602 
    603    /* If this thread doesn't have an LDT, we'd better allocate it
    604       now. */
    605    if (ldt == NULL) {
    606       ldt = alloc_zeroed_x86_LDT();
    607       VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
    608    }
    609 
    610    /* Install the new entry ...  */
    611    translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number], oldmode );
    612    res = VG_(mk_SysRes_Success)( 0 );
    613 
    614   out:
    615    return res;
    616 }
    617 
    618 
    619 static SysRes sys_modify_ldt ( ThreadId tid,
    620                                Int func, void* ptr, UInt bytecount )
    621 {
    622    SysRes ret = VG_(mk_SysRes_Error)( VKI_ENOSYS );
    623 
    624    switch (func) {
    625    case 0:
    626       ret = read_ldt(tid, ptr, bytecount);
    627       break;
    628    case 1:
    629       ret = write_ldt(tid, ptr, bytecount, 1);
    630       break;
    631    case 2:
    632       VG_(unimplemented)("sys_modify_ldt: func == 2");
    633       /* god knows what this is about */
    634       /* ret = read_default_ldt(ptr, bytecount); */
    635       /*UNREACHED*/
    636       break;
    637    case 0x11:
    638       ret = write_ldt(tid, ptr, bytecount, 0);
    639       break;
    640    }
    641    return ret;
    642 }
    643 
    644 
    645 static SysRes sys_set_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
    646 {
    647    Int                  idx;
    648    VexGuestX86SegDescr* gdt;
    649 
    650    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    651    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    652 
    653    if (info == NULL)
    654       return VG_(mk_SysRes_Error)( VKI_EFAULT );
    655 
    656    gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
    657 
    658    /* If the thread doesn't have a GDT, allocate it now. */
    659    if (!gdt) {
    660       gdt = alloc_zeroed_x86_GDT();
    661       VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
    662    }
    663 
    664    idx = info->entry_number;
    665 
    666    if (idx == -1) {
    667       /* Find and use the first free entry.  Don't allocate entry
    668          zero, because the hardware will never do that, and apparently
    669          doing so confuses some code (perhaps stuff running on
    670          Wine). */
    671       for (idx = 1; idx < VEX_GUEST_X86_GDT_NENT; idx++) {
    672          if (gdt[idx].LdtEnt.Words.word1 == 0
    673              && gdt[idx].LdtEnt.Words.word2 == 0)
    674             break;
    675       }
    676 
    677       if (idx == VEX_GUEST_X86_GDT_NENT)
    678          return VG_(mk_SysRes_Error)( VKI_ESRCH );
    679    } else if (idx < 0 || idx == 0 || idx >= VEX_GUEST_X86_GDT_NENT) {
    680       /* Similarly, reject attempts to use GDT[0]. */
    681       return VG_(mk_SysRes_Error)( VKI_EINVAL );
    682    }
    683 
    684    translate_to_hw_format(info, &gdt[idx], 0);
    685 
    686    VG_TRACK( pre_mem_write, Vg_CoreSysCall, tid,
    687              "set_thread_area(info->entry)",
    688              (Addr) & info->entry_number, sizeof(unsigned int) );
    689    info->entry_number = idx;
    690    VG_TRACK( post_mem_write, Vg_CoreSysCall, tid,
    691              (Addr) & info->entry_number, sizeof(unsigned int) );
    692 
    693    return VG_(mk_SysRes_Success)( 0 );
    694 }
    695 
    696 
    697 static SysRes sys_get_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
    698 {
    699    Int idx;
    700    VexGuestX86SegDescr* gdt;
    701 
    702    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
    703    vg_assert(8 == sizeof(VexGuestX86SegDescr));
    704 
    705    if (info == NULL)
    706       return VG_(mk_SysRes_Error)( VKI_EFAULT );
    707 
    708    idx = info->entry_number;
    709 
    710    if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
    711       return VG_(mk_SysRes_Error)( VKI_EINVAL );
    712 
    713    gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
    714 
    715    /* If the thread doesn't have a GDT, allocate it now. */
    716    if (!gdt) {
    717       gdt = alloc_zeroed_x86_GDT();
    718       VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
    719    }
    720 
    721    info->base_addr = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
    722                      ( gdt[idx].LdtEnt.Bits.BaseMid << 16 ) |
    723                      gdt[idx].LdtEnt.Bits.BaseLow;
    724    info->limit = ( gdt[idx].LdtEnt.Bits.LimitHi << 16 ) |
    725                    gdt[idx].LdtEnt.Bits.LimitLow;
    726    info->seg_32bit = gdt[idx].LdtEnt.Bits.Default_Big;
    727    info->contents = ( gdt[idx].LdtEnt.Bits.Type >> 2 ) & 0x3;
    728    info->read_exec_only = ( gdt[idx].LdtEnt.Bits.Type & 0x1 ) ^ 0x1;
    729    info->limit_in_pages = gdt[idx].LdtEnt.Bits.Granularity;
    730    info->seg_not_present = gdt[idx].LdtEnt.Bits.Pres ^ 0x1;
    731    info->useable = gdt[idx].LdtEnt.Bits.Sys;
    732    info->reserved = 0;
    733 
    734    return VG_(mk_SysRes_Success)( 0 );
    735 }
    736 
    737 /* ---------------------------------------------------------------------
    738    More thread stuff
    739    ------------------------------------------------------------------ */
    740 
    741 void VG_(cleanup_thread) ( ThreadArchState* arch )
    742 {
    743    /* Release arch-specific resources held by this thread. */
    744    /* On x86, we have to dump the LDT and GDT. */
    745    deallocate_LGDTs_for_thread( &arch->vex );
    746 }
    747 
    748 
    749 static void setup_child ( /*OUT*/ ThreadArchState *child,
    750                           /*IN*/  ThreadArchState *parent,
    751                           Bool inherit_parents_GDT )
    752 {
    753    /* We inherit our parent's guest state. */
    754    child->vex = parent->vex;
    755    child->vex_shadow1 = parent->vex_shadow1;
    756    child->vex_shadow2 = parent->vex_shadow2;
    757 
    758    /* We inherit our parent's LDT. */
    759    if (parent->vex.guest_LDT == (HWord)NULL) {
    760       /* We hope this is the common case. */
    761       child->vex.guest_LDT = (HWord)NULL;
    762    } else {
    763       /* No luck .. we have to take a copy of the parent's. */
    764       child->vex.guest_LDT = (HWord)alloc_zeroed_x86_LDT();
    765       copy_LDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_LDT,
    766                         (VexGuestX86SegDescr*)child->vex.guest_LDT );
    767    }
    768 
    769    /* Either we start with an empty GDT (the usual case) or inherit a
    770       copy of our parents' one (Quadrics Elan3 driver -style clone
    771       only). */
    772    child->vex.guest_GDT = (HWord)NULL;
    773 
    774    if (inherit_parents_GDT && parent->vex.guest_GDT != (HWord)NULL) {
    775       child->vex.guest_GDT = (HWord)alloc_zeroed_x86_GDT();
    776       copy_GDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_GDT,
    777                         (VexGuestX86SegDescr*)child->vex.guest_GDT );
    778    }
    779 }
    780 
    781 
    782 /* ---------------------------------------------------------------------
    783    PRE/POST wrappers for x86/Linux-specific syscalls
    784    ------------------------------------------------------------------ */
    785 
    786 #define PRE(name)       DEFN_PRE_TEMPLATE(x86_linux, name)
    787 #define POST(name)      DEFN_POST_TEMPLATE(x86_linux, name)
    788 
    789 /* Add prototypes for the wrappers declared here, so that gcc doesn't
    790    harass us for not having prototypes.  Really this is a kludge --
    791    the right thing to do is to make these wrappers 'static' since they
    792    aren't visible outside this file, but that requires even more macro
    793    magic. */
    794 DECL_TEMPLATE(x86_linux, sys_stat64);
    795 DECL_TEMPLATE(x86_linux, sys_fstatat64);
    796 DECL_TEMPLATE(x86_linux, sys_fstat64);
    797 DECL_TEMPLATE(x86_linux, sys_lstat64);
    798 DECL_TEMPLATE(x86_linux, sys_clone);
    799 DECL_TEMPLATE(x86_linux, old_mmap);
    800 DECL_TEMPLATE(x86_linux, sys_mmap2);
    801 DECL_TEMPLATE(x86_linux, sys_sigreturn);
    802 DECL_TEMPLATE(x86_linux, sys_rt_sigreturn);
    803 DECL_TEMPLATE(x86_linux, sys_modify_ldt);
    804 DECL_TEMPLATE(x86_linux, sys_set_thread_area);
    805 DECL_TEMPLATE(x86_linux, sys_get_thread_area);
    806 DECL_TEMPLATE(x86_linux, sys_ptrace);
    807 DECL_TEMPLATE(x86_linux, sys_sigsuspend);
    808 DECL_TEMPLATE(x86_linux, old_select);
    809 DECL_TEMPLATE(x86_linux, sys_vm86old);
    810 DECL_TEMPLATE(x86_linux, sys_vm86);
    811 DECL_TEMPLATE(x86_linux, sys_syscall223);
    812 
    813 PRE(old_select)
    814 {
    815    /* struct sel_arg_struct {
    816       unsigned long n;
    817       fd_set *inp, *outp, *exp;
    818       struct timeval *tvp;
    819       };
    820    */
    821    PRE_REG_READ1(long, "old_select", struct sel_arg_struct *, args);
    822    PRE_MEM_READ( "old_select(args)", ARG1, 5*sizeof(UWord) );
    823    *flags |= SfMayBlock;
    824    {
    825       UInt* arg_struct = (UInt*)ARG1;
    826       UInt a1, a2, a3, a4, a5;
    827 
    828       a1 = arg_struct[0];
    829       a2 = arg_struct[1];
    830       a3 = arg_struct[2];
    831       a4 = arg_struct[3];
    832       a5 = arg_struct[4];
    833 
    834       PRINT("old_select ( %d, %#x, %#x, %#x, %#x )", a1,a2,a3,a4,a5);
    835       if (a2 != (Addr)NULL)
    836          PRE_MEM_READ( "old_select(readfds)",   a2, a1/8 /* __FD_SETSIZE/8 */ );
    837       if (a3 != (Addr)NULL)
    838          PRE_MEM_READ( "old_select(writefds)",  a3, a1/8 /* __FD_SETSIZE/8 */ );
    839       if (a4 != (Addr)NULL)
    840          PRE_MEM_READ( "old_select(exceptfds)", a4, a1/8 /* __FD_SETSIZE/8 */ );
    841       if (a5 != (Addr)NULL)
    842          PRE_MEM_READ( "old_select(timeout)", a5, sizeof(struct vki_timeval) );
    843    }
    844 }
    845 
    846 PRE(sys_clone)
    847 {
    848    UInt cloneflags;
    849    Bool badarg = False;
    850 
    851    PRINT("sys_clone ( %lx, %#lx, %#lx, %#lx, %#lx )",ARG1,ARG2,ARG3,ARG4,ARG5);
    852    PRE_REG_READ2(int, "clone",
    853                  unsigned long, flags,
    854                  void *, child_stack);
    855 
    856    if (ARG1 & VKI_CLONE_PARENT_SETTID) {
    857       if (VG_(tdict).track_pre_reg_read) {
    858          PRA3("clone", int *, parent_tidptr);
    859       }
    860       PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
    861       if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int),
    862                                              VKI_PROT_WRITE)) {
    863          badarg = True;
    864       }
    865    }
    866    if (ARG1 & VKI_CLONE_SETTLS) {
    867       if (VG_(tdict).track_pre_reg_read) {
    868          PRA4("clone", vki_modify_ldt_t *, tlsinfo);
    869       }
    870       PRE_MEM_READ("clone(tlsinfo)", ARG4, sizeof(vki_modify_ldt_t));
    871       if (!VG_(am_is_valid_for_client)(ARG4, sizeof(vki_modify_ldt_t),
    872                                              VKI_PROT_READ)) {
    873          badarg = True;
    874       }
    875    }
    876    if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
    877       if (VG_(tdict).track_pre_reg_read) {
    878          PRA5("clone", int *, child_tidptr);
    879       }
    880       PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
    881       if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int),
    882                                              VKI_PROT_WRITE)) {
    883          badarg = True;
    884       }
    885    }
    886 
    887    if (badarg) {
    888       SET_STATUS_Failure( VKI_EFAULT );
    889       return;
    890    }
    891 
    892    cloneflags = ARG1;
    893 
    894    if (!ML_(client_signal_OK)(ARG1 & VKI_CSIGNAL)) {
    895       SET_STATUS_Failure( VKI_EINVAL );
    896       return;
    897    }
    898 
    899    /* Be ultra-paranoid and filter out any clone-variants we don't understand:
    900       - ??? specifies clone flags of 0x100011
    901       - ??? specifies clone flags of 0x1200011.
    902       - NPTL specifies clone flags of 0x7D0F00.
    903       - The Quadrics Elan3 driver specifies clone flags of 0xF00.
    904       - Newer Quadrics Elan3 drivers with NTPL support specify 0x410F00.
    905       Everything else is rejected.
    906    */
    907    if (
    908         1 ||
    909         /* 11 Nov 05: for the time being, disable this ultra-paranoia.
    910            The switch below probably does a good enough job. */
    911           (cloneflags == 0x100011 || cloneflags == 0x1200011
    912                                   || cloneflags == 0x7D0F00
    913                                   || cloneflags == 0x790F00
    914                                   || cloneflags == 0x3D0F00
    915                                   || cloneflags == 0x410F00
    916                                   || cloneflags == 0xF00
    917                                   || cloneflags == 0xF21)) {
    918      /* OK */
    919    }
    920    else {
    921       /* Nah.  We don't like it.  Go away. */
    922       goto reject;
    923    }
    924 
    925    /* Only look at the flags we really care about */
    926    switch (cloneflags & (VKI_CLONE_VM | VKI_CLONE_FS
    927                          | VKI_CLONE_FILES | VKI_CLONE_VFORK)) {
    928    case VKI_CLONE_VM | VKI_CLONE_FS | VKI_CLONE_FILES:
    929       /* thread creation */
    930       SET_STATUS_from_SysRes(
    931          do_clone(tid,
    932                   ARG1,         /* flags */
    933                   (Addr)ARG2,   /* child ESP */
    934                   (Int *)ARG3,  /* parent_tidptr */
    935                   (Int *)ARG5,  /* child_tidptr */
    936                   (vki_modify_ldt_t *)ARG4)); /* set_tls */
    937       break;
    938 
    939    case VKI_CLONE_VFORK | VKI_CLONE_VM: /* vfork */
    940       /* FALLTHROUGH - assume vfork == fork */
    941       cloneflags &= ~(VKI_CLONE_VFORK | VKI_CLONE_VM);
    942 
    943    case 0: /* plain fork */
    944       SET_STATUS_from_SysRes(
    945          ML_(do_fork_clone)(tid,
    946                        cloneflags,      /* flags */
    947                        (Int *)ARG3,     /* parent_tidptr */
    948                        (Int *)ARG5));   /* child_tidptr */
    949       break;
    950 
    951    default:
    952    reject:
    953       /* should we just ENOSYS? */
    954       VG_(message)(Vg_UserMsg, "\n");
    955       VG_(message)(Vg_UserMsg, "Unsupported clone() flags: 0x%lx\n", ARG1);
    956       VG_(message)(Vg_UserMsg, "\n");
    957       VG_(message)(Vg_UserMsg, "The only supported clone() uses are:\n");
    958       VG_(message)(Vg_UserMsg, " - via a threads library (LinuxThreads or NPTL)\n");
    959       VG_(message)(Vg_UserMsg, " - via the implementation of fork or vfork\n");
    960       VG_(message)(Vg_UserMsg, " - for the Quadrics Elan3 user-space driver\n");
    961       VG_(unimplemented)
    962          ("Valgrind does not support general clone().");
    963    }
    964 
    965    if (SUCCESS) {
    966       if (ARG1 & VKI_CLONE_PARENT_SETTID)
    967          POST_MEM_WRITE(ARG3, sizeof(Int));
    968       if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID))
    969          POST_MEM_WRITE(ARG5, sizeof(Int));
    970 
    971       /* Thread creation was successful; let the child have the chance
    972          to run */
    973       *flags |= SfYieldAfter;
    974    }
    975 }
    976 
    977 PRE(sys_sigreturn)
    978 {
    979    /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
    980       an explanation of what follows. */
    981 
    982    ThreadState* tst;
    983    PRINT("sys_sigreturn ( )");
    984 
    985    vg_assert(VG_(is_valid_tid)(tid));
    986    vg_assert(tid >= 1 && tid < VG_N_THREADS);
    987    vg_assert(VG_(is_running_thread)(tid));
    988 
    989    /* Adjust esp to point to start of frame; skip back up over
    990       sigreturn sequence's "popl %eax" and handler ret addr */
    991    tst = VG_(get_ThreadState)(tid);
    992    tst->arch.vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
    993    /* XXX why does ESP change differ from rt_sigreturn case below? */
    994 
    995    /* This is only so that the EIP is (might be) useful to report if
    996       something goes wrong in the sigreturn */
    997    ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
    998 
    999    /* Restore register state from frame and remove it */
   1000    VG_(sigframe_destroy)(tid, False);
   1001 
   1002    /* Tell the driver not to update the guest state with the "result",
   1003       and set a bogus result to keep it happy. */
   1004    *flags |= SfNoWriteResult;
   1005    SET_STATUS_Success(0);
   1006 
   1007    /* Check to see if any signals arose as a result of this. */
   1008    *flags |= SfPollAfter;
   1009 }
   1010 
   1011 PRE(sys_rt_sigreturn)
   1012 {
   1013    /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
   1014       an explanation of what follows. */
   1015 
   1016    ThreadState* tst;
   1017    PRINT("sys_rt_sigreturn ( )");
   1018 
   1019    vg_assert(VG_(is_valid_tid)(tid));
   1020    vg_assert(tid >= 1 && tid < VG_N_THREADS);
   1021    vg_assert(VG_(is_running_thread)(tid));
   1022 
   1023    /* Adjust esp to point to start of frame; skip back up over handler
   1024       ret addr */
   1025    tst = VG_(get_ThreadState)(tid);
   1026    tst->arch.vex.guest_ESP -= sizeof(Addr);
   1027    /* XXX why does ESP change differ from sigreturn case above? */
   1028 
   1029    /* This is only so that the EIP is (might be) useful to report if
   1030       something goes wrong in the sigreturn */
   1031    ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
   1032 
   1033    /* Restore register state from frame and remove it */
   1034    VG_(sigframe_destroy)(tid, True);
   1035 
   1036    /* Tell the driver not to update the guest state with the "result",
   1037       and set a bogus result to keep it happy. */
   1038    *flags |= SfNoWriteResult;
   1039    SET_STATUS_Success(0);
   1040 
   1041    /* Check to see if any signals arose as a result of this. */
   1042    *flags |= SfPollAfter;
   1043 }
   1044 
   1045 PRE(sys_modify_ldt)
   1046 {
   1047    PRINT("sys_modify_ldt ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   1048    PRE_REG_READ3(int, "modify_ldt", int, func, void *, ptr,
   1049                  unsigned long, bytecount);
   1050 
   1051    if (ARG1 == 0) {
   1052       /* read the LDT into ptr */
   1053       PRE_MEM_WRITE( "modify_ldt(ptr)", ARG2, ARG3 );
   1054    }
   1055    if (ARG1 == 1 || ARG1 == 0x11) {
   1056       /* write the LDT with the entry pointed at by ptr */
   1057       PRE_MEM_READ( "modify_ldt(ptr)", ARG2, sizeof(vki_modify_ldt_t) );
   1058    }
   1059    /* "do" the syscall ourselves; the kernel never sees it */
   1060    SET_STATUS_from_SysRes( sys_modify_ldt( tid, ARG1, (void*)ARG2, ARG3 ) );
   1061 
   1062    if (ARG1 == 0 && SUCCESS && RES > 0) {
   1063       POST_MEM_WRITE( ARG2, RES );
   1064    }
   1065 }
   1066 
   1067 PRE(sys_set_thread_area)
   1068 {
   1069    PRINT("sys_set_thread_area ( %#lx )", ARG1);
   1070    PRE_REG_READ1(int, "set_thread_area", struct user_desc *, u_info)
   1071    PRE_MEM_READ( "set_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
   1072 
   1073    /* "do" the syscall ourselves; the kernel never sees it */
   1074    SET_STATUS_from_SysRes( sys_set_thread_area( tid, (void *)ARG1 ) );
   1075 }
   1076 
   1077 PRE(sys_get_thread_area)
   1078 {
   1079    PRINT("sys_get_thread_area ( %#lx )", ARG1);
   1080    PRE_REG_READ1(int, "get_thread_area", struct user_desc *, u_info)
   1081    PRE_MEM_WRITE( "get_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
   1082 
   1083    /* "do" the syscall ourselves; the kernel never sees it */
   1084    SET_STATUS_from_SysRes( sys_get_thread_area( tid, (void *)ARG1 ) );
   1085 
   1086    if (SUCCESS) {
   1087       POST_MEM_WRITE( ARG1, sizeof(vki_modify_ldt_t) );
   1088    }
   1089 }
   1090 
   1091 // Parts of this are x86-specific, but the *PEEK* cases are generic.
   1092 //
   1093 // ARG3 is only used for pointers into the traced process's address
   1094 // space and for offsets into the traced process's struct
   1095 // user_regs_struct. It is never a pointer into this process's memory
   1096 // space, and we should therefore not check anything it points to.
   1097 PRE(sys_ptrace)
   1098 {
   1099    PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4);
   1100    PRE_REG_READ4(int, "ptrace",
   1101                  long, request, long, pid, long, addr, long, data);
   1102    switch (ARG1) {
   1103    case VKI_PTRACE_PEEKTEXT:
   1104    case VKI_PTRACE_PEEKDATA:
   1105    case VKI_PTRACE_PEEKUSR:
   1106       PRE_MEM_WRITE( "ptrace(peek)", ARG4,
   1107 		     sizeof (long));
   1108       break;
   1109    case VKI_PTRACE_GETREGS:
   1110       PRE_MEM_WRITE( "ptrace(getregs)", ARG4,
   1111 		     sizeof (struct vki_user_regs_struct));
   1112       break;
   1113    case VKI_PTRACE_GETFPREGS:
   1114       PRE_MEM_WRITE( "ptrace(getfpregs)", ARG4,
   1115 		     sizeof (struct vki_user_i387_struct));
   1116       break;
   1117    case VKI_PTRACE_GETFPXREGS:
   1118       PRE_MEM_WRITE( "ptrace(getfpxregs)", ARG4,
   1119                      sizeof(struct vki_user_fxsr_struct) );
   1120       break;
   1121    case VKI_PTRACE_GET_THREAD_AREA:
   1122       PRE_MEM_WRITE( "ptrace(get_thread_area)", ARG4,
   1123                      sizeof(struct vki_user_desc) );
   1124       break;
   1125    case VKI_PTRACE_SETREGS:
   1126       PRE_MEM_READ( "ptrace(setregs)", ARG4,
   1127 		     sizeof (struct vki_user_regs_struct));
   1128       break;
   1129    case VKI_PTRACE_SETFPREGS:
   1130       PRE_MEM_READ( "ptrace(setfpregs)", ARG4,
   1131 		     sizeof (struct vki_user_i387_struct));
   1132       break;
   1133    case VKI_PTRACE_SETFPXREGS:
   1134       PRE_MEM_READ( "ptrace(setfpxregs)", ARG4,
   1135                      sizeof(struct vki_user_fxsr_struct) );
   1136       break;
   1137    case VKI_PTRACE_SET_THREAD_AREA:
   1138       PRE_MEM_READ( "ptrace(set_thread_area)", ARG4,
   1139                      sizeof(struct vki_user_desc) );
   1140       break;
   1141    case VKI_PTRACE_GETEVENTMSG:
   1142       PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
   1143       break;
   1144    case VKI_PTRACE_GETSIGINFO:
   1145       PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
   1146       break;
   1147    case VKI_PTRACE_SETSIGINFO:
   1148       PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
   1149       break;
   1150    case VKI_PTRACE_GETREGSET:
   1151       ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
   1152       break;
   1153    case VKI_PTRACE_SETREGSET:
   1154       ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
   1155       break;
   1156    default:
   1157       break;
   1158    }
   1159 }
   1160 
   1161 POST(sys_ptrace)
   1162 {
   1163    switch (ARG1) {
   1164    case VKI_PTRACE_PEEKTEXT:
   1165    case VKI_PTRACE_PEEKDATA:
   1166    case VKI_PTRACE_PEEKUSR:
   1167       POST_MEM_WRITE( ARG4, sizeof (long));
   1168       break;
   1169    case VKI_PTRACE_GETREGS:
   1170       POST_MEM_WRITE( ARG4, sizeof (struct vki_user_regs_struct));
   1171       break;
   1172    case VKI_PTRACE_GETFPREGS:
   1173       POST_MEM_WRITE( ARG4, sizeof (struct vki_user_i387_struct));
   1174       break;
   1175    case VKI_PTRACE_GETFPXREGS:
   1176       POST_MEM_WRITE( ARG4, sizeof(struct vki_user_fxsr_struct) );
   1177       break;
   1178    case VKI_PTRACE_GET_THREAD_AREA:
   1179       POST_MEM_WRITE( ARG4, sizeof(struct vki_user_desc) );
   1180       break;
   1181    case VKI_PTRACE_GETEVENTMSG:
   1182       POST_MEM_WRITE( ARG4, sizeof(unsigned long));
   1183       break;
   1184    case VKI_PTRACE_GETSIGINFO:
   1185       /* XXX: This is a simplification. Different parts of the
   1186        * siginfo_t are valid depending on the type of signal.
   1187        */
   1188       POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
   1189       break;
   1190    case VKI_PTRACE_GETREGSET:
   1191       ML_(linux_POST_getregset)(tid, ARG3, ARG4);
   1192       break;
   1193    default:
   1194       break;
   1195    }
   1196 }
   1197 
   1198 PRE(old_mmap)
   1199 {
   1200    /* struct mmap_arg_struct {
   1201          unsigned long addr;
   1202          unsigned long len;
   1203          unsigned long prot;
   1204          unsigned long flags;
   1205          unsigned long fd;
   1206          unsigned long offset;
   1207    }; */
   1208    UWord a1, a2, a3, a4, a5, a6;
   1209    SysRes r;
   1210 
   1211    UWord* args = (UWord*)ARG1;
   1212    PRE_REG_READ1(long, "old_mmap", struct mmap_arg_struct *, args);
   1213    PRE_MEM_READ( "old_mmap(args)", (Addr)args, 6*sizeof(UWord) );
   1214 
   1215    a1 = args[1-1];
   1216    a2 = args[2-1];
   1217    a3 = args[3-1];
   1218    a4 = args[4-1];
   1219    a5 = args[5-1];
   1220    a6 = args[6-1];
   1221 
   1222    PRINT("old_mmap ( %#lx, %llu, %ld, %ld, %ld, %ld )",
   1223          a1, (ULong)a2, a3, a4, a5, a6 );
   1224 
   1225    r = ML_(generic_PRE_sys_mmap)( tid, a1, a2, a3, a4, a5, (Off64T)a6 );
   1226    SET_STATUS_from_SysRes(r);
   1227 }
   1228 
   1229 PRE(sys_mmap2)
   1230 {
   1231    SysRes r;
   1232 
   1233    // Exactly like old_mmap() except:
   1234    //  - all 6 args are passed in regs, rather than in a memory-block.
   1235    //  - the file offset is specified in pagesize units rather than bytes,
   1236    //    so that it can be used for files bigger than 2^32 bytes.
   1237    // pagesize or 4K-size units in offset?  For ppc32/64-linux, this is
   1238    // 4K-sized.  Assert that the page size is 4K here for safety.
   1239    vg_assert(VKI_PAGE_SIZE == 4096);
   1240    PRINT("sys_mmap2 ( %#lx, %llu, %ld, %ld, %ld, %ld )",
   1241          ARG1, (ULong)ARG2, ARG3, ARG4, ARG5, ARG6 );
   1242    PRE_REG_READ6(long, "mmap2",
   1243                  unsigned long, start, unsigned long, length,
   1244                  unsigned long, prot,  unsigned long, flags,
   1245                  unsigned long, fd,    unsigned long, offset);
   1246 
   1247    r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
   1248                                        4096 * (Off64T)ARG6 );
   1249    SET_STATUS_from_SysRes(r);
   1250 }
   1251 
   1252 // XXX: lstat64/fstat64/stat64 are generic, but not necessarily
   1253 // applicable to every architecture -- I think only to 32-bit archs.
   1254 // We're going to need something like linux/core_os32.h for such
   1255 // things, eventually, I think.  --njn
   1256 PRE(sys_lstat64)
   1257 {
   1258    PRINT("sys_lstat64 ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   1259    PRE_REG_READ2(long, "lstat64", char *, file_name, struct stat64 *, buf);
   1260    PRE_MEM_RASCIIZ( "lstat64(file_name)", ARG1 );
   1261    PRE_MEM_WRITE( "lstat64(buf)", ARG2, sizeof(struct vki_stat64) );
   1262 }
   1263 
   1264 POST(sys_lstat64)
   1265 {
   1266    vg_assert(SUCCESS);
   1267    if (RES == 0) {
   1268       POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   1269    }
   1270 }
   1271 
   1272 PRE(sys_stat64)
   1273 {
   1274    FUSE_COMPATIBLE_MAY_BLOCK();
   1275    PRINT("sys_stat64 ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   1276    PRE_REG_READ2(long, "stat64", char *, file_name, struct stat64 *, buf);
   1277    PRE_MEM_RASCIIZ( "stat64(file_name)", ARG1 );
   1278    PRE_MEM_WRITE( "stat64(buf)", ARG2, sizeof(struct vki_stat64) );
   1279 }
   1280 
   1281 POST(sys_stat64)
   1282 {
   1283    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   1284 }
   1285 
   1286 PRE(sys_fstatat64)
   1287 {
   1288    FUSE_COMPATIBLE_MAY_BLOCK();
   1289    PRINT("sys_fstatat64 ( %ld, %#lx(%s), %#lx )",ARG1,ARG2,(char*)ARG2,ARG3);
   1290    PRE_REG_READ3(long, "fstatat64",
   1291                  int, dfd, char *, file_name, struct stat64 *, buf);
   1292    PRE_MEM_RASCIIZ( "fstatat64(file_name)", ARG2 );
   1293    PRE_MEM_WRITE( "fstatat64(buf)", ARG3, sizeof(struct vki_stat64) );
   1294 }
   1295 
   1296 POST(sys_fstatat64)
   1297 {
   1298    POST_MEM_WRITE( ARG3, sizeof(struct vki_stat64) );
   1299 }
   1300 
   1301 PRE(sys_fstat64)
   1302 {
   1303    PRINT("sys_fstat64 ( %ld, %#lx )",ARG1,ARG2);
   1304    PRE_REG_READ2(long, "fstat64", unsigned long, fd, struct stat64 *, buf);
   1305    PRE_MEM_WRITE( "fstat64(buf)", ARG2, sizeof(struct vki_stat64) );
   1306 }
   1307 
   1308 POST(sys_fstat64)
   1309 {
   1310    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   1311 }
   1312 
   1313 /* NB: arm-linux has a clone of this one, and ppc32-linux has an almost
   1314    identical version. */
   1315 PRE(sys_sigsuspend)
   1316 {
   1317    /* The C library interface to sigsuspend just takes a pointer to
   1318       a signal mask but this system call has three arguments - the first
   1319       two don't appear to be used by the kernel and are always passed as
   1320       zero by glibc and the third is the first word of the signal mask
   1321       so only 32 signals are supported.
   1322 
   1323       In fact glibc normally uses rt_sigsuspend if it is available as
   1324       that takes a pointer to the signal mask so supports more signals.
   1325     */
   1326    *flags |= SfMayBlock;
   1327    PRINT("sys_sigsuspend ( %ld, %ld, %ld )", ARG1,ARG2,ARG3 );
   1328    PRE_REG_READ3(int, "sigsuspend",
   1329                  int, history0, int, history1,
   1330                  vki_old_sigset_t, mask);
   1331 }
   1332 
   1333 PRE(sys_vm86old)
   1334 {
   1335    PRINT("sys_vm86old ( %#lx )", ARG1);
   1336    PRE_REG_READ1(int, "vm86old", struct vm86_struct *, info);
   1337    PRE_MEM_WRITE( "vm86old(info)", ARG1, sizeof(struct vki_vm86_struct));
   1338 }
   1339 
   1340 POST(sys_vm86old)
   1341 {
   1342    POST_MEM_WRITE( ARG1, sizeof(struct vki_vm86_struct));
   1343 }
   1344 
   1345 PRE(sys_vm86)
   1346 {
   1347    PRINT("sys_vm86 ( %ld, %#lx )", ARG1,ARG2);
   1348    PRE_REG_READ2(int, "vm86", unsigned long, fn, struct vm86plus_struct *, v86);
   1349    if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
   1350       PRE_MEM_WRITE( "vm86(v86)", ARG2, sizeof(struct vki_vm86plus_struct));
   1351 }
   1352 
   1353 POST(sys_vm86)
   1354 {
   1355    if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
   1356       POST_MEM_WRITE( ARG2, sizeof(struct vki_vm86plus_struct));
   1357 }
   1358 
   1359 
   1360 /* ---------------------------------------------------------------
   1361    PRE/POST wrappers for x86/Linux-variant specific syscalls
   1362    ------------------------------------------------------------ */
   1363 
   1364 PRE(sys_syscall223)
   1365 {
   1366    Int err;
   1367 
   1368    /* 223 is used by sys_bproc.  If we're not on a declared bproc
   1369       variant, fail in the usual way. */
   1370 
   1371    if (!VG_(strstr)(VG_(clo_kernel_variant), "bproc")) {
   1372       PRINT("non-existent syscall! (syscall 223)");
   1373       PRE_REG_READ0(long, "ni_syscall(223)");
   1374       SET_STATUS_Failure( VKI_ENOSYS );
   1375       return;
   1376    }
   1377 
   1378    err = ML_(linux_variant_PRE_sys_bproc)( ARG1, ARG2, ARG3,
   1379                                            ARG4, ARG5, ARG6 );
   1380    if (err) {
   1381       SET_STATUS_Failure( err );
   1382       return;
   1383    }
   1384    /* Let it go through. */
   1385    *flags |= SfMayBlock; /* who knows?  play safe. */
   1386 }
   1387 
   1388 POST(sys_syscall223)
   1389 {
   1390    ML_(linux_variant_POST_sys_bproc)( ARG1, ARG2, ARG3,
   1391                                       ARG4, ARG5, ARG6 );
   1392 }
   1393 
   1394 #undef PRE
   1395 #undef POST
   1396 
   1397 
   1398 /* ---------------------------------------------------------------------
   1399    The x86/Linux syscall table
   1400    ------------------------------------------------------------------ */
   1401 
   1402 /* Add an x86-linux specific wrapper to a syscall table. */
   1403 #define PLAX_(sysno, name)    WRAPPER_ENTRY_X_(x86_linux, sysno, name)
   1404 #define PLAXY(sysno, name)    WRAPPER_ENTRY_XY(x86_linux, sysno, name)
   1405 
   1406 
// This table maps from __NR_xxx syscall numbers (from
// linux/include/asm-i386/unistd.h) to the appropriate PRE/POST sys_foo()
// wrappers on x86 (as per sys_call_table in linux/arch/i386/kernel/entry.S).
//
// For those syscalls not handled by Valgrind, the annotation indicates
// the arch/OS combination, e.g. */* (generic), */Linux (Linux only), ?/?
// (unknown).
   1414 
   1415 static SyscallTableEntry syscall_table[] = {
   1416 //zz    //   (restart_syscall)                             // 0
   1417    GENX_(__NR_exit,              sys_exit),           // 1
   1418    GENX_(__NR_fork,              sys_fork),           // 2
   1419    GENXY(__NR_read,              sys_read),           // 3
   1420    GENX_(__NR_write,             sys_write),          // 4
   1421 
   1422    GENXY(__NR_open,              sys_open),           // 5
   1423    GENXY(__NR_close,             sys_close),          // 6
   1424    GENXY(__NR_waitpid,           sys_waitpid),        // 7
   1425    GENXY(__NR_creat,             sys_creat),          // 8
   1426    GENX_(__NR_link,              sys_link),           // 9
   1427 
   1428    GENX_(__NR_unlink,            sys_unlink),         // 10
   1429    GENX_(__NR_execve,            sys_execve),         // 11
   1430    GENX_(__NR_chdir,             sys_chdir),          // 12
   1431    GENXY(__NR_time,              sys_time),           // 13
   1432    GENX_(__NR_mknod,             sys_mknod),          // 14
   1433 
   1434    GENX_(__NR_chmod,             sys_chmod),          // 15
   1435 //zz    LINX_(__NR_lchown,            sys_lchown16),       // 16
   1436    GENX_(__NR_break,             sys_ni_syscall),     // 17
   1437 //zz    //   (__NR_oldstat,           sys_stat),           // 18 (obsolete)
   1438    LINX_(__NR_lseek,             sys_lseek),          // 19
   1439 
   1440    GENX_(__NR_getpid,            sys_getpid),         // 20
   1441    LINX_(__NR_mount,             sys_mount),          // 21
   1442    LINX_(__NR_umount,            sys_oldumount),      // 22
   1443    LINX_(__NR_setuid,            sys_setuid16),       // 23 ## P
   1444    LINX_(__NR_getuid,            sys_getuid16),       // 24 ## P
   1445 
   1446    LINX_(__NR_stime,             sys_stime),          // 25 * (SVr4,SVID,X/OPEN)
   1447    PLAXY(__NR_ptrace,            sys_ptrace),         // 26
   1448    GENX_(__NR_alarm,             sys_alarm),          // 27
   1449 //zz    //   (__NR_oldfstat,          sys_fstat),          // 28 * L -- obsolete
   1450    GENX_(__NR_pause,             sys_pause),          // 29
   1451 
   1452    LINX_(__NR_utime,             sys_utime),          // 30
   1453    GENX_(__NR_stty,              sys_ni_syscall),     // 31
   1454    GENX_(__NR_gtty,              sys_ni_syscall),     // 32
   1455    GENX_(__NR_access,            sys_access),         // 33
   1456    GENX_(__NR_nice,              sys_nice),           // 34
   1457 
   1458    GENX_(__NR_ftime,             sys_ni_syscall),     // 35
   1459    GENX_(__NR_sync,              sys_sync),           // 36
   1460    GENX_(__NR_kill,              sys_kill),           // 37
   1461    GENX_(__NR_rename,            sys_rename),         // 38
   1462    GENX_(__NR_mkdir,             sys_mkdir),          // 39
   1463 
   1464    GENX_(__NR_rmdir,             sys_rmdir),          // 40
   1465    GENXY(__NR_dup,               sys_dup),            // 41
   1466    LINXY(__NR_pipe,              sys_pipe),           // 42
   1467    GENXY(__NR_times,             sys_times),          // 43
   1468    GENX_(__NR_prof,              sys_ni_syscall),     // 44
   1469 //zz
   1470    GENX_(__NR_brk,               sys_brk),            // 45
   1471    LINX_(__NR_setgid,            sys_setgid16),       // 46
   1472    LINX_(__NR_getgid,            sys_getgid16),       // 47
   1473 //zz    //   (__NR_signal,            sys_signal),         // 48 */* (ANSI C)
   1474    LINX_(__NR_geteuid,           sys_geteuid16),      // 49
   1475 
   1476    LINX_(__NR_getegid,           sys_getegid16),      // 50
   1477    GENX_(__NR_acct,              sys_acct),           // 51
   1478    LINX_(__NR_umount2,           sys_umount),         // 52
   1479    GENX_(__NR_lock,              sys_ni_syscall),     // 53
   1480    LINXY(__NR_ioctl,             sys_ioctl),          // 54
   1481 
   1482    LINXY(__NR_fcntl,             sys_fcntl),          // 55
   1483    GENX_(__NR_mpx,               sys_ni_syscall),     // 56
   1484    GENX_(__NR_setpgid,           sys_setpgid),        // 57
   1485    GENX_(__NR_ulimit,            sys_ni_syscall),     // 58
   1486 //zz    //   (__NR_oldolduname,       sys_olduname),       // 59 Linux -- obsolete
   1487 //zz
   1488    GENX_(__NR_umask,             sys_umask),          // 60
   1489    GENX_(__NR_chroot,            sys_chroot),         // 61
   1490 //zz    //   (__NR_ustat,             sys_ustat)           // 62 SVr4 -- deprecated
   1491    GENXY(__NR_dup2,              sys_dup2),           // 63
   1492    GENX_(__NR_getppid,           sys_getppid),        // 64
   1493 
   1494    GENX_(__NR_getpgrp,           sys_getpgrp),        // 65
   1495    GENX_(__NR_setsid,            sys_setsid),         // 66
   1496    LINXY(__NR_sigaction,         sys_sigaction),      // 67
   1497 //zz    //   (__NR_sgetmask,          sys_sgetmask),       // 68 */* (ANSI C)
   1498 //zz    //   (__NR_ssetmask,          sys_ssetmask),       // 69 */* (ANSI C)
   1499 //zz
   1500    LINX_(__NR_setreuid,          sys_setreuid16),     // 70
   1501    LINX_(__NR_setregid,          sys_setregid16),     // 71
   1502    PLAX_(__NR_sigsuspend,        sys_sigsuspend),     // 72
   1503    LINXY(__NR_sigpending,        sys_sigpending),     // 73
   1504    GENX_(__NR_sethostname,       sys_sethostname),    // 74
   1505 //zz
   1506    GENX_(__NR_setrlimit,         sys_setrlimit),      // 75
   1507    GENXY(__NR_getrlimit,         sys_old_getrlimit),  // 76
   1508    GENXY(__NR_getrusage,         sys_getrusage),      // 77
   1509    GENXY(__NR_gettimeofday,      sys_gettimeofday),   // 78
   1510    GENX_(__NR_settimeofday,      sys_settimeofday),   // 79
   1511 
   1512    LINXY(__NR_getgroups,         sys_getgroups16),    // 80
   1513    LINX_(__NR_setgroups,         sys_setgroups16),    // 81
   1514    PLAX_(__NR_select,            old_select),         // 82
   1515    GENX_(__NR_symlink,           sys_symlink),        // 83
   1516 //zz    //   (__NR_oldlstat,          sys_lstat),          // 84 -- obsolete
   1517 //zz
   1518    GENX_(__NR_readlink,          sys_readlink),       // 85
   1519 //zz    //   (__NR_uselib,            sys_uselib),         // 86 */Linux
   1520 //zz    //   (__NR_swapon,            sys_swapon),         // 87 */Linux
   1521 //zz    //   (__NR_reboot,            sys_reboot),         // 88 */Linux
   1522 //zz    //   (__NR_readdir,           old_readdir),        // 89 -- superseded
   1523 //zz
   1524    PLAX_(__NR_mmap,              old_mmap),           // 90
   1525    GENXY(__NR_munmap,            sys_munmap),         // 91
   1526    GENX_(__NR_truncate,          sys_truncate),       // 92
   1527    GENX_(__NR_ftruncate,         sys_ftruncate),      // 93
   1528    GENX_(__NR_fchmod,            sys_fchmod),         // 94
   1529 
   1530    LINX_(__NR_fchown,            sys_fchown16),       // 95
   1531    GENX_(__NR_getpriority,       sys_getpriority),    // 96
   1532    GENX_(__NR_setpriority,       sys_setpriority),    // 97
   1533    GENX_(__NR_profil,            sys_ni_syscall),     // 98
   1534    GENXY(__NR_statfs,            sys_statfs),         // 99
   1535 
   1536    GENXY(__NR_fstatfs,           sys_fstatfs),        // 100
   1537    LINX_(__NR_ioperm,            sys_ioperm),         // 101
   1538    LINXY(__NR_socketcall,        sys_socketcall),     // 102 x86/Linux-only
   1539    LINXY(__NR_syslog,            sys_syslog),         // 103
   1540    GENXY(__NR_setitimer,         sys_setitimer),      // 104
   1541 
   1542    GENXY(__NR_getitimer,         sys_getitimer),      // 105
   1543    GENXY(__NR_stat,              sys_newstat),        // 106
   1544    GENXY(__NR_lstat,             sys_newlstat),       // 107
   1545    GENXY(__NR_fstat,             sys_newfstat),       // 108
   1546 //zz    //   (__NR_olduname,          sys_uname),          // 109 -- obsolete
   1547 //zz
   1548    GENX_(__NR_iopl,              sys_iopl),           // 110
   1549    LINX_(__NR_vhangup,           sys_vhangup),        // 111
   1550    GENX_(__NR_idle,              sys_ni_syscall),     // 112
   1551    PLAXY(__NR_vm86old,           sys_vm86old),        // 113 x86/Linux-only
   1552    GENXY(__NR_wait4,             sys_wait4),          // 114
   1553 //zz
   1554 //zz    //   (__NR_swapoff,           sys_swapoff),        // 115 */Linux
   1555    LINXY(__NR_sysinfo,           sys_sysinfo),        // 116
   1556    LINXY(__NR_ipc,               sys_ipc),            // 117
   1557    GENX_(__NR_fsync,             sys_fsync),          // 118
   1558    PLAX_(__NR_sigreturn,         sys_sigreturn),      // 119 ?/Linux
   1559 
   1560    PLAX_(__NR_clone,             sys_clone),          // 120
   1561 //zz    //   (__NR_setdomainname,     sys_setdomainname),  // 121 */*(?)
   1562    GENXY(__NR_uname,             sys_newuname),       // 122
   1563    PLAX_(__NR_modify_ldt,        sys_modify_ldt),     // 123
   1564    LINXY(__NR_adjtimex,          sys_adjtimex),       // 124
   1565 
   1566    GENXY(__NR_mprotect,          sys_mprotect),       // 125
   1567    LINXY(__NR_sigprocmask,       sys_sigprocmask),    // 126
   1568 //zz    // Nb: create_module() was removed 2.4-->2.6
   1569    GENX_(__NR_create_module,     sys_ni_syscall),     // 127
   1570    LINX_(__NR_init_module,       sys_init_module),    // 128
   1571    LINX_(__NR_delete_module,     sys_delete_module),  // 129
   1572 //zz
   1573 //zz    // Nb: get_kernel_syms() was removed 2.4-->2.6
   1574    GENX_(__NR_get_kernel_syms,   sys_ni_syscall),     // 130
   1575    LINX_(__NR_quotactl,          sys_quotactl),       // 131
   1576    GENX_(__NR_getpgid,           sys_getpgid),        // 132
   1577    GENX_(__NR_fchdir,            sys_fchdir),         // 133
   1578 //zz    //   (__NR_bdflush,           sys_bdflush),        // 134 */Linux
   1579 //zz
   1580 //zz    //   (__NR_sysfs,             sys_sysfs),          // 135 SVr4
   1581    LINX_(__NR_personality,       sys_personality),    // 136
   1582    GENX_(__NR_afs_syscall,       sys_ni_syscall),     // 137
   1583    LINX_(__NR_setfsuid,          sys_setfsuid16),     // 138
   1584    LINX_(__NR_setfsgid,          sys_setfsgid16),     // 139
   1585 
   1586    LINXY(__NR__llseek,           sys_llseek),         // 140
   1587    GENXY(__NR_getdents,          sys_getdents),       // 141
   1588    GENX_(__NR__newselect,        sys_select),         // 142
   1589    GENX_(__NR_flock,             sys_flock),          // 143
   1590    GENX_(__NR_msync,             sys_msync),          // 144
   1591 
   1592    GENXY(__NR_readv,             sys_readv),          // 145
   1593    GENX_(__NR_writev,            sys_writev),         // 146
   1594    GENX_(__NR_getsid,            sys_getsid),         // 147
   1595    GENX_(__NR_fdatasync,         sys_fdatasync),      // 148
   1596    LINXY(__NR__sysctl,           sys_sysctl),         // 149
   1597 
   1598    GENX_(__NR_mlock,             sys_mlock),          // 150
   1599    GENX_(__NR_munlock,           sys_munlock),        // 151
   1600    GENX_(__NR_mlockall,          sys_mlockall),       // 152
   1601    LINX_(__NR_munlockall,        sys_munlockall),     // 153
   1602    LINXY(__NR_sched_setparam,    sys_sched_setparam), // 154
   1603 
   1604    LINXY(__NR_sched_getparam,         sys_sched_getparam),        // 155
   1605    LINX_(__NR_sched_setscheduler,     sys_sched_setscheduler),    // 156
   1606    LINX_(__NR_sched_getscheduler,     sys_sched_getscheduler),    // 157
   1607    LINX_(__NR_sched_yield,            sys_sched_yield),           // 158
   1608    LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max),// 159
   1609 
   1610    LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min),// 160
   1611    LINXY(__NR_sched_rr_get_interval,  sys_sched_rr_get_interval), // 161
   1612    GENXY(__NR_nanosleep,         sys_nanosleep),      // 162
   1613    GENX_(__NR_mremap,            sys_mremap),         // 163
   1614    LINX_(__NR_setresuid,         sys_setresuid16),    // 164
   1615 
   1616    LINXY(__NR_getresuid,         sys_getresuid16),    // 165
   1617    PLAXY(__NR_vm86,              sys_vm86),           // 166 x86/Linux-only
   1618    GENX_(__NR_query_module,      sys_ni_syscall),     // 167
   1619    GENXY(__NR_poll,              sys_poll),           // 168
   1620 //zz    //   (__NR_nfsservctl,        sys_nfsservctl),     // 169 */Linux
   1621 //zz
   1622    LINX_(__NR_setresgid,         sys_setresgid16),    // 170
   1623    LINXY(__NR_getresgid,         sys_getresgid16),    // 171
   1624    LINXY(__NR_prctl,             sys_prctl),          // 172
   1625    PLAX_(__NR_rt_sigreturn,      sys_rt_sigreturn),   // 173 x86/Linux only?
   1626    LINXY(__NR_rt_sigaction,      sys_rt_sigaction),   // 174
   1627 
   1628    LINXY(__NR_rt_sigprocmask,    sys_rt_sigprocmask), // 175
   1629    LINXY(__NR_rt_sigpending,     sys_rt_sigpending),  // 176
   1630    LINXY(__NR_rt_sigtimedwait,   sys_rt_sigtimedwait),// 177
   1631    LINXY(__NR_rt_sigqueueinfo,   sys_rt_sigqueueinfo),// 178
   1632    LINX_(__NR_rt_sigsuspend,     sys_rt_sigsuspend),  // 179
   1633 
   1634    GENXY(__NR_pread64,           sys_pread64),        // 180
   1635    GENX_(__NR_pwrite64,          sys_pwrite64),       // 181
   1636    LINX_(__NR_chown,             sys_chown16),        // 182
   1637    GENXY(__NR_getcwd,            sys_getcwd),         // 183
   1638    LINXY(__NR_capget,            sys_capget),         // 184
   1639 
   1640    LINX_(__NR_capset,            sys_capset),         // 185
   1641    GENXY(__NR_sigaltstack,       sys_sigaltstack),    // 186
   1642    LINXY(__NR_sendfile,          sys_sendfile),       // 187
   1643    GENXY(__NR_getpmsg,           sys_getpmsg),        // 188
   1644    GENX_(__NR_putpmsg,           sys_putpmsg),        // 189
   1645 
   1646    // Nb: we treat vfork as fork
   1647    GENX_(__NR_vfork,             sys_fork),           // 190
   1648    GENXY(__NR_ugetrlimit,        sys_getrlimit),      // 191
   1649    PLAX_(__NR_mmap2,             sys_mmap2),          // 192
   1650    GENX_(__NR_truncate64,        sys_truncate64),     // 193
   1651    GENX_(__NR_ftruncate64,       sys_ftruncate64),    // 194
   1652 
   1653    PLAXY(__NR_stat64,            sys_stat64),         // 195
   1654    PLAXY(__NR_lstat64,           sys_lstat64),        // 196
   1655    PLAXY(__NR_fstat64,           sys_fstat64),        // 197
   1656    GENX_(__NR_lchown32,          sys_lchown),         // 198
   1657    GENX_(__NR_getuid32,          sys_getuid),         // 199
   1658 
   1659    GENX_(__NR_getgid32,          sys_getgid),         // 200
   1660    GENX_(__NR_geteuid32,         sys_geteuid),        // 201
   1661    GENX_(__NR_getegid32,         sys_getegid),        // 202
   1662    GENX_(__NR_setreuid32,        sys_setreuid),       // 203
   1663    GENX_(__NR_setregid32,        sys_setregid),       // 204
   1664 
   1665    GENXY(__NR_getgroups32,       sys_getgroups),      // 205
   1666    GENX_(__NR_setgroups32,       sys_setgroups),      // 206
   1667    GENX_(__NR_fchown32,          sys_fchown),         // 207
   1668    LINX_(__NR_setresuid32,       sys_setresuid),      // 208
   1669    LINXY(__NR_getresuid32,       sys_getresuid),      // 209
   1670 
   1671    LINX_(__NR_setresgid32,       sys_setresgid),      // 210
   1672    LINXY(__NR_getresgid32,       sys_getresgid),      // 211
   1673    GENX_(__NR_chown32,           sys_chown),          // 212
   1674    GENX_(__NR_setuid32,          sys_setuid),         // 213
   1675    GENX_(__NR_setgid32,          sys_setgid),         // 214
   1676 
   1677    LINX_(__NR_setfsuid32,        sys_setfsuid),       // 215
   1678    LINX_(__NR_setfsgid32,        sys_setfsgid),       // 216
   1679 //zz    //   (__NR_pivot_root,        sys_pivot_root),     // 217 */Linux
   1680    GENXY(__NR_mincore,           sys_mincore),        // 218
   1681    GENX_(__NR_madvise,           sys_madvise),        // 219
   1682 
   1683    GENXY(__NR_getdents64,        sys_getdents64),     // 220
   1684    LINXY(__NR_fcntl64,           sys_fcntl64),        // 221
   1685    GENX_(222,                    sys_ni_syscall),     // 222
   1686    PLAXY(223,                    sys_syscall223),     // 223 // sys_bproc?
   1687    LINX_(__NR_gettid,            sys_gettid),         // 224
   1688 
   1689    LINX_(__NR_readahead,         sys_readahead),      // 225 */Linux
   1690    LINX_(__NR_setxattr,          sys_setxattr),       // 226
   1691    LINX_(__NR_lsetxattr,         sys_lsetxattr),      // 227
   1692    LINX_(__NR_fsetxattr,         sys_fsetxattr),      // 228
   1693    LINXY(__NR_getxattr,          sys_getxattr),       // 229
   1694 
   1695    LINXY(__NR_lgetxattr,         sys_lgetxattr),      // 230
   1696    LINXY(__NR_fgetxattr,         sys_fgetxattr),      // 231
   1697    LINXY(__NR_listxattr,         sys_listxattr),      // 232
   1698    LINXY(__NR_llistxattr,        sys_llistxattr),     // 233
   1699    LINXY(__NR_flistxattr,        sys_flistxattr),     // 234
   1700 
   1701    LINX_(__NR_removexattr,       sys_removexattr),    // 235
   1702    LINX_(__NR_lremovexattr,      sys_lremovexattr),   // 236
   1703    LINX_(__NR_fremovexattr,      sys_fremovexattr),   // 237
   1704    LINXY(__NR_tkill,             sys_tkill),          // 238 */Linux
   1705    LINXY(__NR_sendfile64,        sys_sendfile64),     // 239
   1706 
   1707    LINXY(__NR_futex,             sys_futex),             // 240
   1708    LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 241
   1709    LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 242
   1710    PLAX_(__NR_set_thread_area,   sys_set_thread_area),   // 243
   1711    PLAX_(__NR_get_thread_area,   sys_get_thread_area),   // 244
   1712 
   1713    LINXY(__NR_io_setup,          sys_io_setup),       // 245
   1714    LINX_(__NR_io_destroy,        sys_io_destroy),     // 246
   1715    LINXY(__NR_io_getevents,      sys_io_getevents),   // 247
   1716    LINX_(__NR_io_submit,         sys_io_submit),      // 248
   1717    LINXY(__NR_io_cancel,         sys_io_cancel),      // 249
   1718 
   1719    LINX_(__NR_fadvise64,         sys_fadvise64),      // 250 */(Linux?)
   1720    GENX_(251,                    sys_ni_syscall),     // 251
   1721    LINX_(__NR_exit_group,        sys_exit_group),     // 252
   1722    LINXY(__NR_lookup_dcookie,    sys_lookup_dcookie), // 253
   1723    LINXY(__NR_epoll_create,      sys_epoll_create),   // 254
   1724 
   1725    LINX_(__NR_epoll_ctl,         sys_epoll_ctl),         // 255
   1726    LINXY(__NR_epoll_wait,        sys_epoll_wait),        // 256
   1727 //zz    //   (__NR_remap_file_pages,  sys_remap_file_pages),  // 257 */Linux
   1728    LINX_(__NR_set_tid_address,   sys_set_tid_address),   // 258
   1729    LINXY(__NR_timer_create,      sys_timer_create),      // 259
   1730 
   1731    LINXY(__NR_timer_settime,     sys_timer_settime),  // (timer_create+1)
   1732    LINXY(__NR_timer_gettime,     sys_timer_gettime),  // (timer_create+2)
   1733    LINX_(__NR_timer_getoverrun,  sys_timer_getoverrun),//(timer_create+3)
   1734    LINX_(__NR_timer_delete,      sys_timer_delete),   // (timer_create+4)
   1735    LINX_(__NR_clock_settime,     sys_clock_settime),  // (timer_create+5)
   1736 
   1737    LINXY(__NR_clock_gettime,     sys_clock_gettime),  // (timer_create+6)
   1738    LINXY(__NR_clock_getres,      sys_clock_getres),   // (timer_create+7)
   1739    LINXY(__NR_clock_nanosleep,   sys_clock_nanosleep),// (timer_create+8) */*
   1740    GENXY(__NR_statfs64,          sys_statfs64),       // 268
   1741    GENXY(__NR_fstatfs64,         sys_fstatfs64),      // 269
   1742 
   1743    LINX_(__NR_tgkill,            sys_tgkill),         // 270 */Linux
   1744    GENX_(__NR_utimes,            sys_utimes),         // 271
   1745    LINX_(__NR_fadvise64_64,      sys_fadvise64_64),   // 272 */(Linux?)
   1746    GENX_(__NR_vserver,           sys_ni_syscall),     // 273
   1747    LINX_(__NR_mbind,             sys_mbind),          // 274 ?/?
   1748 
   1749    LINXY(__NR_get_mempolicy,     sys_get_mempolicy),  // 275 ?/?
   1750    LINX_(__NR_set_mempolicy,     sys_set_mempolicy),  // 276 ?/?
   1751    LINXY(__NR_mq_open,           sys_mq_open),        // 277
   1752    LINX_(__NR_mq_unlink,         sys_mq_unlink),      // (mq_open+1)
   1753    LINX_(__NR_mq_timedsend,      sys_mq_timedsend),   // (mq_open+2)
   1754 
   1755    LINXY(__NR_mq_timedreceive,   sys_mq_timedreceive),// (mq_open+3)
   1756    LINX_(__NR_mq_notify,         sys_mq_notify),      // (mq_open+4)
   1757    LINXY(__NR_mq_getsetattr,     sys_mq_getsetattr),  // (mq_open+5)
   1758    GENX_(__NR_sys_kexec_load,    sys_ni_syscall),     // 283
   1759    LINXY(__NR_waitid,            sys_waitid),         // 284
   1760 
   1761    GENX_(285,                    sys_ni_syscall),     // 285
   1762    LINX_(__NR_add_key,           sys_add_key),        // 286
   1763    LINX_(__NR_request_key,       sys_request_key),    // 287
   1764    LINXY(__NR_keyctl,            sys_keyctl),         // 288
   1765    LINX_(__NR_ioprio_set,        sys_ioprio_set),     // 289
   1766 
   1767    LINX_(__NR_ioprio_get,        sys_ioprio_get),     // 290
   1768    LINX_(__NR_inotify_init,	 sys_inotify_init),   // 291
   1769    LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 292
   1770    LINX_(__NR_inotify_rm_watch,	 sys_inotify_rm_watch), // 293
   1771 //   LINX_(__NR_migrate_pages,	 sys_migrate_pages),    // 294
   1772 
   1773    LINXY(__NR_openat,		 sys_openat),           // 295
   1774    LINX_(__NR_mkdirat,		 sys_mkdirat),          // 296
   1775    LINX_(__NR_mknodat,		 sys_mknodat),          // 297
   1776    LINX_(__NR_fchownat,		 sys_fchownat),         // 298
   1777    LINX_(__NR_futimesat,	 sys_futimesat),        // 299
   1778 
   1779    PLAXY(__NR_fstatat64,	 sys_fstatat64),        // 300
   1780    LINX_(__NR_unlinkat,		 sys_unlinkat),         // 301
   1781    LINX_(__NR_renameat,		 sys_renameat),         // 302
   1782    LINX_(__NR_linkat,		 sys_linkat),           // 303
   1783    LINX_(__NR_symlinkat,	 sys_symlinkat),        // 304
   1784 
   1785    LINX_(__NR_readlinkat,	 sys_readlinkat),       // 305
   1786    LINX_(__NR_fchmodat,		 sys_fchmodat),         // 306
   1787    LINX_(__NR_faccessat,	 sys_faccessat),        // 307
   1788    LINX_(__NR_pselect6,		 sys_pselect6),         // 308
   1789    LINXY(__NR_ppoll,		 sys_ppoll),            // 309
   1790 
   1791 //   LINX_(__NR_unshare,		 sys_unshare),          // 310
   1792    LINX_(__NR_set_robust_list,	 sys_set_robust_list),  // 311
   1793    LINXY(__NR_get_robust_list,	 sys_get_robust_list),  // 312
   1794    LINX_(__NR_splice,            sys_splice),           // 313
   1795    LINX_(__NR_sync_file_range,   sys_sync_file_range),  // 314
   1796 
   1797    LINX_(__NR_tee,               sys_tee),              // 315
   1798    LINXY(__NR_vmsplice,          sys_vmsplice),         // 316
   1799    LINXY(__NR_move_pages,        sys_move_pages),       // 317
   1800    LINXY(__NR_getcpu,            sys_getcpu),           // 318
   1801    LINXY(__NR_epoll_pwait,       sys_epoll_pwait),      // 319
   1802 
   1803    LINX_(__NR_utimensat,         sys_utimensat),        // 320
   1804    LINXY(__NR_signalfd,          sys_signalfd),         // 321
   1805    LINXY(__NR_timerfd_create,    sys_timerfd_create),   // 322
   1806    LINXY(__NR_eventfd,           sys_eventfd),          // 323
   1807    LINX_(__NR_fallocate,         sys_fallocate),        // 324
   1808 
   1809    LINXY(__NR_timerfd_settime,   sys_timerfd_settime),  // 325
   1810    LINXY(__NR_timerfd_gettime,   sys_timerfd_gettime),  // 326
   1811    LINXY(__NR_signalfd4,         sys_signalfd4),        // 327
   1812    LINXY(__NR_eventfd2,          sys_eventfd2),         // 328
   1813    LINXY(__NR_epoll_create1,     sys_epoll_create1),     // 329
   1814 
   1815    LINXY(__NR_dup3,              sys_dup3),             // 330
   1816    LINXY(__NR_pipe2,             sys_pipe2),            // 331
   1817    LINXY(__NR_inotify_init1,     sys_inotify_init1),    // 332
   1818    LINXY(__NR_preadv,            sys_preadv),           // 333
   1819    LINX_(__NR_pwritev,           sys_pwritev),          // 334
   1820 
   1821    LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo),// 335
   1822    LINXY(__NR_perf_event_open,   sys_perf_event_open),  // 336
   1823    LINXY(__NR_recvmmsg,          sys_recvmmsg),         // 337
   1824    LINXY(__NR_fanotify_init,     sys_fanotify_init),    // 338
   1825    LINX_(__NR_fanotify_mark,     sys_fanotify_mark),    // 339
   1826 
   1827    LINXY(__NR_prlimit64,         sys_prlimit64),        // 340
   1828    LINXY(__NR_name_to_handle_at, sys_name_to_handle_at),// 341
   1829    LINXY(__NR_open_by_handle_at, sys_open_by_handle_at),// 342
   1830    LINXY(__NR_clock_adjtime,     sys_clock_adjtime),    // 343
   1831 //   LINX_(__NR_syncfs,            sys_ni_syscall),       // 344
   1832 
   1833    LINXY(__NR_sendmmsg,          sys_sendmmsg),         // 345
   1834 //   LINX_(__NR_setns,             sys_ni_syscall),       // 346
   1835    LINXY(__NR_process_vm_readv,  sys_process_vm_readv), // 347
   1836    LINX_(__NR_process_vm_writev, sys_process_vm_writev) // 348
   1837 };
   1838 
   1839 SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
   1840 {
   1841    const UInt syscall_table_size
   1842       = sizeof(syscall_table) / sizeof(syscall_table[0]);
   1843 
   1844    /* Is it in the contiguous initial section of the table? */
   1845    if (sysno < syscall_table_size) {
   1846       SyscallTableEntry* sys = &syscall_table[sysno];
   1847       if (sys->before == NULL)
   1848          return NULL; /* no entry */
   1849       else
   1850          return sys;
   1851    }
   1852 
   1853    /* Can't find a wrapper */
   1854    return NULL;
   1855 }
   1856 
   1857 #endif // defined(VGP_x86_linux)
   1858 
   1859 /*--------------------------------------------------------------------*/
   1860 /*--- end                                                          ---*/
   1861 /*--------------------------------------------------------------------*/
   1862