/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2011 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  else
#    error "Unknown arch"
#  endif
}


void VG_(set_syscall_return_shadows) ( ThreadId tid,
                                       /* shadow vals for the result */
                                       UWord s1res, UWord s2res,
                                       /* shadow vals for the error val */
                                       UWord s1err, UWord s2err )
{
#  if defined(VGP_x86_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
#  elif defined(VGP_amd64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
#  elif defined(VGP_arm_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
#  elif defined(VGO_darwin)
   // GrP fixme darwin syscalls may return more values (2 registers plus error)
#  elif defined(VGP_s390x_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  else
#    error "Unknown plat"
#  endif
}

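/* Example (a sketch only; the real callers live in the syscall
   machinery, m_syswrap): after a successful syscall, a Memcheck-style
   tool, for which an all-zeroes shadow value means "fully defined",
   could have the result register's shadows marked like so:

      VG_(set_syscall_return_shadows)( tid, 0, 0, 0, 0 );
*/
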
void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}

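/* Example (a sketch): on x86 a tool could read the shadow1 value of
   guest_EAX like this, assuming OFFSET_x86_EAX is the offset of
   guest_EAX within the guest state (the syscall wrappers define such
   constants):

      UWord sh;
      VG_(get_shadow_regs_area)( tid, (UChar*)&sh, 1,
                                 OFFSET_x86_EAX, sizeof(UWord) );
*/
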
static void apply_to_GPs_of_tid(VexGuestArchState* vex, void (*f)(Addr))
{
#if defined(VGA_x86)
   (*f)(vex->guest_EAX);
   (*f)(vex->guest_ECX);
   (*f)(vex->guest_EDX);
   (*f)(vex->guest_EBX);
   (*f)(vex->guest_ESI);
   (*f)(vex->guest_EDI);
   (*f)(vex->guest_ESP);
   (*f)(vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(vex->guest_RAX);
   (*f)(vex->guest_RCX);
   (*f)(vex->guest_RDX);
   (*f)(vex->guest_RBX);
   (*f)(vex->guest_RSI);
   (*f)(vex->guest_RDI);
   (*f)(vex->guest_RSP);
   (*f)(vex->guest_RBP);
   (*f)(vex->guest_R8);
   (*f)(vex->guest_R9);
   (*f)(vex->guest_R10);
   (*f)(vex->guest_R11);
   (*f)(vex->guest_R12);
   (*f)(vex->guest_R13);
   (*f)(vex->guest_R14);
   (*f)(vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(vex->guest_GPR0);
   (*f)(vex->guest_GPR1);
   (*f)(vex->guest_GPR2);
   (*f)(vex->guest_GPR3);
   (*f)(vex->guest_GPR4);
   (*f)(vex->guest_GPR5);
   (*f)(vex->guest_GPR6);
   (*f)(vex->guest_GPR7);
   (*f)(vex->guest_GPR8);
   (*f)(vex->guest_GPR9);
   (*f)(vex->guest_GPR10);
   (*f)(vex->guest_GPR11);
   (*f)(vex->guest_GPR12);
   (*f)(vex->guest_GPR13);
   (*f)(vex->guest_GPR14);
   (*f)(vex->guest_GPR15);
   (*f)(vex->guest_GPR16);
   (*f)(vex->guest_GPR17);
   (*f)(vex->guest_GPR18);
   (*f)(vex->guest_GPR19);
   (*f)(vex->guest_GPR20);
   (*f)(vex->guest_GPR21);
   (*f)(vex->guest_GPR22);
   (*f)(vex->guest_GPR23);
   (*f)(vex->guest_GPR24);
   (*f)(vex->guest_GPR25);
   (*f)(vex->guest_GPR26);
   (*f)(vex->guest_GPR27);
   (*f)(vex->guest_GPR28);
   (*f)(vex->guest_GPR29);
   (*f)(vex->guest_GPR30);
   (*f)(vex->guest_GPR31);
   (*f)(vex->guest_CTR);
   (*f)(vex->guest_LR);
#elif defined(VGA_arm)
   (*f)(vex->guest_R0);
   (*f)(vex->guest_R1);
   (*f)(vex->guest_R2);
   (*f)(vex->guest_R3);
   (*f)(vex->guest_R4);
   (*f)(vex->guest_R5);
   (*f)(vex->guest_R6);
   (*f)(vex->guest_R7);
   (*f)(vex->guest_R8);
   (*f)(vex->guest_R9);
   (*f)(vex->guest_R10);
   (*f)(vex->guest_R11);
   (*f)(vex->guest_R12);
   (*f)(vex->guest_R13);
   (*f)(vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(vex->guest_r0);
   (*f)(vex->guest_r1);
   (*f)(vex->guest_r2);
   (*f)(vex->guest_r3);
   (*f)(vex->guest_r4);
   (*f)(vex->guest_r5);
   (*f)(vex->guest_r6);
   (*f)(vex->guest_r7);
   (*f)(vex->guest_r8);
   (*f)(vex->guest_r9);
   (*f)(vex->guest_r10);
   (*f)(vex->guest_r11);
   (*f)(vex->guest_r12);
   (*f)(vex->guest_r13);
   (*f)(vex->guest_r14);
   (*f)(vex->guest_r15);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)) {
         ThreadState* tst = VG_(get_ThreadState)(tid);
         apply_to_GPs_of_tid(&(tst->arch.vex), f);
      }
   }
}

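/* Example (a sketch): a conservative root scan could treat every live
   guest GP register value as a potential pointer, via a hypothetical
   callback:

      static void consider_as_root ( UWord regval ) { ... }
      ...
      VG_(apply_to_GP_regs)( consider_as_root );
*/
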
void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}

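/* Typical iteration over all thread stacks, using only the two
   functions above (a sketch):

      ThreadId tid;
      Addr     stack_min, stack_max;
      VG_(thread_stack_reset_iter)( &tid );
      while ( VG_(thread_stack_next)( &tid, &stack_min, &stack_max ) ) {
         ... examine [stack_min, stack_max] for thread tid ...
      }
*/
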
Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  This is a bit delicate, since there is no obvious
   way to prevent this info from being used before it is ready.

   Current dependencies are:

   x86:   initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_x86_have_mxcsr)
   -------------
   amd64: initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)
   -------------
   ppc32: initially:  call VG_(machine_get_hwcaps)
                      call VG_(machine_ppc32_set_clszB)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_ppc32_has_FP)
                       and VG_(machine_ppc32_has_VMX)
   -------------
   ppc64: initially:  call VG_(machine_get_hwcaps)
                      call VG_(machine_ppc64_set_clszB)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_ppc64_has_VMX)
   -------------
   s390x: initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

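/* For example, the expected startup ordering on ppc32 is (a sketch, not
   the literal m_main sequence; clszB_from_auxv is hypothetical):

      Bool ok = VG_(machine_get_hwcaps)();        // probe the host CPU
      vg_assert(ok);
      VG_(machine_ppc32_set_clszB)( clszB_from_auxv );
      ...
      VG_(machine_get_VexArchInfo)( &va, &vai );  // now safe to call
*/
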
/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* fixs390: anything for s390x here ? */

/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) \
    || defined(VGA_arm) || defined(VGA_s390x)
#include "pub_tool_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif

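/* The detection code below repeatedly uses the following probe idiom,
   sketched here for a hypothetical feature X: optimistically assume the
   insn is available, attempt to execute it, and let the SIGILL handler
   longjmp back if it is not.

      have_X = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_X = False;            // SIGILL fired: insn not supported
      } else {
         __asm__ __volatile__(...); // attempt the insn
      }
*/
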
/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by the dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to the next max block boundary; assumes MAX_DCBZL_SZB is
      a power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz usually clears 32B, though on some machines it clears a full
    * native cache block */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */

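/* For example, on a PPC970 (G5) the probe above would typically find
   dcbz_szB == 32 and dcbzl_szB == 128; on machines lacking dcbzl it
   records dcbzl_szB == 0. */
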
#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model or VEX_S390X_MODEL_INVALID on error. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_INVALID;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_INVALID;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            /* If there are multiple processors, remember the oldest
               (smallest) model seen. */
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_INVALID ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     if (have_sse2 && have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     if (have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     va         = VexArchX86;
     vai.hwcaps = 0; /*baseline - no sse at all*/
     VG_(machine_x86_have_mxcsr) = 0;
     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3  is ecx:9
     // sse41  is ecx:19
     // sse42  is ecx:20

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va         = VexArchAMD64;
     vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
                  | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
                  | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0);
     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)
   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_EIMM, have_GIE, have_DFP, have_FGX;
     Int r, model;

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
        /* BASR loads the address of the next insn into r1.  Needed to
           avoid a segfault in the XY insn below. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     have_EIMM = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_EIMM = False;
     } else {
        __asm__ __volatile__(".long  0xc0090000\n\t"  /* iilf r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_GIE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GIE = False;
     } else {
        __asm__ __volatile__(".long  0xc2010000\n\t"  /* msfi r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xb3d20000"
                               : : : "r0", "cc", "memory");  /* adtr r0,r0,r0 */
     }

     have_FGX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FGX = False;
     } else {
        __asm__ __volatile__(".long 0xb3cd0000" : : : "r0");  /* lgdr r0,f0 */
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;

     model = VG_(get_machine_model)();

     VG_(debugLog)(1, "machine", "machine %d  LDISP %d EIMM %d GIE %d DFP %d "
                   "FGX %d\n", model, have_LDISP, have_EIMM, have_GIE,
                   have_DFP, have_FGX);

     if (model == VEX_S390X_MODEL_INVALID) return False;

     vai.hwcaps = model;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990.  On older
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }
     if (have_EIMM)  vai.hwcaps |= VEX_HWCAPS_S390X_EIMM;
     if (have_GIE)   vai.hwcaps |= VEX_HWCAPS_S390X_GIE;
     if (have_DFP)   vai.hwcaps |= VEX_HWCAPS_S390X_DFP;
     if (have_FGX)   vai.hwcaps |= VEX_HWCAPS_S390X_FGX;

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm-linux platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host cpu cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux)                           \
      || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
      || defined(VGP_s390x_linux)
   return f;
#  elif defined(VGP_ppc64_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address.  (The other two words hold the TOC pointer and the
      environment pointer, neither of which is needed here.) */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/