/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)
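
/* These macros (defined per-arch in pub_core_machine.h) name the
   guest-state fields holding the program counter, stack pointer and
   frame pointer, e.g. guest_RIP/guest_RSP/guest_RBP on amd64; compare
   VG_(get_UnwindStartRegs) below. */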

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}
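
/* Usage sketch (illustrative, not part of this file): a tool wanting
   the shadow-1 copy of the first few bytes of a thread's guest state
   could do the following.  The offset and size are arbitrary example
   values and example_read_shadow1 is a hypothetical name. */
#if 0
static void example_read_shadow1 ( ThreadId tid )
{
   UChar buf[8];
   /* copy 8 bytes at offset 0 of shadow area 1 into buf */
   VG_(get_shadow_regs_area)( tid, buf, 1/*shadowNo*/, 0/*offset*/,
                              sizeof buf );
}
#endif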


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %d\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R7" , vex->guest_R7 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
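
/* Usage sketch (illustrative only): a callback suitable for
   VG_(apply_to_GP_regs), which simply logs every guest GP register.
   print_gp is a hypothetical name. */
#if 0
static void print_gp ( ThreadId tid, const HChar* name, UWord val )
{
   VG_(debugLog)(1, "machine", "tid %d: %s = 0x%lx\n",
                 (Int)tid, name, val);
}
/* ... then: VG_(apply_to_GP_regs)(print_gp); */
#endif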

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}
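
/* Illustrative only (assumes the stack grows downwards, as on the
   supported targets): the live extent of a thread's stack can be
   derived from the queries above.  example_stack_extent is a
   hypothetical name. */
#if 0
static void example_stack_extent ( ThreadId tid )
{
   Addr  lo    = VG_(get_SP)(tid);                 /* current low end  */
   Addr  hi    = VG_(thread_get_stack_max)(tid);   /* highest address  */
   SizeT limit = VG_(thread_get_stack_size)(tid);  /* maximum size     */
   Addr  base  = hi - limit;           /* lowest legal stack address */
   (void)lo; (void)base;
}
#endif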

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* For hwcaps detection on ppc32/64, s390x, arm, and mips32 we'll need
   to do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
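
/* The probing pattern above, shown as a standalone userland sketch
   using plain POSIX primitives (sigsetjmp/siglongjmp/sigaction)
   instead of Valgrind's VG_MINIMAL_JMP_BUF and VG_(sigaction).  This
   is illustrative only; can_exec_insn and try_insn are hypothetical
   names. */
#if 0
#include <setjmp.h>
#include <signal.h>

static sigjmp_buf probe_env;

static void probe_handler ( int sig )
{
   siglongjmp(probe_env, 1);          /* escape the faulting insn */
}

static int can_exec_insn ( void (*try_insn)(void) )
{
   struct sigaction sa, saved;
   int ok;
   sa.sa_handler = probe_handler;
   sa.sa_flags   = SA_NODEFER;        /* keep catching after the jump */
   sigemptyset(&sa.sa_mask);
   sigaction(SIGILL, &sa, &saved);
   if (sigsetjmp(probe_env, 1) == 0) {
      try_insn();                     /* raises SIGILL if unsupported */
      ok = 1;
   } else {
      ok = 0;                         /* longjmp'd out of the handler */
   }
   sigaction(SIGILL, &saved, NULL);   /* restore the old disposition */
   return ok;
}
#endif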


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by the dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to the next max block size; assumes MAX_DCBZL_SZB is a
      power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */
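
/* The measurement idea in find_ppc_dcbz_sz, as a portable sketch: fill
   an aligned buffer with 0xFF, run the block-zeroing operation once,
   and count how many bytes came back zero.  measure_zeroed_span and
   zero_op are hypothetical; on real ppc hardware zero_op would be the
   dcbz/dcbzl instruction itself, and align must be a power of 2. */
#if 0
#include <string.h>
#include <stdint.h>

static int measure_zeroed_span ( void (*zero_op)(void*), size_t align )
{
   char   buf[4 * 128];
   char*  aligned = (char*)(((uintptr_t)buf + align) & ~(uintptr_t)(align - 1));
   int    cleared = 0;
   size_t i;
   memset(buf, 0xff, sizeof buf);
   zero_op(aligned);                  /* zeroes one cache block */
   for (i = 0; i < sizeof buf; i++)
      if (buf[i] == 0)
         cleared++;
   return cleared;                    /* bytes actually zeroed */
}
#endif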

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#if defined(VGA_mips32) || defined(VGA_mips64)

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   const char *search_MIPS_str = "MIPS";
   const char *search_Broadcom_str = "Broadcom";
   const char *search_Netlogic_str = "Netlogic";
   const char *search_Cavium_str = "Cavium";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
       return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_Netlogic_str) != NULL)
       return VEX_PRID_COMP_NETLOGIC;
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
       return VEX_PRID_COMP_CAVIUM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
       return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif
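
/* Both parsers above slurp /proc/cpuinfo the same way.  A standalone
   libc equivalent might look like this; read_cpuinfo is a hypothetical
   name.  Note the chunked read: fstat reports size 0 for /proc files,
   so the size cannot be known up front. */
#if 0
#include <stdio.h>
#include <stdlib.h>

static char* read_cpuinfo ( void )
{
   FILE*  f = fopen("/proc/cpuinfo", "r");
   size_t cap = 1000, len = 0, n;
   char*  buf;
   if (f == NULL) return NULL;
   buf = malloc(cap + 1);
   while (buf != NULL && (n = fread(buf + len, 1, cap - len, f)) > 0) {
      len += n;
      if (len == cap) {               /* buffer full: grow and continue */
         cap *= 2;
         buf = realloc(buf, cap + 1);
      }
   }
   fclose(f);
   if (buf != NULL) buf[len] = '\0';  /* NUL-terminate for strstr() */
   return buf;
}
/* ... then e.g.: buf = read_cpuinfo(); buf && strstr(buf, "Broadcom"); */
#endif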

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind.
   Also determine information about the caches on this host. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
        vai.hwcaps = 0; /*baseline - no sse at all*/
        VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS
        support). */
     have_bmi = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va         = VexArchAMD64;
     vai.hwcaps = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
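
   /* The OSXSAVE/AVX dance above, re-expressed with compiler
      intrinsics as an illustrative alternative to the hand-rolled
      CPUID/XGETBV asm.  Assumes GCC or Clang on x86_64 (<cpuid.h>;
      _xgetbv may additionally require -mxsave).  os_enables_avx is a
      hypothetical name. */
#if 0
#include <cpuid.h>
#include <immintrin.h>

static int os_enables_avx ( void )
{
   unsigned int eax, ebx, ecx, edx;
   if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
      return 0;
   /* ecx bit 27 = OSXSAVE, bit 28 = AVX */
   if ((ecx & ((1u << 27) | (1u << 28))) != ((1u << 27) | (1u << 28)))
      return 0;
   /* XCR0 bits 1 and 2: OS has enabled XMM and YMM state saving */
   return (_xgetbv(0) & 6) == 6;
}
#endif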

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1. Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }
     /* Check availability of STFLE. If available, store the facility
        bits in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
         register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

         __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                              : "=m" (hoststfle), "+d"(reg0)
                              : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as,
        or later than, STFLE, so the absence of STFLE implies the
        absence of the facility we're trying to detect. */
   1294      struct fac_hwcaps_map {
   1295         UInt installed;
   1296         UInt facility_bit;
   1297         UInt hwcaps_bit;
   1298         const HChar name[6];   // may need adjustment for new facility names
   1299      } fac_hwcaps[] = {
   1300         { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
   1301         { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
   1302         { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
   1303         { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
   1304         { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
   1305         { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
   1306         { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
   1307         { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
   1308         { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
   1309         { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
   1310      };
   1311 
   1312      /* Set hwcaps according to the detected facilities */
   1313      for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
   1314         vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
        if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) +  // " %s %1d" per entry
                   7 + 1 + 4 + 2                      // "machine %4d  "
                   + 1];                              // terminating '\0'
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i = 0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1d", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t   tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm-linux platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

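     /* Each probe below executes a raw .word encoding rather than a
        mnemonic, so the assembler need not be told about the extension
        being tested; if the host lacks the insn, the resulting SIGILL
        is caught by handler_unsup_insn, which longjmps back via
        env_unsup_insn. */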
     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be the base level */
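     /* Probe for v7 first (PLI is v7-only); if that faults, fall back
        to a v6 probe (PKHBT is a v6 media insn).  If both fault, stay
        at the v5 baseline. */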
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
                   archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
                   (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm64)
   {
     va = VexArchARM64;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* 0 denotes 'not set'.  The range of legitimate values here,
        once set, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
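     /* In CTR_EL0, IminLine is bits [3:0] and DminLine is bits [19:16];
        each holds the log2 of the line size in 4-byte words, so adding
        2 converts it to the log2 of the size in bytes. */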
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                      "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);

     return True;
   }

#elif defined(VGA_mips32)
   {
     /* Define the position of the F64 bit in the FIR register. */
#    define FP64 22
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_DSP, have_DSPr2;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

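     /* Probe for the DSP ASEs only on MIPS Technologies cores;
        VEX_PRID_COMP_MIPS is the "MIPS Technologies" company field of
        the PRId register. */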
     if (model == VEX_PRID_COMP_MIPS) {
        /* DSPr2 instructions. */
        have_DSPr2 = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_DSPr2 = False;
        } else {
           __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
        }
        if (have_DSPr2) {
           /* We assume it's a 74K, since it can run DSPr2. */
           vai.hwcaps |= VEX_PRID_IMP_74K;
        } else {
           /* DSP instructions. */
           have_DSP = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSP = False;
           } else {
              __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
           }
           if (have_DSP) {
              /* We assume it's a 34K, since it has support for DSP. */
              vai.hwcaps |= VEX_PRID_IMP_34K;
           }
        }
     }

     /* Check whether the CPU has an FPU with 32 double-precision FP
        registers. */
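     /* cfc1 with control register $0 reads the FIR (Floating-point
        Implementation Register); the F64 bit, if set, indicates that
        the FPU has 64-bit registers. */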
     int FIR = 0;
     __asm__ __volatile__(
        "cfc1 %0, $0"  "\n\t"
        : "=r" (FIR)
     );
     if (FIR & (1 << FP64)) {
        vai.hwcaps |= VEX_PRID_CPU_32FPR;
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (e.g. AVX or non-AVX, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   the minimum cache line size they are prepared to simulate.
   Basically we require that the minimum cache line size is at least
   as large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
*/
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux)                           \
      || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
   return f;
#  elif defined(VGP_ppc64_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
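   /* (The descriptor's other two words are the TOC pointer and the
      environment pointer; neither is needed here.) */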
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}


/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/