/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}
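
/* Illustrative sketch (not compiled): how a caller might use the
   accessors above, e.g. to divert a thread to a hypothetical
   trampoline address.  'tid' is assumed to denote a valid, live
   thread; 'example_divert' is not part of the real API. */
#if 0
static void example_divert ( ThreadId tid, Addr trampoline )
{
   Addr old_ip = VG_(get_IP)(tid);   /* current guest program counter */
   Addr sp     = VG_(get_SP)(tid);   /* current guest stack pointer */
   VG_(debugLog)(1, "machine", "tid %u: IP 0x%lx SP 0x%lx\n",
                 tid, old_ip, sp);
   VG_(set_IP)(tid, trampoline);     /* guest resumes at 'trampoline' */
}
#endif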

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}
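
/* Illustrative sketch (not compiled): a typical caller, such as a
   stack unwinder, seeds its unwind state from the two arch-independent
   fields filled in above; the 'misc' union is per-architecture. */
#if 0
static void example_start_unwind ( ThreadId tid )
{
   UnwindStartRegs startRegs;
   VG_(get_UnwindStartRegs)( &startRegs, tid );
   /* r_pc and r_sp are always valid, whatever the architecture. */
   VG_(debugLog)(1, "machine", "unwind from pc=0x%llx sp=0x%llx\n",
                 startRegs.r_pc, startRegs.r_sp);
}
#endif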

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}
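
/* Illustrative sketch (not compiled): tools address the shadow state
   by (shadowNo, offset, size), where the offset is an offset into
   VexGuestArchState.  A hypothetical amd64-only helper that fetches
   the first shadow copy of the guest RAX could look like this,
   assuming the usual offsetof macro is available here. */
#if 0
static ULong example_get_shadow1_RAX ( ThreadId tid )
{
   ULong shadow;
   VG_(get_shadow_regs_area)( tid, (UChar*)&shadow, /*shadowNo*/1,
                              offsetof(VexGuestAMD64State, guest_RAX),
                              sizeof(shadow) );
   return shadow;
}
#endif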


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R7" , vex->guest_R7 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
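
/* Illustrative sketch (not compiled): a minimal callback for
   VG_(apply_to_GP_regs), of the kind a tool might use to scan guest
   registers for pointers.  'example_print_reg' is hypothetical. */
#if 0
static void example_print_reg ( ThreadId tid, const HChar* name, UWord val )
{
   VG_(debugLog)(1, "machine", "tid %u: %s = 0x%lx\n", tid, name, val);
}
/* ... later: VG_(apply_to_GP_regs)(example_print_reg); */
#endif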

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}
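
/* Illustrative sketch (not compiled): the intended iteration idiom
   for the pair of functions above -- reset the cursor, then pull
   (tid, stack range) triples until exhaustion. */
#if 0
static void example_show_stacks ( void )
{
   ThreadId tid;
   Addr     stack_min, stack_max;
   VG_(thread_stack_reset_iter)(&tid);
   while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
      VG_(debugLog)(1, "machine", "tid %u stack [0x%lx, 0x%lx]\n",
                    tid, stack_min, stack_max);
   }
}
#endif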

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
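
/* Illustrative sketch (not compiled): the probe idiom built on the
   jump buffer above.  With a SIGILL handler that longjmps installed,
   a candidate instruction either executes (feature present) or we
   land back at the setjmp (feature absent).  The opcode word below is
   a placeholder, not a real instruction. */
#if 0
static Bool example_have_feature ( void )
{
   if (VG_MINIMAL_SETJMP(env_unsup_insn))
      return False;                          /* SIGILL was delivered */
   __asm__ __volatile__(".long 0x00000000"); /* candidate instruction */
   return True;                              /* no SIGILL; insn exists */
}
#endif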


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;
   /* round up to the next max block size, assuming MAX_DCBZL_SZB is a
      power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);
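
   /* Worked example of the rounding above, assuming MAX_DCBZL_SZB is
      128 (0x80): an 'aligned' value of 0x1008 becomes
      (0x1008 + 0x80) & ~0x7f == 0x1080, i.e. the next 128-byte
      boundary strictly above the original pointer. */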

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32
             || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32
                || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
      { "2965", VEX_S390X_MODEL_Z13S },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* defined(VGA_s390x) */

#if defined(VGA_mips32) || defined(VGA_mips64)

/*
 * Initialize hwcaps by parsing /proc/cpuinfo.  Returns False if it
 * cannot determine what CPU it is (it searches only for the models
 * that are or may be supported by Valgrind).
 */
static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
   const char *search_Cavium_str   = "cpu model\t\t: Cavium";
   const char *search_Ingenic_str  = "cpu model\t\t: Ingenic";
   const char *search_Loongson_str = "cpu model\t\t: ICT Loongson";
   const char *search_MIPS_str     = "cpu model\t\t: MIPS";
   const char *search_Netlogic_str = "cpu model\t\t: Netlogic";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf, *isa;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
       vai.hwcaps = VEX_PRID_COMP_BROADCOM;
   else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
       vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
   else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
       vai.hwcaps = VEX_PRID_COMP_CAVIUM;
   else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
       vai.hwcaps = VEX_PRID_COMP_MIPS;
   else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
       vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
   else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
       vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
   else {
       /* Did not find string in the proc file. */
       vai.hwcaps = 0;
       VG_(free)(file_buf);
       return False;
   }

   isa = VG_(strstr)(file_buf, "isa\t\t\t: ");

   if (NULL != isa) {
      if (VG_(strstr) (isa, "mips32r1") != NULL)
          vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
      if (VG_(strstr) (isa, "mips32r2") != NULL)
          vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
      if (VG_(strstr) (isa, "mips32r6") != NULL)
          vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
      if (VG_(strstr) (isa, "mips64r1") != NULL)
          vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
      if (VG_(strstr) (isa, "mips64r2") != NULL)
          vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
      if (VG_(strstr) (isa, "mips64r6") != NULL)
          vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;

      /*
       * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
       * decide to change incorrect settings in
       * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
       * The current settings show mips32r1, mips32r2 and mips64r1 as
       * unsupported ISAs by Cavium MIPS CPUs.
       */
      if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
                       VEX_MIPS_CPU_ISA_M64R1;
      }
   } else {
      /*
       * Kernel does not provide information about supported ISAs.
       * Populate the isa level flags based on the CPU model. That is our
       * best guess.
       */
       switch (VEX_MIPS_COMP_ID(vai.hwcaps)) {
          case VEX_PRID_COMP_CAVIUM:
          case VEX_PRID_COMP_NETLOGIC:
             vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
             /* fall through */
          case VEX_PRID_COMP_INGENIC_E1:
          case VEX_PRID_COMP_MIPS:
             vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
             /* fall through */
          case VEX_PRID_COMP_BROADCOM:
             vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
             break;
          case VEX_PRID_COMP_LEGACY:
             if (VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64)
                vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
                              VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
             break;
          default:
             break;
       }
   }
   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGA_mips32) || defined(VGA_mips64) */

#if defined(VGP_arm64_linux)

/* Check to see whether we are running on a Cavium core, and if so auto-enable
   the fallback LLSC implementation.  See #369459. */

static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Cavium_str = "CPU implementer\t: 0x43";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.arm64_requires_fallback_LLSC = True;

   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGP_arm64_linux) */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
        vai.hwcaps = 0; /*baseline - no sse at all*/
        VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     ULong xgetbv_0 = 0;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // xsave   is ecx:26
     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
        /* The processor supports AVX instructions, and the OS has
           enabled both XGETBV and the AVX state. */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx","rax");
        xgetbv_0 = w;
        if ((xgetbv_0 & 7) == 7) {
           /* Only say we have AVX if the XSAVE-allowable
              bitfield-mask allows x87, SSE and AVX state.  We could
              actually run with a more restrictive XGETBV(0) value,
              but VEX's implementation of XSAVE and XRSTOR assumes
              that all 3 bits are enabled.

              Also, the VEX implementation of XSAVE/XRSTOR assumes that
              state component [2] (the YMM high halves) are located in
              the XSAVE image at offsets 576 .. 831.  So we have to
              check that here before declaring AVX to be supported. */
           UInt eax2, ebx2, ecx2, edx2;
           VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
           if (ebx2 == 576 && eax2 == 256) {
              have_avx = True;
           }
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2.  They are only available if we have
        AVX1 (plus OS support). */
   1037      have_bmi  = False;
   1038      have_avx2 = False;
   1039      if (have_avx && max_basic >= 7) {
   1040         VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
   1041         have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
   1042         have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
   1043      }
   1044 
   1045      va          = VexArchAMD64;
   1046      vai.endness = VexEndnessLE;
   1047      vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
   1048                  | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
   1049                  | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
   1050                  | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
   1051                  | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
   1052                  | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
   1053                  | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);
   1054 
   1055      VG_(machine_get_cache_info)(&vai);
   1056 
   1057      return True;
   1058    }
   1059 
   1060 #elif defined(VGA_ppc32)
   1061    {
   1062      /* Find out which subset of the ppc32 instruction set is supported by
   1063         verifying whether various ppc32 instructions generate a SIGILL
   1064         or a SIGFPE. An alternative approach is to check the AT_HWCAP and
   1065         AT_PLATFORM entries in the ELF auxiliary table -- see also
   1066         the_iifii.client_auxv in m_main.c.
   1067       */
   1068      vki_sigset_t          saved_set, tmp_set;
   1069      vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
   1070      vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
   1071 
   1072      volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
   1073      volatile Bool have_isa_2_07, have_isa_3_0;
   1074      Int r;
   1075 
   1076      /* This is a kludge.  Really we ought to back-convert saved_act
   1077         into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
   1078         since that's a no-op on all ppc32 platforms so far supported,
   1079         it's not worth the typing effort.  At least include most basic
   1080         sanity check: */
   1081      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
   1082 
   1083      VG_(sigemptyset)(&tmp_set);
   1084      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1085      VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
   1086 
   1087      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1088      vg_assert(r == 0);
   1089 
   1090      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1091      vg_assert(r == 0);
   1092      tmp_sigill_act = saved_sigill_act;
   1093 
   1094      r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
   1095      vg_assert(r == 0);
   1096      tmp_sigfpe_act = saved_sigfpe_act;
   1097 
   1098      /* NODEFER: signal handler does not return (from the kernel's point of
   1099         view), hence if it is to successfully catch a signal more than once,
   1100         we need the NODEFER flag. */
   1101      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1102      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1103      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1104      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1105      r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1106      vg_assert(r == 0);
   1107 
   1108      tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
   1109      tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
   1110      tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
   1111      tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
   1112      r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
   1113      vg_assert(r == 0);
   1114 
   1115      /* standard FP insns */
   1116      have_F = True;
   1117      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1118         have_F = False;
   1119      } else {
   1120         __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
   1121      }
   1122 
   1123      /* Altivec insns */
   1124      have_V = True;
   1125      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1126         have_V = False;
   1127      } else {
   1128         /* Unfortunately some older assemblers don't speak Altivec (or
   1129            choose not to), so to be safe we directly emit the 32-bit
   1130            word corresponding to "vor 0,0,0".  This fixes a build
   1131            problem that happens on Debian 3.1 (ppc32), and probably
   1132            various other places. */
   1133         __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
   1134      }
   1135 
   1136      /* General-Purpose optional (fsqrt, fsqrts) */
   1137      have_FX = True;
   1138      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1139         have_FX = False;
   1140      } else {
   1141         __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
   1142      }
   1143 
   1144      /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
   1145      have_GX = True;
   1146      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1147         have_GX = False;
   1148      } else {
   1149         __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
   1150      }
   1151 
   1152      /* VSX support implies Power ISA 2.06 */
   1153      have_VX = True;
   1154      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1155         have_VX = False;
   1156      } else {
   1157         __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
   1158      }
   1159 
   1160      /* Check for Decimal Floating Point (DFP) support. */
   1161      have_DFP = True;
   1162      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1163         have_DFP = False;
   1164      } else {
   1165         __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
   1166      }
   1167 
   1168      /* Check for ISA 2.07 support. */
   1169      have_isa_2_07 = True;
   1170      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1171         have_isa_2_07 = False;
   1172      } else {
   1173         __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
   1174      }
   1175 
   1176      /* Check for ISA 3.0 support. */
   1177      have_isa_3_0 = True;
   1178      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1179         have_isa_3_0 = False;
   1180      } else {
   1181         __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
   1182      }
   1183 
   1184      /* determine dcbz/dcbzl sizes while we still have the signal
   1185       * handlers registered */
   1186      find_ppc_dcbz_sz(&vai);
   1187 
   1188      r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
   1189      vg_assert(r == 0);
   1190      r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
   1191      vg_assert(r == 0);
   1192      r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1193      vg_assert(r == 0);
   1194      VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
   1195                     (Int)have_F, (Int)have_V, (Int)have_FX,
   1196                     (Int)have_GX, (Int)have_VX, (Int)have_DFP,
   1197                     (Int)have_isa_2_07, (Int)have_isa_3_0);
   1198      /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
   1199      if (have_V && !have_F)
   1200         have_V = False;
   1201      if (have_FX && !have_F)
   1202         have_FX = False;
   1203      if (have_GX && !have_F)
   1204         have_GX = False;
   1205 
   1206      VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
   1207      VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;
   1208 
   1209      va = VexArchPPC32;
   1210      vai.endness = VexEndnessBE;
   1211 
   1212      vai.hwcaps = 0;
   1213      if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
   1214      if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
   1215      if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
   1216      if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
   1217      if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
   1218      if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
   1219      if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
   1220      if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;
   1221 
   1222      VG_(machine_get_cache_info)(&vai);
   1223 
   1224      /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
   1225         called before we're ready to go. */
   1226      return True;
   1227    }
   1228 
   1229 #elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
   1230    {
   1231      /* Same instruction set detection algorithm as for ppc32. */
   1232      vki_sigset_t          saved_set, tmp_set;
   1233      vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
   1234      vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
   1235 
   1236      volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
   1237      volatile Bool have_isa_2_07, have_isa_3_0;
   1238      Int r;
   1239 
   1240      /* This is a kludge.  Really we ought to back-convert saved_act
   1241         into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
   1242         since that's a no-op on all ppc64 platforms so far supported,
   1243         it's not worth the typing effort.  At least include most basic
   1244         sanity check: */
   1245      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
   1246 
   1247      VG_(sigemptyset)(&tmp_set);
   1248      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1249      VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
   1250 
   1251      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1252      vg_assert(r == 0);
   1253 
   1254      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1255      vg_assert(r == 0);
   1256      tmp_sigill_act = saved_sigill_act;
   1257 
   1258      VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
   1259      tmp_sigfpe_act = saved_sigfpe_act;
   1260 
   1261      /* NODEFER: signal handler does not return (from the kernel's point of
   1262         view), hence if it is to successfully catch a signal more than once,
   1263         we need the NODEFER flag. */
   1264      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1265      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1266      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1267      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1268      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1269 
   1270      tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
   1271      tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
   1272      tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
   1273      tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
   1274      VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
   1275 
   1276      /* standard FP insns */
   1277      have_F = True;
   1278      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1279         have_F = False;
   1280      } else {
   1281         __asm__ __volatile__("fmr 0,0");
   1282      }
   1283 
   1284      /* Altivec insns */
   1285      have_V = True;
   1286      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1287         have_V = False;
   1288      } else {
   1289         __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
   1290      }
   1291 
   1292      /* General-Purpose optional (fsqrt, fsqrts) */
   1293      have_FX = True;
   1294      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1295         have_FX = False;
   1296      } else {
   1297         __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
   1298      }
   1299 
   1300      /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
   1301      have_GX = True;
   1302      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1303         have_GX = False;
   1304      } else {
   1305         __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
   1306      }
   1307 
   1308      /* VSX support implies Power ISA 2.06 */
   1309      have_VX = True;
   1310      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1311         have_VX = False;
   1312      } else {
   1313         __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
   1314      }
   1315 
   1316      /* Check for Decimal Floating Point (DFP) support. */
   1317      have_DFP = True;
   1318      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1319         have_DFP = False;
   1320      } else {
   1321         __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
   1322      }
   1323 
   1324      /* Check for ISA 2.07 support. */
   1325      have_isa_2_07 = True;
   1326      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1327         have_isa_2_07 = False;
   1328      } else {
   1329         __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
   1330      }
   1331 
   1332      /* Check for ISA 3.0 support. */
   1333      have_isa_3_0 = True;
   1334      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1335         have_isa_3_0 = False;
   1336      } else {
   1337         __asm__ __volatile__(".long  0x7d205434"); /* cnttzw RT, RB */
   1338      }
   1339 
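     /* All the probes above share one pattern: install a SIGILL/SIGFPE
        handler that jumps back via VG_MINIMAL_SETJMP, attempt the
        instruction, and treat the jump as "not supported".  A plain-libc
        analogue of the same technique, as an illustrative sketch only
        (not part of the build; the probe_* names are hypothetical):

           #include <setjmp.h>
           #include <signal.h>
           #include <stdbool.h>
           #include <string.h>

           static sigjmp_buf probe_env;

           static void probe_handler ( int sig ) {
              siglongjmp(probe_env, 1);     // resume at sigsetjmp, returning 1
           }

           static bool can_exec_fsqrt ( void ) {
              struct sigaction sa;
              memset(&sa, 0, sizeof sa);
              sa.sa_handler = probe_handler;
              sa.sa_flags   = SA_NODEFER;   // let the handler fire repeatedly
              sigemptyset(&sa.sa_mask);
              sigaction(SIGILL, &sa, NULL); // restore of old action omitted
              if (sigsetjmp(probe_env, 1))
                 return false;              // SIGILL arrived: not supported
              __asm__ __volatile__(".long 0xFC00002C"); // fsqrt 0,0
              return true;
           }
     */
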
   1340      /* determine dcbz/dcbzl sizes while we still have the signal
   1341       * handlers registered */
   1342      find_ppc_dcbz_sz(&vai);
   1343 
   1344      VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
   1345      VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
   1346      VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1347      VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
   1348                     (Int)have_F, (Int)have_V, (Int)have_FX,
   1349                     (Int)have_GX, (Int)have_VX, (Int)have_DFP,
   1350                    (Int)have_isa_2_07, (Int)have_isa_3_0);
   1351      /* On ppc64, if we don't even have FP, just give up. */
   1352      if (!have_F)
   1353         return False;
   1354 
   1355      VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;
   1356 
   1357      va = VexArchPPC64;
   1358 #    if defined(VKI_LITTLE_ENDIAN)
   1359      vai.endness = VexEndnessLE;
   1360 #    elif defined(VKI_BIG_ENDIAN)
   1361      vai.endness = VexEndnessBE;
   1362 #    else
   1363      vai.endness = VexEndness_INVALID;
   1364 #    endif
   1365 
   1366      vai.hwcaps = 0;
   1367      if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
   1368      if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
   1369      if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
   1370      if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
   1371      if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
   1372      if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
   1373      if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;
   1374 
   1375      VG_(machine_get_cache_info)(&vai);
   1376 
   1377      /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
   1378         called before we're ready to go. */
   1379      return True;
   1380    }
   1381 
   1382 #elif defined(VGA_s390x)
   1383 
   1384 #  include "libvex_s390x_common.h"
   1385 
   1386    {
   1387      /* Instruction set detection code borrowed from ppc above. */
   1388      vki_sigset_t          saved_set, tmp_set;
   1389      vki_sigaction_fromK_t saved_sigill_act;
   1390      vki_sigaction_toK_t     tmp_sigill_act;
   1391 
   1392      volatile Bool have_LDISP, have_STFLE;
   1393      Int i, r, model;
   1394 
   1395      /* If the model is "unknown", don't treat this as an error.  Assume
   1396         it is a brand-new machine model whose identification we don't
   1397         have yet.  Keeping fingers crossed. */
   1398      model = VG_(get_machine_model)();
   1399 
   1400      /* Unblock SIGILL and stash away the old action for that signal */
   1401      VG_(sigemptyset)(&tmp_set);
   1402      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1403 
   1404      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1405      vg_assert(r == 0);
   1406 
   1407      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1408      vg_assert(r == 0);
   1409      tmp_sigill_act = saved_sigill_act;
   1410 
   1411      /* NODEFER: signal handler does not return (from the kernel's point of
   1412         view), hence if it is to successfully catch a signal more than once,
   1413         we need the NODEFER flag. */
   1414      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1415      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1416      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1417      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1418      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1419 
   1420      /* Determine hwcaps. Note that we cannot use the stfle insn because
   1421         it is not supported on z900. */
   1422 
   1423      have_LDISP = True;
   1424      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1425         have_LDISP = False;
   1426      } else {
   1427        /* BASR loads the address of the next insn into r1, which the
   1428           XY below then uses as its base register; this avoids a segfault. */
   1429         __asm__ __volatile__("basr %%r1,%%r0\n\t"
   1430                              ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
   1431                              ".short 0x0057" : : : "r0", "r1", "cc", "memory");
   1432      }
   1433 
   1434      /* Check availability of STFLE. If available, store the facility
   1435         bits in hoststfle. */
   1436      ULong hoststfle[S390_NUM_FACILITY_DW];
   1437 
   1438      for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
   1439         hoststfle[i] = 0;
   1440 
   1441      have_STFLE = True;
   1442      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1443         have_STFLE = False;
   1444      } else {
   1445          register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;
   1446 
   1447          __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
   1448                               : "=m" (hoststfle), "+d"(reg0)
   1449                               : : "cc", "memory");
   1450      }
   1451 
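     /* Note on the stfle protocol: on entry r0 holds the number of
        doublewords the caller provides minus one; on exit the machine
        writes back (again minus one) the number it would need in order
        to report every facility.  Surplus doublewords here stay zero. */
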
   1452      /* Restore signals */
   1453      r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
   1454      vg_assert(r == 0);
   1455      r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1456      vg_assert(r == 0);
   1457      va = VexArchS390X;
   1458      vai.endness = VexEndnessBE;
   1459 
   1460      vai.hwcaps = model;
   1461      if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
   1462      if (have_LDISP) {
   1463         /* Use long displacement only on machines >= z990. For all other
   1464            machines it is millicoded and therefore slow. */
   1465         if (model >= VEX_S390X_MODEL_Z990)
   1466            vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
   1467      }
   1468 
   1469      /* Detect the presence of certain facilities using the STFLE insn.
   1470         Note that these facilities were introduced at the same time as
   1471         STFLE or later, so the absence of STFLE implies the absence of
   1472         any facility we're trying to detect. */
   1473      struct fac_hwcaps_map {
   1474         UInt installed;
   1475         UInt facility_bit;
   1476         UInt hwcaps_bit;
   1477         const HChar name[6];   // may need adjustment for new facility names
   1478      } fac_hwcaps[] = {
   1479         { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
   1480         { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
   1481         { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
   1482         { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
   1483         { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
   1484         { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
   1485         { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
   1486         { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
   1487         { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
   1488         { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
   1489      };
   1490 
   1491      /* Set hwcaps according to the detected facilities */
   1492      for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
   1493         vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
   1494         if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
   1495            fac_hwcaps[i].installed = True;
   1496            vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
   1497         }
   1498      }
   1499 
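     /* STFLE numbers facilities big-endian style: facility bit 0 is the
        MSB of the first doubleword, hence the (63 - bit) shift above.
        A minimal sketch of the general lookup, usable once facility
        numbers above 63 are needed (helper name hypothetical):

           static Bool facility_installed ( const ULong* dws, UInt bit )
           {
              if (bit / 64 >= S390_NUM_FACILITY_DW)
                 return False;
              return (dws[bit / 64] >> (63 - (bit % 64))) & 1 ? True : False;
           }
     */
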
   1500      /* Build up a string showing the probed-for facilities */
   1501      HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
   1502                    (sizeof fac_hwcaps[0].name + 3) + //  %s %d
   1503                    7 + 1 + 4 + 2  // machine %4d
   1504                    + 1];  // \0
   1505      HChar *p = fac_str;
   1506      p += VG_(sprintf)(p, "machine %4d  ", model);
   1507      for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
   1508         p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
   1509                           fac_hwcaps[i].installed);
   1510      }
   1511      *p++ = '\0';
   1512 
   1513      VG_(debugLog)(1, "machine", "%s\n", fac_str);
   1514      VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
   1515 
   1516      VG_(machine_get_cache_info)(&vai);
   1517 
   1518      return True;
   1519    }
   1520 
   1521 #elif defined(VGA_arm)
   1522    {
   1523      /* Same instruction set detection algorithm as for ppc32. */
   1524      vki_sigset_t          saved_set, tmp_set;
   1525      vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
   1526      vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
   1527 
   1528      volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
   1529      volatile Int archlevel;
   1530      Int r;
   1531 
   1532      /* This is a kludge.  Really we ought to back-convert saved_act
   1533         into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
   1534         since that's a no-op on all ARM platforms so far supported,
   1535         it's not worth the typing effort.  At least include the most
   1536         basic sanity check: */
   1537      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
   1538 
   1539      VG_(sigemptyset)(&tmp_set);
   1540      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1541      VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
   1542 
   1543      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1544      vg_assert(r == 0);
   1545 
   1546      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1547      vg_assert(r == 0);
   1548      tmp_sigill_act = saved_sigill_act;
   1549 
   1550      VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
   1551      tmp_sigfpe_act = saved_sigfpe_act;
   1552 
   1553      /* NODEFER: signal handler does not return (from the kernel's point of
   1554         view), hence if it is to successfully catch a signal more than once,
   1555         we need the NODEFER flag. */
   1556      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1557      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1558      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1559      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1560      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1561 
   1562      tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
   1563      tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
   1564      tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
   1565      tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
   1566      VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
   1567 
   1568      /* VFP insns */
   1569      have_VFP = True;
   1570      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1571         have_VFP = False;
   1572      } else {
   1573         __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
   1574      }
   1575      /* There are several generations of the VFP extension, but they
   1576         differ very little, so for now we do not distinguish them. */
   1577      have_VFP2 = have_VFP;
   1578      have_VFP3 = have_VFP;
   1579 
   1580      /* NEON insns */
   1581      have_NEON = True;
   1582      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1583         have_NEON = False;
   1584      } else {
   1585         __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
   1586      }
   1587 
   1588      /* ARM architecture level */
   1589      archlevel = 5; /* v5 will be base level */
   1590      if (archlevel < 7) {
   1591         archlevel = 7;
   1592         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1593            archlevel = 5;
   1594         } else {
   1595            __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
   1596         }
   1597      }
   1598      if (archlevel < 6) {
   1599         archlevel = 6;
   1600         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1601            archlevel = 5;
   1602         } else {
   1603            __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
   1604         }
   1605      }
   1606 
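     /* Net effect of the two probes above: archlevel ends up 7 if PLI
        executed, else 6 if PKHBT executed, else 5.  The ARMv8 probe
        below can then lift a 7 to an 8. */
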
   1607      /* ARMv8 insns */
   1608      have_V8 = True;
   1609      if (archlevel == 7) {
   1610         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1611            have_V8 = False;
   1612         } else {
   1613            __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
   1614         }
   1615         if (have_V8 && have_NEON && have_VFP3) {
   1616            archlevel = 8;
   1617         }
   1618      }
   1619 
   1620      VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
   1621      VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
   1622      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1623      VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
   1624      VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1625 
   1626      VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
   1627            archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
   1628            (Int)have_NEON);
   1629 
   1630      VG_(machine_arm_archlevel) = archlevel;
   1631 
   1632      va = VexArchARM;
   1633      vai.endness = VexEndnessLE;
   1634 
   1635      vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
   1636      if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
   1637      if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
   1638      if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
   1639      if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   1640 
   1641      VG_(machine_get_cache_info)(&vai);
   1642 
   1643      return True;
   1644    }
   1645 
   1646 #elif defined(VGA_arm64)
   1647    {
   1648      va = VexArchARM64;
   1649      vai.endness = VexEndnessLE;
   1650 
   1651      /* So far there are no variants. */
   1652      vai.hwcaps = 0;
   1653 
   1654      VG_(machine_get_cache_info)(&vai);
   1655 
   1656      /* Check whether we need to use the fallback LLSC implementation.
   1657         If the check fails, give up. */
   1658      if (! VG_(parse_cpuinfo)())
   1659         return False;
   1660 
   1661      /* 0 denotes 'not set'.  The range of legitimate values here,
   1662         after being set that is, is 2 through 17 inclusive. */
   1663      vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
   1664      vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
   1665      ULong ctr_el0;
   1666      __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
   1667      vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
   1668      vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
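     /* CTR_EL0.DminLine (bits 19:16) and IminLine (bits 3:0) give log2
        of the smallest cache line size in 4-byte words; adding 2 turns
        that into log2 of the size in bytes.  Worked example: a field
        value of 4 yields lg2_szB = 6, i.e. 1 << 6 = 64-byte lines. */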
   1669      VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
   1670                       "ctr_el0.iMinLine_szB = %d\n",
   1671                    1 << vai.arm64_dMinLine_lg2_szB,
   1672                    1 << vai.arm64_iMinLine_lg2_szB);
   1673      VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
   1674                    vai.arm64_requires_fallback_LLSC ? "yes" : "no");
   1675 
   1676      return True;
   1677    }
   1678 
   1679 #elif defined(VGA_mips32)
   1680    {
   1681      /* Define the position of the F64 bit in the FIR register. */
   1682 #    define FP64 22
   1683      va = VexArchMIPS32;
   1684      if (!VG_(parse_cpuinfo)())
   1685          return False;
   1686 
   1687 #    if defined(VKI_LITTLE_ENDIAN)
   1688      vai.endness = VexEndnessLE;
   1689 #    elif defined(VKI_BIG_ENDIAN)
   1690      vai.endness = VexEndnessBE;
   1691 #    else
   1692      vai.endness = VexEndness_INVALID;
   1693 #    endif
   1694 
   1695      /* Same instruction set detection algorithm as for ppc32/arm... */
   1696      vki_sigset_t          saved_set, tmp_set;
   1697      vki_sigaction_fromK_t saved_sigill_act;
   1698      vki_sigaction_toK_t   tmp_sigill_act;
   1699 
   1700      volatile Bool have_DSP, have_DSPr2;
   1701      Int r;
   1702 
   1703      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
   1704 
   1705      VG_(sigemptyset)(&tmp_set);
   1706      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1707 
   1708      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1709      vg_assert(r == 0);
   1710 
   1711      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1712      vg_assert(r == 0);
   1713      tmp_sigill_act = saved_sigill_act;
   1714 
   1715      /* NODEFER: signal handler does not return (from the kernel's point of
   1716         view), hence if it is to successfully catch a signal more than once,
   1717         we need the NODEFER flag. */
   1718      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1719      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1720      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1721      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1722      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1723 
   1724      if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
   1725         /* DSPr2 instructions. */
   1726         have_DSPr2 = True;
   1727         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1728            have_DSPr2 = False;
   1729         } else {
   1730            __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
   1731         }
   1732         if (have_DSPr2) {
   1733            /* We assume it's 74K, since it can run DSPr2. */
   1734            vai.hwcaps |= VEX_PRID_IMP_74K;
   1735         } else {
   1736            /* DSP instructions. */
   1737            have_DSP = True;
   1738            if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1739               have_DSP = False;
   1740            } else {
   1741               __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
   1742            }
   1743            if (have_DSP) {
   1744               /* We assume it's 34K, since it has support for DSP. */
   1745               vai.hwcaps |= VEX_PRID_IMP_34K;
   1746            }
   1747         }
   1748      }
   1749 
   1750 #    if defined(VGP_mips32_linux)
   1751      Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
   1752 #    else
   1753      Int fpmode = -1;
   1754 #    endif
   1755 
   1756      if (fpmode < 0) {
   1757         /* prctl(PR_GET_FP_MODE) is not supported by the kernel, so we
   1758            use an alternative way to determine the FP mode. */
   1759         ULong result = 0;
   1760 
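        /* How the probe works: $t0 gets 0x3FF00000, the upper word of
           the IEEE754 double 1.0, and is written into $f1.  With FR=0
           the FPU has 32-bit registers and $f1 is the upper half of the
           $f0/$f1 pair, so storing $f0 writes back 0x3FF0000000000000;
           with FR=1, $f1 is an independent 64-bit register and $f0
           (loaded from 'result', i.e. zero) is stored back unchanged. */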
   1761         if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1762            __asm__ volatile (
   1763               ".set push\n\t"
   1764               ".set noreorder\n\t"
   1765               ".set oddspreg\n\t"
   1766               ".set hardfloat\n\t"
   1767               "lui $t0, 0x3FF0\n\t"
   1768               "ldc1 $f0, %0\n\t"
   1769               "mtc1 $t0, $f1\n\t"
   1770               "sdc1 $f0, %0\n\t"
   1771               ".set pop\n\t"
   1772               : "+m"(result)
   1773               :
   1774               : "t0", "$f0", "$f1", "memory");
   1775 
   1776            fpmode = (result != 0x3FF0000000000000ull);
   1777         }
   1778      }
   1779 
   1780      if (fpmode != 0)
   1781         vai.hwcaps |= VEX_MIPS_HOST_FR;
   1782 
   1783      VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
   1784      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1785      VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1786 
   1787      VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
   1788      VG_(machine_get_cache_info)(&vai);
   1789 
   1790      return True;
   1791    }
   1792 
   1793 #elif defined(VGA_mips64)
   1794    {
   1795      va = VexArchMIPS64;
   1796      if (!VG_(parse_cpuinfo)())
   1797          return False;
   1798 
   1799 #    if defined(VKI_LITTLE_ENDIAN)
   1800      vai.endness = VexEndnessLE;
   1801 #    elif defined(VKI_BIG_ENDIAN)
   1802      vai.endness = VexEndnessBE;
   1803 #    else
   1804      vai.endness = VexEndness_INVALID;
   1805 #    endif
   1806 
   1807      vai.hwcaps |= VEX_MIPS_HOST_FR;
   1808 
   1809      VG_(machine_get_cache_info)(&vai);
   1810 
   1811      return True;
   1812    }
   1813 
   1814 #else
   1815 #  error "Unknown arch"
   1816 #endif
   1817 }
   1818 
   1819 /* Notify host cpu instruction cache line size. */
   1820 #if defined(VGA_ppc32)
   1821 void VG_(machine_ppc32_set_clszB)( Int szB )
   1822 {
   1823    vg_assert(hwcaps_done);
   1824 
   1825    /* Either the value must not have been set yet (zero) or we can
   1826       tolerate it being set to the same value multiple times, as the
   1827       stack scanning logic in m_main is a bit stupid. */
   1828    vg_assert(vai.ppc_icache_line_szB == 0
   1829              || vai.ppc_icache_line_szB == szB);
   1830 
   1831    vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   1832    vai.ppc_icache_line_szB = szB;
   1833 }
   1834 #endif
   1835 
   1836 
   1837 /* Notify host cpu instruction cache line size. */
   1838 #if defined(VGA_ppc64be)|| defined(VGA_ppc64le)
   1839 void VG_(machine_ppc64_set_clszB)( Int szB )
   1840 {
   1841    vg_assert(hwcaps_done);
   1842 
   1843    /* Either the value must not have been set yet (zero) or we can
   1844       tolerate it being set to the same value multiple times, as the
   1845       stack scanning logic in m_main is a bit stupid. */
   1846    vg_assert(vai.ppc_icache_line_szB == 0
   1847              || vai.ppc_icache_line_szB == szB);
   1848 
   1849    vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   1850    vai.ppc_icache_line_szB = szB;
   1851 }
   1852 #endif
   1853 
   1854 
   1855 /* Notify host's ability to handle NEON instructions. */
   1856 #if defined(VGA_arm)
   1857 void VG_(machine_arm_set_has_NEON)( Bool has_neon )
   1858 {
   1859    vg_assert(hwcaps_done);
   1860    /* There's nothing else we can sanity check. */
   1861 
   1862    if (has_neon) {
   1863       vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   1864    } else {
   1865       vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   1866    }
   1867 }
   1868 #endif
   1869 
   1870 
   1871 /* Fetch host cpu info, once established. */
   1872 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
   1873                                    /*OUT*/VexArchInfo* pVai )
   1874 {
   1875    vg_assert(hwcaps_done);
   1876    if (pVa)  *pVa  = va;
   1877    if (pVai) *pVai = vai;
   1878 }
   1879 
   1880 
   1881 /* Returns the size of the largest guest register that we will
   1882    simulate in this run.  This depends on both the guest architecture
   1883    and on the specific capabilities we are simulating for that guest
   1884    (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   1885    or 32.  General rule: if in doubt, return a value larger than
   1886    reality.
   1887 
   1888    This information is needed by Cachegrind and Callgrind to decide
   1889    what the minimum cache line size they are prepared to simulate is.
   1890    Basically require that the minimum cache line size is at least as
   1891    large as the largest register that might get transferred to/from
   1892    memory, so as to guarantee that any such transaction can straddle
   1893    at most 2 cache lines.
   1894 */
   1895 Int VG_(machine_get_size_of_largest_guest_register) ( void )
   1896 {
   1897    vg_assert(hwcaps_done);
   1898    /* Once hwcaps_done is True, we can fish around inside va/vai to
   1899       find the information we need. */
   1900 
   1901 #  if defined(VGA_x86)
   1902    vg_assert(va == VexArchX86);
   1903    /* We don't support AVX, so 32 is out.  At the other end, even if
   1904       we don't support any SSE, the X87 can generate 10 byte
   1905       transfers, so let's say 16 to be on the safe side.  Hence the
   1906       answer is always 16. */
   1907    return 16;
   1908 
   1909 #  elif defined(VGA_amd64)
   1910    /* if AVX then 32 else 16 */
   1911    return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
   1912 
   1913 #  elif defined(VGA_ppc32)
   1914    /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   1915    if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   1916    if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   1917    if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   1918    return 8;
   1919 
   1920 #  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   1921    /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   1922    if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   1923    if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   1924    if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   1925    return 8;
   1926 
   1927 #  elif defined(VGA_s390x)
   1928    return 8;
   1929 
   1930 #  elif defined(VGA_arm)
   1931    /* Really it depends on whether or not we have NEON, but let's just
   1932       assume we always do. */
   1933    return 16;
   1934 
   1935 #  elif defined(VGA_arm64)
   1936    /* ARM64 always has Neon, AFAICS. */
   1937    return 16;
   1938 
   1939 #  elif defined(VGA_mips32)
   1940    /* The guest state implies 4, but that can't really be true, can
   1941       it? */
   1942    return 8;
   1943 
   1944 #  elif defined(VGA_mips64)
   1945    return 8;
   1946 
   1947 #  else
   1948 #    error "Unknown arch"
   1949 #  endif
   1950 }
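
/* Example of the intended use (a sketch, not any tool's actual code;
   'lineB' is a hypothetical variable): a cache simulator can validate
   a requested line size with

      vg_assert(lineB >= VG_(machine_get_size_of_largest_guest_register)());

   which guarantees that any single guest register transfer straddles
   at most two cache lines. */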
   1951 
   1952 
   1953 // Given a pointer to a function as obtained by "& functionname" in C,
   1954 // produce a pointer to the actual entry point for the function.
   1955 void* VG_(fnptr_to_fnentry)( void* f )
   1956 {
   1957 #  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
   1958       || defined(VGP_arm_linux) || defined(VGO_darwin)          \
   1959       || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
   1960       || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
   1961       || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
   1962       || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris)
   1963    return f;
   1964 #  elif defined(VGP_ppc64be_linux)
   1965    /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
   1966       3-word function descriptor, of which the first word is the entry
   1967       address. */
   1968    UWord* descr = (UWord*)f;
   1969    return (void*)(descr[0]);
   1970 #  else
   1971 #    error "Unknown platform"
   1972 #  endif
   1973 }
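
/* For reference, the 3-word descriptor dereferenced in the ppc64be
   case above has this ABI-defined layout (sketch only; the struct and
   field names are hypothetical):

      typedef struct {
         UWord entry;   // address of the function's first instruction
         UWord toc;     // TOC pointer (r2) the function expects
         UWord env;     // environment pointer (unused by C)
      } FnDescr;

   so the code above simply returns ((FnDescr*)f)->entry. */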
   1974 
   1975 /*--------------------------------------------------------------------*/
   1976 /*--- end                                                          ---*/
   1977 /*--------------------------------------------------------------------*/
   1978