/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2015 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)
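/* These macros pick out the arch-specific guest register fields
   (e.g. guest_EIP/guest_ESP on x86, as used in VG_(get_UnwindStartRegs)
   below); VG_INSTR_PTR, VG_STACK_PTR and VG_FRAME_PTR are per-arch
   aliases supplied by the core headers. */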

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_tilegx)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_pc;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r54;
   regs->misc.TILEGX.r52
      = VG_(threads)[tid].arch.vex.guest_r52;
   regs->misc.TILEGX.r55
      = VG_(threads)[tid].arch.vex.guest_r55;
#  else
#    error "Unknown arch"
#  endif
}
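
/* The per-arch sets captured above are deliberately minimal: just the
   PC, SP and whatever frame/link registers the stack unwinder needs to
   get started on that architecture. */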

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}
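
/* Illustrative use (hypothetical, not from this file): a tool could
   snapshot the entire shadow-1 state of a thread like so

      UChar buf[sizeof(VexGuestArchState)];
      VG_(get_shadow_regs_area)(tid, buf, 1, 0, sizeof(buf));

   where (offset, size) select any byte range within VexGuestArchState. */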

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R7" , vex->guest_R7 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#elif defined(VGA_tilegx)
   (*f)(tid, "r0",  vex->guest_r0 );
   (*f)(tid, "r1",  vex->guest_r1 );
   (*f)(tid, "r2",  vex->guest_r2 );
   (*f)(tid, "r3",  vex->guest_r3 );
   (*f)(tid, "r4",  vex->guest_r4 );
   (*f)(tid, "r5",  vex->guest_r5 );
   (*f)(tid, "r6",  vex->guest_r6 );
   (*f)(tid, "r7",  vex->guest_r7 );
   (*f)(tid, "r8",  vex->guest_r8 );
   (*f)(tid, "r9",  vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
   (*f)(tid, "r32", vex->guest_r32);
   (*f)(tid, "r33", vex->guest_r33);
   (*f)(tid, "r34", vex->guest_r34);
   (*f)(tid, "r35", vex->guest_r35);
   (*f)(tid, "r36", vex->guest_r36);
   (*f)(tid, "r37", vex->guest_r37);
   (*f)(tid, "r38", vex->guest_r38);
   (*f)(tid, "r39", vex->guest_r39);
   (*f)(tid, "r40", vex->guest_r40);
   (*f)(tid, "r41", vex->guest_r41);
   (*f)(tid, "r42", vex->guest_r42);
   (*f)(tid, "r43", vex->guest_r43);
   (*f)(tid, "r44", vex->guest_r44);
   (*f)(tid, "r45", vex->guest_r45);
   (*f)(tid, "r46", vex->guest_r46);
   (*f)(tid, "r47", vex->guest_r47);
   (*f)(tid, "r48", vex->guest_r48);
   (*f)(tid, "r49", vex->guest_r49);
   (*f)(tid, "r50", vex->guest_r50);
   (*f)(tid, "r51", vex->guest_r51);
   (*f)(tid, "r52", vex->guest_r52);
   (*f)(tid, "r53", vex->guest_r53);
   (*f)(tid, "r54", vex->guest_r54);
   (*f)(tid, "r55", vex->guest_r55);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
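
/* Illustrative callback (hypothetical): print every general-purpose
   register of every live thread.

      static void print_gp ( ThreadId tid, const HChar* name, UWord val ) {
         VG_(debugLog)(1, "demo", "tid %u: %s = 0x%lx\n", tid, name, val);
      }

   and then call VG_(apply_to_GP_regs)(print_gp). */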

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}
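
/* Typical iteration over all thread stacks (sketch):

      ThreadId tid;
      Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while (VG_(thread_stack_next)(&tid, &stack_min, &stack_max)) {
         // examine [stack_min, stack_max] for thread tid
      }
*/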

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/
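
/* Sketch of the intended startup ordering (illustrative; the actual
   sequencing lives in m_main):

      1. VG_(machine_get_hwcaps)()  -- probes the CPU, possibly
         installing and restoring SIGILL/SIGFPE handlers;
      2. only afterwards, set up the client's signal state;
      3. only afterwards, rely on the info recorded below. */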

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
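
/* All the SIGILL-based probes further down follow one pattern (sketch):

      have_FEATURE = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FEATURE = False;        // insn trapped; handler longjmp'd here
      } else {
         __asm__ __volatile__(...);   // try one representative insn
      }
*/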


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size; assumes MAX_DCBZL_SZB is a power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
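   /* Count how many bytes the dcbz cleared; since the rest of the buffer
      was set to 0xff, that count is the clear-block size. */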
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
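   /* Scan each "processor ..." line; if different processors report
      different machine numbers, keep the smallest model id seen, which
      is the conservative choice. */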
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#if defined(VGA_mips32) || defined(VGA_mips64)

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   const char *search_MIPS_str = "MIPS";
   const char *search_Broadcom_str = "Broadcom";
   const char *search_Netlogic_str = "Netlogic";
   const char *search_Cavium_str = "Cavium";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
       return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_Netlogic_str) != NULL)
       return VEX_PRID_COMP_NETLOGIC;
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
       return VEX_PRID_COMP_CAVIUM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
       return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind.
   Also determine information about the caches on this host. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;
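
     /* vstr now holds the 12-character vendor string, e.g.
        "GenuineIntel" or "AuthenticAMD". */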

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
       vai.hwcaps = 0; /*baseline - no sse at all*/
       VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     ULong xgetbv_0 = 0;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // xsave   is ecx:26
     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
        /* The processor supports AVX and XSAVE, and the OS has enabled
           XGETBV (OSXSAVE); now check which state the OS actually saves. */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx","rax");
        xgetbv_0 = w;
        if ((xgetbv_0 & 7) == 7) {
           /* Only say we have AVX if the XSAVE-allowable
              bitfield-mask allows x87, SSE and AVX state.  We could
              actually run with a more restrictive XGETBV(0) value,
              but VEX's implementation of XSAVE and XRSTOR assumes
              that all 3 bits are enabled.

              Also, the VEX implementation of XSAVE/XRSTOR assumes that
              state component [2] (the YMM high halves) are located in
              the XSAVE image at offsets 576 .. 831.  So we have to
              check that here before declaring AVX to be supported. */
           UInt eax2, ebx2, ecx2, edx2;
           VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
           if (ebx2 == 576 && eax2 == 256) {
              have_avx = True;
           }
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS support). */
     have_bmi  = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
   1176      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
   1177 
   1178      VG_(sigemptyset)(&tmp_set);
   1179      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1180      VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
   1181 
   1182      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1183      vg_assert(r == 0);
   1184 
   1185      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1186      vg_assert(r == 0);
   1187      tmp_sigill_act = saved_sigill_act;
   1188 
   1189      VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
   1190      tmp_sigfpe_act = saved_sigfpe_act;
   1191 
   1192      /* NODEFER: signal handler does not return (from the kernel's point of
   1193         view), hence if it is to successfully catch a signal more than once,
   1194         we need the NODEFER flag. */
   1195      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1196      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1197      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1198      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1199      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1200 
   1201      tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
   1202      tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
   1203      tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
   1204      tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
   1205      VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
   1206 
   1207      /* standard FP insns */
   1208      have_F = True;
   1209      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1210         have_F = False;
   1211      } else {
   1212         __asm__ __volatile__("fmr 0,0");
   1213      }
   1214 
   1215      /* Altivec insns */
   1216      have_V = True;
   1217      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1218         have_V = False;
   1219      } else {
   1220         __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
   1221      }
   1222 
   1223      /* General-Purpose optional (fsqrt, fsqrts) */
   1224      have_FX = True;
   1225      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1226         have_FX = False;
   1227      } else {
   1228         __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
   1229      }
   1230 
   1231      /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
   1232      have_GX = True;
   1233      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1234         have_GX = False;
   1235      } else {
   1236         __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
   1237      }
   1238 
   1239      /* VSX support implies Power ISA 2.06 */
   1240      have_VX = True;
   1241      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1242         have_VX = False;
   1243      } else {
   1244         __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
   1245      }
   1246 
   1247      /* Check for Decimal Floating Point (DFP) support. */
   1248      have_DFP = True;
   1249      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1250         have_DFP = False;
   1251      } else {
   1252         __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
   1253      }
   1254 
   1255      /* Check for ISA 2.07 support. */
   1256      have_isa_2_07 = True;
   1257      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1258         have_isa_2_07 = False;
   1259      } else {
   1260         __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
   1261      }
   1262 
   1263      /* determine dcbz/dcbzl sizes while we still have the signal
   1264       * handlers registered */
   1265      find_ppc_dcbz_sz(&vai);
   1266 
   1267      VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
   1268      VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
   1269      VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1270      VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
   1271                     (Int)have_F, (Int)have_V, (Int)have_FX,
   1272                     (Int)have_GX, (Int)have_VX, (Int)have_DFP,
   1273                     (Int)have_isa_2_07);
   1274      /* on ppc64be, if we don't even have FP, just give up. */
   1275      if (!have_F)
   1276         return False;
   1277 
   1278      VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;
   1279 
   1280      va = VexArchPPC64;
   1281 #    if defined(VKI_LITTLE_ENDIAN)
   1282      vai.endness = VexEndnessLE;
   1283 #    elif defined(VKI_BIG_ENDIAN)
   1284      vai.endness = VexEndnessBE;
   1285 #    else
   1286      vai.endness = VexEndness_INVALID;
   1287 #    endif
   1288 
   1289      vai.hwcaps = 0;
   1290      if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
   1291      if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
   1292      if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
   1293      if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
   1294      if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
   1295      if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
   1296 
   1297      VG_(machine_get_cache_info)(&vai);
   1298 
   1299      /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
   1300         called before we're ready to go. */
   1301      return True;
   1302    }
   1303 
   1304 #elif defined(VGA_s390x)
   1305 
   1306 #  include "libvex_s390x_common.h"
   1307 
   1308    {
   1309      /* Instruction set detection code borrowed from ppc above. */
   1310      vki_sigset_t          saved_set, tmp_set;
   1311      vki_sigaction_fromK_t saved_sigill_act;
   1312      vki_sigaction_toK_t     tmp_sigill_act;
   1313 
   1314      volatile Bool have_LDISP, have_STFLE;
   1315      Int i, r, model;
   1316 
   1317      /* If the model is "unknown" don't treat this as an error. Assume
   1318         this is a brand-new machine model for which we don't have the
   1319         identification yet. Keeping fingers crossed. */
   1320      model = VG_(get_machine_model)();
   1321 
   1322      /* Unblock SIGILL and stash away the old action for that signal */
   1323      VG_(sigemptyset)(&tmp_set);
   1324      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1325 
   1326      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1327      vg_assert(r == 0);
   1328 
   1329      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1330      vg_assert(r == 0);
   1331      tmp_sigill_act = saved_sigill_act;
   1332 
   1333      /* NODEFER: signal handler does not return (from the kernel's point of
   1334         view), hence if it is to successfully catch a signal more than once,
   1335         we need the NODEFER flag. */
   1336      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1337      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1338      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1339      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1340      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1341 
    1342      /* Determine hwcaps.  Note, we cannot use the stfle insn
    1343         unconditionally, because it is not supported on z900. */
   1344 
   1345      have_LDISP = True;
   1346      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1347         have_LDISP = False;
   1348      } else {
    1349         /* BASR loads the address of the next insn into r1, giving
    1350            the XY probe a valid address to access (avoids SIGSEGV). */
   1351         __asm__ __volatile__("basr %%r1,%%r0\n\t"
   1352                              ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
   1353                              ".short 0x0057" : : : "r0", "r1", "cc", "memory");
   1354      }
   1355 
    1356      /* Check the availability of STFLE.  If available, store the
    1357         facility bits in hoststfle. */
   1358      ULong hoststfle[S390_NUM_FACILITY_DW];
   1359 
   1360      for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
   1361         hoststfle[i] = 0;
   1362 
   1363      have_STFLE = True;
   1364      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1365         have_STFLE = False;
   1366      } else {
   1367          register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;
   1368 
   1369          __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
   1370                               : "=m" (hoststfle), "+d"(reg0)
   1371                               : : "cc", "memory");
   1372      }
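              /* stfle protocol: on entry the low byte of r0 holds the number
                 of doublewords the buffer provides, minus one; the insn
                 stores up to that many facility doublewords and updates r0
                 to the number the machine needs, minus one.  Only
                 hoststfle[0] is inspected below. */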
   1373 
   1374      /* Restore signals */
   1375      r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
   1376      vg_assert(r == 0);
   1377      r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1378      vg_assert(r == 0);
   1379      va = VexArchS390X;
   1380      vai.endness = VexEndnessBE;
   1381 
   1382      vai.hwcaps = model;
   1383      if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
   1384      if (have_LDISP) {
   1385         /* Use long displacement only on machines >= z990. For all other
   1386            machines it is millicoded and therefore slow. */
   1387         if (model >= VEX_S390X_MODEL_Z990)
   1388            vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
   1389      }
   1390 
    1391      /* Detect presence of certain facilities using the STFLE insn.
    1392         Note that these facilities were introduced at the same time as,
    1393         or later than, STFLE itself, so the absence of STFLE implies
    1394         the absence of any facility we're trying to detect. */
   1395      struct fac_hwcaps_map {
   1396         UInt installed;
   1397         UInt facility_bit;
   1398         UInt hwcaps_bit;
   1399         const HChar name[6];   // may need adjustment for new facility names
   1400      } fac_hwcaps[] = {
   1401         { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
   1402         { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
   1403         { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
   1404         { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
   1405         { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
   1406         { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
   1407         { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
   1408         { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
   1409         { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
   1410         { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
   1411      };
   1412 
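              /* STFLE numbers facility bits from the most significant bit of
                 the first doubleword (facility bit 0 == MSB of hoststfle[0]),
                 hence the (63 - facility_bit) shift below, e.g.
                    hoststfle[0] & (1ULL << (63 - S390_FAC_DFP))
                 is nonzero iff the DFP facility is installed. */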
   1413      /* Set hwcaps according to the detected facilities */
   1414      for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
   1415         vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
   1416         if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
   1417            fac_hwcaps[i].installed = True;
   1418            vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
   1419         }
   1420      }
   1421 
   1422      /* Build up a string showing the probed-for facilities */
   1423      HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
   1424                    (sizeof fac_hwcaps[0].name + 3) + //  %s %d
   1425                    7 + 1 + 4 + 2  // machine %4d
   1426                    + 1];  // \0
   1427      HChar *p = fac_str;
   1428      p += VG_(sprintf)(p, "machine %4d  ", model);
   1429      for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
   1430         p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
   1431                           fac_hwcaps[i].installed);
   1432      }
   1433      *p++ = '\0';
   1434 
   1435      VG_(debugLog)(1, "machine", "%s\n", fac_str);
   1436      VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
   1437 
   1438      VG_(machine_get_cache_info)(&vai);
   1439 
   1440      return True;
   1441    }
   1442 
   1443 #elif defined(VGA_arm)
   1444    {
   1445      /* Same instruction set detection algorithm as for ppc32. */
   1446      vki_sigset_t          saved_set, tmp_set;
   1447      vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
   1448      vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
   1449 
   1450      volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
   1451      volatile Int archlevel;
   1452      Int r;
   1453 
    1454      /* saved_sigill_act/saved_sigfpe_act are fromK_t values and are
    1455         back-converted into toK_t via
    1456         VG_(convert_sigaction_fromK_to_toK) before being reinstated
    1457         below.  At least include the most basic sanity check that the
    1458         two types are interchangeable: */
   1459      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
   1460 
   1461      VG_(sigemptyset)(&tmp_set);
   1462      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1463      VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
   1464 
   1465      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1466      vg_assert(r == 0);
   1467 
   1468      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1469      vg_assert(r == 0);
   1470      tmp_sigill_act = saved_sigill_act;
   1471 
   1472      VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
   1473      tmp_sigfpe_act = saved_sigfpe_act;
   1474 
   1475      /* NODEFER: signal handler does not return (from the kernel's point of
   1476         view), hence if it is to successfully catch a signal more than once,
   1477         we need the NODEFER flag. */
   1478      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1479      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1480      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1481      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1482      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1483 
   1484      tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
   1485      tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
   1486      tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
   1487      tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
   1488      VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
   1489 
   1490      /* VFP insns */
   1491      have_VFP = True;
   1492      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1493         have_VFP = False;
   1494      } else {
   1495         __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
   1496      }
    1497      /* There are several generations of the VFP extension, but they
    1498         differ very little, so for now we do not distinguish them. */
   1499      have_VFP2 = have_VFP;
   1500      have_VFP3 = have_VFP;
   1501 
   1502      /* NEON insns */
   1503      have_NEON = True;
   1504      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1505         have_NEON = False;
   1506      } else {
   1507         __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
   1508      }
   1509 
   1510      /* ARM architecture level */
   1511      archlevel = 5; /* v5 will be base level */
   1512      if (archlevel < 7) {
   1513         archlevel = 7;
   1514         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1515            archlevel = 5;
   1516         } else {
   1517            __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
   1518         }
   1519      }
   1520      if (archlevel < 6) {
   1521         archlevel = 6;
   1522         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1523            archlevel = 5;
   1524         } else {
   1525            __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
   1526         }
   1527      }
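              /* Net effect: the v7 probe (PLI) runs first; only if it SIGILLs
                 does archlevel fall back to 5, letting the v6 probe (PKHBT)
                 run, so archlevel ends up as 7, 6 or 5. */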
   1528 
   1529      VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
   1530      VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
   1531      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1532      VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
   1533      VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1534 
   1535      VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
   1536            archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
   1537            (Int)have_NEON);
   1538 
   1539      VG_(machine_arm_archlevel) = archlevel;
   1540 
   1541      va = VexArchARM;
   1542      vai.endness = VexEndnessLE;
   1543 
   1544      vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
   1545      if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
   1546      if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
   1547      if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
   1548      if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   1549 
   1550      VG_(machine_get_cache_info)(&vai);
   1551 
   1552      return True;
   1553    }
   1554 
   1555 #elif defined(VGA_arm64)
   1556    {
   1557      va = VexArchARM64;
   1558      vai.endness = VexEndnessLE;
   1559 
   1560      /* So far there are no variants. */
   1561      vai.hwcaps = 0;
   1562 
   1563      VG_(machine_get_cache_info)(&vai);
   1564 
    1565      /* 0 denotes 'not set'.  The range of legitimate values here,
    1566         once set, is 2 through 17 inclusive. */
   1567      vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
   1568      vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
   1569      ULong ctr_el0;
   1570      __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
   1571      vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
   1572      vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
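              /* CTR_EL0.DminLine (bits [19:16]) and .IminLine (bits [3:0])
                 hold log2 of the smallest cache line size in 4-byte words;
                 the +2 converts that to log2 bytes.  E.g. a field value of
                 4 means 16 words == 64 bytes, i.e. lg2_szB == 6. */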
   1573      VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
   1574                       "ctr_el0.iMinLine_szB = %d\n",
   1575                    1 << vai.arm64_dMinLine_lg2_szB,
   1576                    1 << vai.arm64_iMinLine_lg2_szB);
   1577 
   1578      return True;
   1579    }
   1580 
   1581 #elif defined(VGA_mips32)
   1582    {
    1583      /* Define the position of the F64 bit in the FIR register. */
   1584 #    define FP64 22
   1585      va = VexArchMIPS32;
   1586      UInt model = VG_(get_machine_model)();
   1587      if (model == -1)
   1588          return False;
   1589 
   1590      vai.hwcaps = model;
   1591 
   1592 #    if defined(VKI_LITTLE_ENDIAN)
   1593      vai.endness = VexEndnessLE;
   1594 #    elif defined(VKI_BIG_ENDIAN)
   1595      vai.endness = VexEndnessBE;
   1596 #    else
   1597      vai.endness = VexEndness_INVALID;
   1598 #    endif
   1599 
   1600      /* Same instruction set detection algorithm as for ppc32/arm... */
   1601      vki_sigset_t          saved_set, tmp_set;
   1602      vki_sigaction_fromK_t saved_sigill_act;
   1603      vki_sigaction_toK_t   tmp_sigill_act;
   1604 
   1605      volatile Bool have_DSP, have_DSPr2;
   1606      Int r;
   1607 
   1608      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
   1609 
   1610      VG_(sigemptyset)(&tmp_set);
   1611      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1612 
   1613      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1614      vg_assert(r == 0);
   1615 
   1616      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1617      vg_assert(r == 0);
   1618      tmp_sigill_act = saved_sigill_act;
   1619 
   1620      /* NODEFER: signal handler does not return (from the kernel's point of
   1621         view), hence if it is to successfully catch a signal more than once,
   1622         we need the NODEFER flag. */
   1623      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1624      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1625      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1626      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1627      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1628 
   1629      if (model == VEX_PRID_COMP_MIPS) {
   1630         /* DSPr2 instructions. */
   1631         have_DSPr2 = True;
   1632         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1633            have_DSPr2 = False;
   1634         } else {
   1635            __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
   1636         }
   1637         if (have_DSPr2) {
   1638            /* We assume it's 74K, since it can run DSPr2. */
   1639            vai.hwcaps |= VEX_PRID_IMP_74K;
   1640         } else {
   1641            /* DSP instructions. */
   1642            have_DSP = True;
   1643            if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1644               have_DSP = False;
   1645            } else {
   1646               __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
   1647            }
   1648            if (have_DSP) {
   1649               /* We assume it's 34K, since it has support for DSP. */
   1650               vai.hwcaps |= VEX_PRID_IMP_34K;
   1651            }
   1652         }
   1653      }
   1654 
    1655      /* Check for an FPU with 32 double-precision FP registers. */
   1656      int FIR = 0;
   1657      __asm__ __volatile__(
   1658         "cfc1 %0, $0"  "\n\t"
   1659         : "=r" (FIR)
   1660      );
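              /* cfc1 with control register $0 reads coprocessor 1's
                 Floating-point Implementation Register (FIR); bit 22 (F64)
                 is set when the FPU has 64-bit registers, i.e. 32
                 double-precision FP registers. */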
   1661      if (FIR & (1 << FP64)) {
   1662         vai.hwcaps |= VEX_PRID_CPU_32FPR;
   1663      }
   1664 
   1665      VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
   1666      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1667      VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1668 
   1669      VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
   1670      VG_(machine_get_cache_info)(&vai);
   1671 
   1672      return True;
   1673    }
   1674 
   1675 #elif defined(VGA_mips64)
   1676    {
   1677      va = VexArchMIPS64;
   1678      UInt model = VG_(get_machine_model)();
   1679      if (model == -1)
   1680          return False;
   1681 
   1682      vai.hwcaps = model;
   1683 
   1684 #    if defined(VKI_LITTLE_ENDIAN)
   1685      vai.endness = VexEndnessLE;
   1686 #    elif defined(VKI_BIG_ENDIAN)
   1687      vai.endness = VexEndnessBE;
   1688 #    else
   1689      vai.endness = VexEndness_INVALID;
   1690 #    endif
   1691 
   1692      VG_(machine_get_cache_info)(&vai);
   1693 
   1694      return True;
   1695    }
   1696 
   1697 #elif defined(VGA_tilegx)
   1698    {
   1699      va = VexArchTILEGX;
   1700      vai.hwcaps = VEX_HWCAPS_TILEGX_BASE;
   1701      vai.endness = VexEndnessLE;
   1702 
   1703      VG_(machine_get_cache_info)(&vai);
   1704 
   1705      return True;
   1706    }
   1707 
   1708 #else
   1709 #  error "Unknown arch"
   1710 #endif
   1711 }
   1712 
   1713 /* Notify host cpu instruction cache line size. */
   1714 #if defined(VGA_ppc32)
   1715 void VG_(machine_ppc32_set_clszB)( Int szB )
   1716 {
   1717    vg_assert(hwcaps_done);
   1718 
   1719    /* Either the value must not have been set yet (zero) or we can
   1720       tolerate it being set to the same value multiple times, as the
   1721       stack scanning logic in m_main is a bit stupid. */
   1722    vg_assert(vai.ppc_icache_line_szB == 0
   1723              || vai.ppc_icache_line_szB == szB);
   1724 
   1725    vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   1726    vai.ppc_icache_line_szB = szB;
   1727 }
   1728 #endif
   1729 
   1730 
   1731 /* Notify host cpu instruction cache line size. */
   1732 #if defined(VGA_ppc64be)|| defined(VGA_ppc64le)
   1733 void VG_(machine_ppc64_set_clszB)( Int szB )
   1734 {
   1735    vg_assert(hwcaps_done);
   1736 
   1737    /* Either the value must not have been set yet (zero) or we can
   1738       tolerate it being set to the same value multiple times, as the
   1739       stack scanning logic in m_main is a bit stupid. */
   1740    vg_assert(vai.ppc_icache_line_szB == 0
   1741              || vai.ppc_icache_line_szB == szB);
   1742 
   1743    vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   1744    vai.ppc_icache_line_szB = szB;
   1745 }
   1746 #endif
   1747 
   1748 
   1749 /* Notify host's ability to handle NEON instructions. */
   1750 #if defined(VGA_arm)
   1751 void VG_(machine_arm_set_has_NEON)( Bool has_neon )
   1752 {
   1753    vg_assert(hwcaps_done);
   1754    /* There's nothing else we can sanity check. */
   1755 
   1756    if (has_neon) {
   1757       vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   1758    } else {
   1759       vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   1760    }
   1761 }
   1762 #endif
   1763 
   1764 
   1765 /* Fetch host cpu info, once established. */
   1766 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
   1767                                    /*OUT*/VexArchInfo* pVai )
   1768 {
   1769    vg_assert(hwcaps_done);
   1770    if (pVa)  *pVa  = va;
   1771    if (pVai) *pVai = vai;
   1772 }
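
         /* Typical use (illustrative sketch; hva and hvai are arbitrary
            local names):

               VexArch     hva;
               VexArchInfo hvai;
               VG_(machine_get_VexArchInfo)( &hva, &hvai );

            Either pointer may be NULL if that result is not wanted. */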
   1773 
   1774 
   1775 /* Returns the size of the largest guest register that we will
   1776    simulate in this run.  This depends on both the guest architecture
   1777    and on the specific capabilities we are simulating for that guest
   1778    (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   1779    or 32.  General rule: if in doubt, return a value larger than
   1780    reality.
   1781 
   1782    This information is needed by Cachegrind and Callgrind to decide
    1783    the minimum cache line size they are prepared to simulate.
    1784    Basically we require that the minimum cache line size is at least
    1785    as large as the largest register that might get transferred
    1786    to/from memory, so as to guarantee that any such transfer
    1787    straddles at most 2 cache lines.
   1788 */
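         /* (Worked example: a 32-byte AVX transfer can straddle 3 lines
            when the line size is 16 bytes -- e.g. an access covering bytes
            15..46 touches lines starting at 0, 16 and 32 -- but at most 2
            lines when the line size is 32 bytes.) */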
   1789 Int VG_(machine_get_size_of_largest_guest_register) ( void )
   1790 {
   1791    vg_assert(hwcaps_done);
   1792    /* Once hwcaps_done is True, we can fish around inside va/vai to
   1793       find the information we need. */
   1794 
   1795 #  if defined(VGA_x86)
   1796    vg_assert(va == VexArchX86);
   1797    /* We don't support AVX, so 32 is out.  At the other end, even if
   1798       we don't support any SSE, the X87 can generate 10 byte
   1799       transfers, so let's say 16 to be on the safe side.  Hence the
   1800       answer is always 16. */
   1801    return 16;
   1802 
   1803 #  elif defined(VGA_amd64)
   1804    /* if AVX then 32 else 16 */
   1805    return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
   1806 
   1807 #  elif defined(VGA_ppc32)
   1808    /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   1809    if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   1810    if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   1811    if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   1812    return 8;
   1813 
   1814 #  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   1815    /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   1816    if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   1817    if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   1818    if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   1819    return 8;
   1820 
   1821 #  elif defined(VGA_s390x)
   1822    return 8;
   1823 
   1824 #  elif defined(VGA_arm)
    1825    /* Really it depends on whether or not we have NEON, but let's
    1826       just assume we always do. */
   1827    return 16;
   1828 
   1829 #  elif defined(VGA_arm64)
   1830    /* ARM64 always has Neon, AFAICS. */
   1831    return 16;
   1832 
   1833 #  elif defined(VGA_mips32)
   1834    /* The guest state implies 4, but that can't really be true, can
   1835       it? */
   1836    return 8;
   1837 
   1838 #  elif defined(VGA_mips64)
   1839    return 8;
   1840 
   1841 #  elif defined(VGA_tilegx)
   1842    return 8;
   1843 
   1844 #  else
   1845 #    error "Unknown arch"
   1846 #  endif
   1847 }
   1848 
   1849 
   1850 // Given a pointer to a function as obtained by "& functionname" in C,
   1851 // produce a pointer to the actual entry point for the function.
   1852 void* VG_(fnptr_to_fnentry)( void* f )
   1853 {
   1854 #  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
   1855       || defined(VGP_arm_linux) || defined(VGO_darwin)          \
   1856       || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
   1857       || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
   1858       || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
   1859       || defined(VGP_tilegx_linux) || defined(VGP_x86_solaris) \
   1860       || defined(VGP_amd64_solaris)
   1861    return f;
   1862 #  elif defined(VGP_ppc64be_linux)
   1863    /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
   1864       3-word function descriptor, of which the first word is the entry
   1865       address. */
   1866    UWord* descr = (UWord*)f;
   1867    return (void*)(descr[0]);
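            /* For reference, the ELFv1 (AIX-style) descriptor layout is:
                  descr[0] = function entry address
                  descr[1] = TOC (r2) value for the callee
                  descr[2] = environment pointer (unused by C code) */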
   1868 #  else
   1869 #    error "Unknown platform"
   1870 #  endif
   1871 }
   1872 
   1873 /*--------------------------------------------------------------------*/
   1874 /*--- end                                                          ---*/
   1875 /*--------------------------------------------------------------------*/
   1876