      1 /*--------------------------------------------------------------------*/
      2 /*--- Machine-related stuff.                           m_machine.c ---*/
      3 /*--------------------------------------------------------------------*/
      4 
      5 /*
      6    This file is part of Valgrind, a dynamic binary instrumentation
      7    framework.
      8 
      9    Copyright (C) 2000-2013 Julian Seward
     10       jseward (at) acm.org
     11 
     12    This program is free software; you can redistribute it and/or
     13    modify it under the terms of the GNU General Public License as
     14    published by the Free Software Foundation; either version 2 of the
     15    License, or (at your option) any later version.
     16 
     17    This program is distributed in the hope that it will be useful, but
     18    WITHOUT ANY WARRANTY; without even the implied warranty of
     19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     20    General Public License for more details.
     21 
     22    You should have received a copy of the GNU General Public License
     23    along with this program; if not, write to the Free Software
     24    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     25    02111-1307, USA.
     26 
     27    The GNU General Public License is contained in the file COPYING.
     28 */
     29 
     30 #include "pub_core_basics.h"
     31 #include "pub_core_vki.h"
     32 #include "pub_core_threadstate.h"
     33 #include "pub_core_libcassert.h"
     34 #include "pub_core_libcbase.h"
     35 #include "pub_core_libcfile.h"
     36 #include "pub_core_libcprint.h"
     37 #include "pub_core_mallocfree.h"
     38 #include "pub_core_machine.h"
     39 #include "pub_core_cpuid.h"
     40 #include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
     41 #include "pub_core_debuglog.h"
     42 
     43 
     44 #define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
     45 #define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
     46 #define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)
     47 
     48 Addr VG_(get_IP) ( ThreadId tid ) {
     49    return INSTR_PTR( VG_(threads)[tid].arch );
     50 }
     51 Addr VG_(get_SP) ( ThreadId tid ) {
     52    return STACK_PTR( VG_(threads)[tid].arch );
     53 }
     54 Addr VG_(get_FP) ( ThreadId tid ) {
     55    return FRAME_PTR( VG_(threads)[tid].arch );
     56 }
     57 
     58 void VG_(set_IP) ( ThreadId tid, Addr ip ) {
     59    INSTR_PTR( VG_(threads)[tid].arch ) = ip;
     60 }
     61 void VG_(set_SP) ( ThreadId tid, Addr sp ) {
     62    STACK_PTR( VG_(threads)[tid].arch ) = sp;
     63 }
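         /* An illustrative sketch (hypothetical tool code, not an API defined
            in this file) of how the accessors above combine, assuming 'tid'
            names a valid, live thread:

               Addr ip = VG_(get_IP)(tid);   // guest program counter
               VG_(set_IP)(tid, ip + 4);     // e.g. skip one 4-byte insn
         */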
     64 
     65 void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
     66                                 ThreadId tid )
     67 {
     68 #  if defined(VGA_x86)
     69    regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
     70    regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
     71    regs->misc.X86.r_ebp
     72       = VG_(threads)[tid].arch.vex.guest_EBP;
     73 #  elif defined(VGA_amd64)
     74    regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
     75    regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
     76    regs->misc.AMD64.r_rbp
     77       = VG_(threads)[tid].arch.vex.guest_RBP;
     78 #  elif defined(VGA_ppc32)
     79    regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
     80    regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
     81    regs->misc.PPC32.r_lr
     82       = VG_(threads)[tid].arch.vex.guest_LR;
     83 #  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
     84    regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
     85    regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
     86    regs->misc.PPC64.r_lr
     87       = VG_(threads)[tid].arch.vex.guest_LR;
     88 #  elif defined(VGA_arm)
     89    regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
     90    regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
     91    regs->misc.ARM.r14
     92       = VG_(threads)[tid].arch.vex.guest_R14;
     93    regs->misc.ARM.r12
     94       = VG_(threads)[tid].arch.vex.guest_R12;
     95    regs->misc.ARM.r11
     96       = VG_(threads)[tid].arch.vex.guest_R11;
     97    regs->misc.ARM.r7
     98       = VG_(threads)[tid].arch.vex.guest_R7;
     99 #  elif defined(VGA_arm64)
    100    regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
    101    regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
    102    regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
    103    regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
    104 #  elif defined(VGA_s390x)
    105    regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
    106    regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
    107    regs->misc.S390X.r_fp
    108       = VG_(threads)[tid].arch.vex.guest_FP;
    109    regs->misc.S390X.r_lr
    110       = VG_(threads)[tid].arch.vex.guest_LR;
    111 #  elif defined(VGA_mips32)
    112    regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
    113    regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
    114    regs->misc.MIPS32.r30
    115       = VG_(threads)[tid].arch.vex.guest_r30;
    116    regs->misc.MIPS32.r31
    117       = VG_(threads)[tid].arch.vex.guest_r31;
    118    regs->misc.MIPS32.r28
    119       = VG_(threads)[tid].arch.vex.guest_r28;
    120 #  elif defined(VGA_mips64)
    121    regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
    122    regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
    123    regs->misc.MIPS64.r30
    124       = VG_(threads)[tid].arch.vex.guest_r30;
    125    regs->misc.MIPS64.r31
    126       = VG_(threads)[tid].arch.vex.guest_r31;
    127    regs->misc.MIPS64.r28
    128       = VG_(threads)[tid].arch.vex.guest_r28;
    129 #  elif defined(VGA_tilegx)
    130    regs->r_pc = VG_(threads)[tid].arch.vex.guest_pc;
    131    regs->r_sp = VG_(threads)[tid].arch.vex.guest_r54;
    132    regs->misc.TILEGX.r52
    133       = VG_(threads)[tid].arch.vex.guest_r52;
    134    regs->misc.TILEGX.r55
    135       = VG_(threads)[tid].arch.vex.guest_r55;
    136 #  else
    137 #    error "Unknown arch"
    138 #  endif
    139 }
    140 
    141 void
    142 VG_(get_shadow_regs_area) ( ThreadId tid,
    143                             /*DST*/UChar* dst,
    144                             /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
    145 {
    146    void*        src;
    147    ThreadState* tst;
    148    vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
    149    vg_assert(VG_(is_valid_tid)(tid));
    150    // Bounds check
    151    vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
    152    vg_assert(offset + size <= sizeof(VexGuestArchState));
    153    // Copy
    154    tst = & VG_(threads)[tid];
    155    src = NULL;
    156    switch (shadowNo) {
    157       case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
    158       case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
    159       case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
    160    }
    161    vg_assert(src != NULL);
    162    VG_(memcpy)( dst, src, size);
    163 }
    164 
    165 void
    166 VG_(set_shadow_regs_area) ( ThreadId tid,
    167                             /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
    168                             /*SRC*/const UChar* src )
    169 {
    170    void*        dst;
    171    ThreadState* tst;
    172    vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
    173    vg_assert(VG_(is_valid_tid)(tid));
    174    // Bounds check
    175    vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
    176    vg_assert(offset + size <= sizeof(VexGuestArchState));
    177    // Copy
    178    tst = & VG_(threads)[tid];
    179    dst = NULL;
    180    switch (shadowNo) {
    181       case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
    182       case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
    183       case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
    184    }
    185    vg_assert(dst != NULL);
    186    VG_(memcpy)( dst, src, size);
    187 }
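         /* An illustrative sketch of the offset/size convention used by the
            two functions above.  Assuming an amd64 guest and the usual C
            offsetof macro (assumptions made for illustration only), a tool
            could fetch the shadow-1 value of guest RAX like this:

               ULong sh_rax;
               VG_(get_shadow_regs_area)(
                  tid, (UChar*)&sh_rax, 1,
                  offsetof(VexGuestAMD64State, guest_RAX), sizeof(sh_rax) );
         */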
    188 
    189 
    190 static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
    191                                                         const HChar*, Addr))
    192 {
    193    VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
     194    VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
    195 #if defined(VGA_x86)
    196    (*f)(tid, "EAX", vex->guest_EAX);
    197    (*f)(tid, "ECX", vex->guest_ECX);
    198    (*f)(tid, "EDX", vex->guest_EDX);
    199    (*f)(tid, "EBX", vex->guest_EBX);
    200    (*f)(tid, "ESI", vex->guest_ESI);
    201    (*f)(tid, "EDI", vex->guest_EDI);
    202    (*f)(tid, "ESP", vex->guest_ESP);
    203    (*f)(tid, "EBP", vex->guest_EBP);
    204 #elif defined(VGA_amd64)
    205    (*f)(tid, "RAX", vex->guest_RAX);
    206    (*f)(tid, "RCX", vex->guest_RCX);
    207    (*f)(tid, "RDX", vex->guest_RDX);
    208    (*f)(tid, "RBX", vex->guest_RBX);
    209    (*f)(tid, "RSI", vex->guest_RSI);
    210    (*f)(tid, "RDI", vex->guest_RDI);
    211    (*f)(tid, "RSP", vex->guest_RSP);
    212    (*f)(tid, "RBP", vex->guest_RBP);
    213    (*f)(tid, "R8" , vex->guest_R8 );
    214    (*f)(tid, "R9" , vex->guest_R9 );
    215    (*f)(tid, "R10", vex->guest_R10);
    216    (*f)(tid, "R11", vex->guest_R11);
    217    (*f)(tid, "R12", vex->guest_R12);
    218    (*f)(tid, "R13", vex->guest_R13);
    219    (*f)(tid, "R14", vex->guest_R14);
    220    (*f)(tid, "R15", vex->guest_R15);
    221 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
    222    (*f)(tid, "GPR0" , vex->guest_GPR0 );
    223    (*f)(tid, "GPR1" , vex->guest_GPR1 );
    224    (*f)(tid, "GPR2" , vex->guest_GPR2 );
    225    (*f)(tid, "GPR3" , vex->guest_GPR3 );
    226    (*f)(tid, "GPR4" , vex->guest_GPR4 );
    227    (*f)(tid, "GPR5" , vex->guest_GPR5 );
    228    (*f)(tid, "GPR6" , vex->guest_GPR6 );
    229    (*f)(tid, "GPR7" , vex->guest_GPR7 );
    230    (*f)(tid, "GPR8" , vex->guest_GPR8 );
    231    (*f)(tid, "GPR9" , vex->guest_GPR9 );
    232    (*f)(tid, "GPR10", vex->guest_GPR10);
    233    (*f)(tid, "GPR11", vex->guest_GPR11);
    234    (*f)(tid, "GPR12", vex->guest_GPR12);
    235    (*f)(tid, "GPR13", vex->guest_GPR13);
    236    (*f)(tid, "GPR14", vex->guest_GPR14);
    237    (*f)(tid, "GPR15", vex->guest_GPR15);
    238    (*f)(tid, "GPR16", vex->guest_GPR16);
    239    (*f)(tid, "GPR17", vex->guest_GPR17);
    240    (*f)(tid, "GPR18", vex->guest_GPR18);
    241    (*f)(tid, "GPR19", vex->guest_GPR19);
    242    (*f)(tid, "GPR20", vex->guest_GPR20);
    243    (*f)(tid, "GPR21", vex->guest_GPR21);
    244    (*f)(tid, "GPR22", vex->guest_GPR22);
    245    (*f)(tid, "GPR23", vex->guest_GPR23);
    246    (*f)(tid, "GPR24", vex->guest_GPR24);
    247    (*f)(tid, "GPR25", vex->guest_GPR25);
    248    (*f)(tid, "GPR26", vex->guest_GPR26);
    249    (*f)(tid, "GPR27", vex->guest_GPR27);
    250    (*f)(tid, "GPR28", vex->guest_GPR28);
    251    (*f)(tid, "GPR29", vex->guest_GPR29);
    252    (*f)(tid, "GPR30", vex->guest_GPR30);
    253    (*f)(tid, "GPR31", vex->guest_GPR31);
    254    (*f)(tid, "CTR"  , vex->guest_CTR  );
    255    (*f)(tid, "LR"   , vex->guest_LR   );
    256 #elif defined(VGA_arm)
    257    (*f)(tid, "R0" , vex->guest_R0 );
    258    (*f)(tid, "R1" , vex->guest_R1 );
    259    (*f)(tid, "R2" , vex->guest_R2 );
    260    (*f)(tid, "R3" , vex->guest_R3 );
    261    (*f)(tid, "R4" , vex->guest_R4 );
    262    (*f)(tid, "R5" , vex->guest_R5 );
     263    (*f)(tid, "R6" , vex->guest_R6 );
            (*f)(tid, "R7" , vex->guest_R7 );
     264    (*f)(tid, "R8" , vex->guest_R8 );
    265    (*f)(tid, "R9" , vex->guest_R9 );
    266    (*f)(tid, "R10", vex->guest_R10);
    267    (*f)(tid, "R11", vex->guest_R11);
    268    (*f)(tid, "R12", vex->guest_R12);
    269    (*f)(tid, "R13", vex->guest_R13);
    270    (*f)(tid, "R14", vex->guest_R14);
    271 #elif defined(VGA_s390x)
    272    (*f)(tid, "r0" , vex->guest_r0 );
    273    (*f)(tid, "r1" , vex->guest_r1 );
    274    (*f)(tid, "r2" , vex->guest_r2 );
    275    (*f)(tid, "r3" , vex->guest_r3 );
    276    (*f)(tid, "r4" , vex->guest_r4 );
    277    (*f)(tid, "r5" , vex->guest_r5 );
    278    (*f)(tid, "r6" , vex->guest_r6 );
    279    (*f)(tid, "r7" , vex->guest_r7 );
    280    (*f)(tid, "r8" , vex->guest_r8 );
    281    (*f)(tid, "r9" , vex->guest_r9 );
    282    (*f)(tid, "r10", vex->guest_r10);
    283    (*f)(tid, "r11", vex->guest_r11);
    284    (*f)(tid, "r12", vex->guest_r12);
    285    (*f)(tid, "r13", vex->guest_r13);
    286    (*f)(tid, "r14", vex->guest_r14);
    287    (*f)(tid, "r15", vex->guest_r15);
    288 #elif defined(VGA_mips32) || defined(VGA_mips64)
    289    (*f)(tid, "r0" , vex->guest_r0 );
    290    (*f)(tid, "r1" , vex->guest_r1 );
    291    (*f)(tid, "r2" , vex->guest_r2 );
    292    (*f)(tid, "r3" , vex->guest_r3 );
    293    (*f)(tid, "r4" , vex->guest_r4 );
    294    (*f)(tid, "r5" , vex->guest_r5 );
    295    (*f)(tid, "r6" , vex->guest_r6 );
    296    (*f)(tid, "r7" , vex->guest_r7 );
    297    (*f)(tid, "r8" , vex->guest_r8 );
    298    (*f)(tid, "r9" , vex->guest_r9 );
    299    (*f)(tid, "r10", vex->guest_r10);
    300    (*f)(tid, "r11", vex->guest_r11);
    301    (*f)(tid, "r12", vex->guest_r12);
    302    (*f)(tid, "r13", vex->guest_r13);
    303    (*f)(tid, "r14", vex->guest_r14);
    304    (*f)(tid, "r15", vex->guest_r15);
    305    (*f)(tid, "r16", vex->guest_r16);
    306    (*f)(tid, "r17", vex->guest_r17);
    307    (*f)(tid, "r18", vex->guest_r18);
    308    (*f)(tid, "r19", vex->guest_r19);
    309    (*f)(tid, "r20", vex->guest_r20);
    310    (*f)(tid, "r21", vex->guest_r21);
    311    (*f)(tid, "r22", vex->guest_r22);
    312    (*f)(tid, "r23", vex->guest_r23);
    313    (*f)(tid, "r24", vex->guest_r24);
    314    (*f)(tid, "r25", vex->guest_r25);
    315    (*f)(tid, "r26", vex->guest_r26);
    316    (*f)(tid, "r27", vex->guest_r27);
    317    (*f)(tid, "r28", vex->guest_r28);
    318    (*f)(tid, "r29", vex->guest_r29);
    319    (*f)(tid, "r30", vex->guest_r30);
    320    (*f)(tid, "r31", vex->guest_r31);
    321 #elif defined(VGA_arm64)
    322    (*f)(tid, "x0" , vex->guest_X0 );
    323    (*f)(tid, "x1" , vex->guest_X1 );
    324    (*f)(tid, "x2" , vex->guest_X2 );
    325    (*f)(tid, "x3" , vex->guest_X3 );
    326    (*f)(tid, "x4" , vex->guest_X4 );
    327    (*f)(tid, "x5" , vex->guest_X5 );
    328    (*f)(tid, "x6" , vex->guest_X6 );
    329    (*f)(tid, "x7" , vex->guest_X7 );
    330    (*f)(tid, "x8" , vex->guest_X8 );
    331    (*f)(tid, "x9" , vex->guest_X9 );
    332    (*f)(tid, "x10", vex->guest_X10);
    333    (*f)(tid, "x11", vex->guest_X11);
    334    (*f)(tid, "x12", vex->guest_X12);
    335    (*f)(tid, "x13", vex->guest_X13);
    336    (*f)(tid, "x14", vex->guest_X14);
    337    (*f)(tid, "x15", vex->guest_X15);
    338    (*f)(tid, "x16", vex->guest_X16);
    339    (*f)(tid, "x17", vex->guest_X17);
    340    (*f)(tid, "x18", vex->guest_X18);
    341    (*f)(tid, "x19", vex->guest_X19);
    342    (*f)(tid, "x20", vex->guest_X20);
    343    (*f)(tid, "x21", vex->guest_X21);
    344    (*f)(tid, "x22", vex->guest_X22);
    345    (*f)(tid, "x23", vex->guest_X23);
    346    (*f)(tid, "x24", vex->guest_X24);
    347    (*f)(tid, "x25", vex->guest_X25);
    348    (*f)(tid, "x26", vex->guest_X26);
    349    (*f)(tid, "x27", vex->guest_X27);
    350    (*f)(tid, "x28", vex->guest_X28);
    351    (*f)(tid, "x29", vex->guest_X29);
    352    (*f)(tid, "x30", vex->guest_X30);
    353 #elif defined(VGA_tilegx)
    354    (*f)(tid, "r0",  vex->guest_r0 );
    355    (*f)(tid, "r1",  vex->guest_r1 );
    356    (*f)(tid, "r2",  vex->guest_r2 );
    357    (*f)(tid, "r3",  vex->guest_r3 );
    358    (*f)(tid, "r4",  vex->guest_r4 );
    359    (*f)(tid, "r5",  vex->guest_r5 );
    360    (*f)(tid, "r6",  vex->guest_r6 );
    361    (*f)(tid, "r7",  vex->guest_r7 );
    362    (*f)(tid, "r8",  vex->guest_r8 );
    363    (*f)(tid, "r9",  vex->guest_r9 );
    364    (*f)(tid, "r10", vex->guest_r10);
    365    (*f)(tid, "r11", vex->guest_r11);
    366    (*f)(tid, "r12", vex->guest_r12);
    367    (*f)(tid, "r13", vex->guest_r13);
    368    (*f)(tid, "r14", vex->guest_r14);
    369    (*f)(tid, "r15", vex->guest_r15);
    370    (*f)(tid, "r16", vex->guest_r16);
    371    (*f)(tid, "r17", vex->guest_r17);
    372    (*f)(tid, "r18", vex->guest_r18);
    373    (*f)(tid, "r19", vex->guest_r19);
    374    (*f)(tid, "r20", vex->guest_r20);
    375    (*f)(tid, "r21", vex->guest_r21);
    376    (*f)(tid, "r22", vex->guest_r22);
    377    (*f)(tid, "r23", vex->guest_r23);
    378    (*f)(tid, "r24", vex->guest_r24);
    379    (*f)(tid, "r25", vex->guest_r25);
    380    (*f)(tid, "r26", vex->guest_r26);
    381    (*f)(tid, "r27", vex->guest_r27);
    382    (*f)(tid, "r28", vex->guest_r28);
    383    (*f)(tid, "r29", vex->guest_r29);
    384    (*f)(tid, "r30", vex->guest_r30);
    385    (*f)(tid, "r31", vex->guest_r31);
    386    (*f)(tid, "r32", vex->guest_r32);
    387    (*f)(tid, "r33", vex->guest_r33);
    388    (*f)(tid, "r34", vex->guest_r34);
    389    (*f)(tid, "r35", vex->guest_r35);
    390    (*f)(tid, "r36", vex->guest_r36);
    391    (*f)(tid, "r37", vex->guest_r37);
    392    (*f)(tid, "r38", vex->guest_r38);
    393    (*f)(tid, "r39", vex->guest_r39);
    394    (*f)(tid, "r40", vex->guest_r40);
    395    (*f)(tid, "r41", vex->guest_r41);
    396    (*f)(tid, "r42", vex->guest_r42);
    397    (*f)(tid, "r43", vex->guest_r43);
    398    (*f)(tid, "r44", vex->guest_r44);
    399    (*f)(tid, "r45", vex->guest_r45);
    400    (*f)(tid, "r46", vex->guest_r46);
    401    (*f)(tid, "r47", vex->guest_r47);
    402    (*f)(tid, "r48", vex->guest_r48);
    403    (*f)(tid, "r49", vex->guest_r49);
    404    (*f)(tid, "r50", vex->guest_r50);
    405    (*f)(tid, "r51", vex->guest_r51);
    406    (*f)(tid, "r52", vex->guest_r52);
    407    (*f)(tid, "r53", vex->guest_r53);
    408    (*f)(tid, "r54", vex->guest_r54);
    409    (*f)(tid, "r55", vex->guest_r55);
    410 #else
    411 #  error Unknown arch
    412 #endif
    413 }
    414 
    415 
    416 void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
    417 {
    418    ThreadId tid;
    419 
    420    for (tid = 1; tid < VG_N_THREADS; tid++) {
    421       if (VG_(is_valid_tid)(tid)
    422           || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
    423          // live thread or thread instructed to die by another thread that
    424          // called exit.
    425          apply_to_GPs_of_tid(tid, f);
    426       }
    427    }
    428 }
    429 
    430 void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
    431 {
    432    *tid = (ThreadId)(-1);
    433 }
    434 
    435 Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
    436                             /*OUT*/Addr* stack_min,
    437                             /*OUT*/Addr* stack_max)
    438 {
    439    ThreadId i;
    440    for (i = (*tid)+1; i < VG_N_THREADS; i++) {
    441       if (i == VG_INVALID_THREADID)
    442          continue;
    443       if (VG_(threads)[i].status != VgTs_Empty) {
    444          *tid       = i;
    445          *stack_min = VG_(get_SP)(i);
    446          *stack_max = VG_(threads)[i].client_stack_highest_byte;
    447          return True;
    448       }
    449    }
    450    return False;
    451 }
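         /* An illustrative sketch of how a hypothetical caller would drive the
            iterator pair above to visit every live thread's stack bounds:

               ThreadId tid;
               Addr     stack_min, stack_max;
               VG_(thread_stack_reset_iter)(&tid);
               while (VG_(thread_stack_next)(&tid, &stack_min, &stack_max)) {
                  // [stack_min, stack_max] bounds the client stack of tid
               }
         */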
    452 
    453 Addr VG_(thread_get_stack_max)(ThreadId tid)
    454 {
    455    vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
    456    vg_assert(VG_(threads)[tid].status != VgTs_Empty);
    457    return VG_(threads)[tid].client_stack_highest_byte;
    458 }
    459 
    460 SizeT VG_(thread_get_stack_size)(ThreadId tid)
    461 {
    462    vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
    463    vg_assert(VG_(threads)[tid].status != VgTs_Empty);
    464    return VG_(threads)[tid].client_stack_szB;
    465 }
    466 
    467 Addr VG_(thread_get_altstack_min)(ThreadId tid)
    468 {
    469    vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
    470    vg_assert(VG_(threads)[tid].status != VgTs_Empty);
    471    return (Addr)VG_(threads)[tid].altstack.ss_sp;
    472 }
    473 
    474 SizeT VG_(thread_get_altstack_size)(ThreadId tid)
    475 {
    476    vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
    477    vg_assert(VG_(threads)[tid].status != VgTs_Empty);
    478    return VG_(threads)[tid].altstack.ss_size;
    479 }
    480 
    481 //-------------------------------------------------------------
    482 /* Details about the capabilities of the underlying (host) CPU.  These
     483    details are acquired (1) by enquiring with the CPU at startup, or
    484    (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
    485    line size).  It's a bit nasty in the sense that there's no obvious
    486    way to stop uses of some of this info before it's ready to go.
    487    See pub_core_machine.h for more information about that.
    488 
    489    VG_(machine_get_hwcaps) may use signals (although it attempts to
    490    leave signal state unchanged) and therefore should only be
    491    called before m_main sets up the client's signal state.
    492 */
    493 
    494 /* --------- State --------- */
    495 static Bool hwcaps_done = False;
    496 
    497 /* --- all archs --- */
    498 static VexArch     va = VexArch_INVALID;
    499 static VexArchInfo vai;
    500 
    501 #if defined(VGA_x86)
    502 UInt VG_(machine_x86_have_mxcsr) = 0;
    503 #endif
    504 #if defined(VGA_ppc32)
    505 UInt VG_(machine_ppc32_has_FP)  = 0;
    506 UInt VG_(machine_ppc32_has_VMX) = 0;
    507 #endif
    508 #if defined(VGA_ppc64be) || defined(VGA_ppc64le)
    509 ULong VG_(machine_ppc64_has_VMX) = 0;
    510 #endif
    511 #if defined(VGA_arm)
    512 Int VG_(machine_arm_archlevel) = 4;
    513 #endif
    514 
    515 
     516 /* For hwcaps detection on ppc32/64, s390x, arm and mips32 we'll need to
     517    do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
    518 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    519     || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
    520 #include "pub_core_libcsetjmp.h"
    521 static VG_MINIMAL_JMP_BUF(env_unsup_insn);
    522 static void handler_unsup_insn ( Int x ) {
    523    VG_MINIMAL_LONGJMP(env_unsup_insn);
    524 }
    525 #endif
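         /* Every SIGILL-based probe in VG_(machine_get_hwcaps) below follows
            this minimal pattern ("0x..." stands for the encoding of the
            candidate instruction being probed for):

               have_insn = True;
               if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
                  have_insn = False;   // handler longjmp'd back: got SIGILL
               } else {
                  __asm__ __volatile__(".long 0x...");   // candidate insn
               }
         */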
    526 
    527 
    528 /* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
     529  * handlers are installed.  Determines the sizes affected by dcbz
    530  * and dcbzl instructions and updates the given VexArchInfo structure
    531  * accordingly.
    532  *
    533  * Not very defensive: assumes that as long as the dcbz/dcbzl
     534  * instructions don't raise a SIGILL, they will zero an aligned,
    535  * contiguous block of memory of a sensible size. */
    536 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
    537 static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
    538 {
    539    Int dcbz_szB = 0;
    540    Int dcbzl_szB;
    541 #  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
    542    char test_block[4*MAX_DCBZL_SZB];
    543    char *aligned = test_block;
    544    Int i;
    545 
    546    /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
    547    aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
    548    vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);
    549 
     550    /* dcbz usually clears 32B, but on some platforms it clears whatever
     551     * the native cache block size is */
    552    VG_(memset)(test_block, 0xff, sizeof(test_block));
    553    __asm__ __volatile__("dcbz 0,%0"
    554                         : /*out*/
    555                         : "r" (aligned) /*in*/
    556                         : "memory" /*clobber*/);
    557    for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
    558       if (!test_block[i])
    559          ++dcbz_szB;
    560    }
    561    vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);
    562 
    563    /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
    564    if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
    565       dcbzl_szB = 0; /* indicates unsupported */
    566    }
    567    else {
    568       VG_(memset)(test_block, 0xff, sizeof(test_block));
    569       /* some older assemblers won't understand the dcbzl instruction
    570        * variant, so we directly emit the instruction ourselves */
    571       __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
    572                            : /*out*/
    573                            : "r" (aligned) /*in*/
    574                            : "memory", "r9" /*clobber*/);
    575       for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
    576          if (!test_block[i])
    577             ++dcbzl_szB;
    578       }
    579       vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
    580    }
    581 
    582    arch_info->ppc_dcbz_szB  = dcbz_szB;
    583    arch_info->ppc_dcbzl_szB = dcbzl_szB;
    584 
    585    VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
    586                  dcbz_szB, dcbzl_szB);
    587 #  undef MAX_DCBZL_SZB
    588 }
    589 #endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
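         /* Worked example of the byte-counting scheme above: per the comments
            in find_ppc_dcbz_sz, on a G5 dcbz zeroes a 32-byte block, so exactly
            32 bytes of test_block become zero and dcbz_szB ends up 32, while
            dcbzl zeroes 128 bytes, giving dcbzl_szB = 128. */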
    590 
    591 #ifdef VGA_s390x
    592 
    593 /* Read /proc/cpuinfo. Look for lines like these
    594 
    595    processor 0: version = FF,  identification = 0117C9,  machine = 2064
    596 
    597    and return the machine model. If the machine model could not be determined
    598    or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */
    599 
    600 static UInt VG_(get_machine_model)(void)
    601 {
    602    static struct model_map {
    603       const HChar name[5];
    604       UInt  id;
    605    } model_map[] = {
    606       { "2064", VEX_S390X_MODEL_Z900 },
    607       { "2066", VEX_S390X_MODEL_Z800 },
    608       { "2084", VEX_S390X_MODEL_Z990 },
    609       { "2086", VEX_S390X_MODEL_Z890 },
    610       { "2094", VEX_S390X_MODEL_Z9_EC },
    611       { "2096", VEX_S390X_MODEL_Z9_BC },
    612       { "2097", VEX_S390X_MODEL_Z10_EC },
    613       { "2098", VEX_S390X_MODEL_Z10_BC },
    614       { "2817", VEX_S390X_MODEL_Z196 },
    615       { "2818", VEX_S390X_MODEL_Z114 },
    616       { "2827", VEX_S390X_MODEL_ZEC12 },
    617       { "2828", VEX_S390X_MODEL_ZBC12 },
    618       { "2964", VEX_S390X_MODEL_Z13 },
    619    };
    620 
    621    Int    model, n, fh;
    622    SysRes fd;
    623    SizeT  num_bytes, file_buf_size;
    624    HChar *p, *m, *model_name, *file_buf;
    625 
    626    /* Slurp contents of /proc/cpuinfo into FILE_BUF */
    627    fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
    628    if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;
    629 
    630    fh  = sr_Res(fd);
    631 
    632    /* Determine the size of /proc/cpuinfo.
    633       Work around broken-ness in /proc file system implementation.
    634       fstat returns a zero size for /proc/cpuinfo although it is
    635       claimed to be a regular file. */
    636    num_bytes = 0;
    637    file_buf_size = 1000;
    638    file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
    639    while (42) {
    640       n = VG_(read)(fh, file_buf, file_buf_size);
    641       if (n < 0) break;
    642 
    643       num_bytes += n;
    644       if (n < file_buf_size) break;  /* reached EOF */
    645    }
    646 
    647    if (n < 0) num_bytes = 0;   /* read error; ignore contents */
    648 
    649    if (num_bytes > file_buf_size) {
    650       VG_(free)( file_buf );
    651       VG_(lseek)( fh, 0, VKI_SEEK_SET );
    652       file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
    653       n = VG_(read)( fh, file_buf, num_bytes );
    654       if (n < 0) num_bytes = 0;
    655    }
    656 
    657    file_buf[num_bytes] = '\0';
    658    VG_(close)(fh);
    659 
    660    /* Parse file */
    661    model = VEX_S390X_MODEL_UNKNOWN;
    662    for (p = file_buf; *p; ++p) {
    663       /* Beginning of line */
    664      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;
    665 
    666      m = VG_(strstr)( p, "machine" );
    667      if (m == NULL) continue;
    668 
    669      p = m + sizeof "machine" - 1;
    670      while ( VG_(isspace)( *p ) || *p == '=') {
    671        if (*p == '\n') goto next_line;
    672        ++p;
    673      }
    674 
    675      model_name = p;
    676      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
    677        struct model_map *mm = model_map + n;
    678        SizeT len = VG_(strlen)( mm->name );
    679        if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
    680             VG_(isspace)( model_name[len] )) {
    681          if (mm->id < model) model = mm->id;
    682          p = model_name + len;
    683          break;
    684        }
    685      }
    686      /* Skip until end-of-line */
    687      while (*p != '\n')
    688        ++p;
    689    next_line: ;
    690    }
    691 
    692    VG_(free)( file_buf );
    693    VG_(debugLog)(1, "machine", "model = %s\n",
    694                  model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
    695                                                   : model_map[model].name);
    696    return model;
    697 }
    698 
    699 #endif /* VGA_s390x */
    700 
    701 #if defined(VGA_mips32) || defined(VGA_mips64)
    702 
     703 /* Read /proc/cpuinfo and return the CPU vendor as a VEX_PRID_COMP_* value. */
    704 static UInt VG_(get_machine_model)(void)
    705 {
    706    const char *search_MIPS_str = "MIPS";
    707    const char *search_Broadcom_str = "Broadcom";
    708    const char *search_Netlogic_str = "Netlogic";
     709    const char *search_Cavium_str = "Cavium";
    710    Int    n, fh;
    711    SysRes fd;
    712    SizeT  num_bytes, file_buf_size;
    713    HChar  *file_buf;
    714 
    715    /* Slurp contents of /proc/cpuinfo into FILE_BUF */
    716    fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
    717    if ( sr_isError(fd) ) return -1;
    718 
    719    fh  = sr_Res(fd);
    720 
    721    /* Determine the size of /proc/cpuinfo.
    722       Work around broken-ness in /proc file system implementation.
    723       fstat returns a zero size for /proc/cpuinfo although it is
    724       claimed to be a regular file. */
    725    num_bytes = 0;
    726    file_buf_size = 1000;
    727    file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
    728    while (42) {
    729       n = VG_(read)(fh, file_buf, file_buf_size);
    730       if (n < 0) break;
    731 
    732       num_bytes += n;
    733       if (n < file_buf_size) break;  /* reached EOF */
    734    }
    735 
    736    if (n < 0) num_bytes = 0;   /* read error; ignore contents */
    737 
    738    if (num_bytes > file_buf_size) {
    739       VG_(free)( file_buf );
    740       VG_(lseek)( fh, 0, VKI_SEEK_SET );
    741       file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
    742       n = VG_(read)( fh, file_buf, num_bytes );
    743       if (n < 0) num_bytes = 0;
    744    }
    745 
    746    file_buf[num_bytes] = '\0';
    747    VG_(close)(fh);
    748 
    749    /* Parse file */
    750    if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
    751        return VEX_PRID_COMP_BROADCOM;
    752    if (VG_(strstr) (file_buf, search_Netlogic_str) != NULL)
    753        return VEX_PRID_COMP_NETLOGIC;
    754    if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
    755        return VEX_PRID_COMP_CAVIUM;
    756    if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
    757        return VEX_PRID_COMP_MIPS;
    758 
    759    /* Did not find string in the proc file. */
    760    return -1;
    761 }
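         /* For illustration, a hypothetical /proc/cpuinfo fragment that the
            "MIPS" case above would match:

               cpu model            : MIPS 74Kc V4.12
         */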
    762 
    763 #endif
    764 
    765 /* Determine what insn set and insn set variant the host has, and
    766    record it.  To be called once at system startup.  Returns False if
     767    this is a CPU incapable of running Valgrind.
    768    Also determine information about the caches on this host. */
    769 
    770 Bool VG_(machine_get_hwcaps)( void )
    771 {
    772    vg_assert(hwcaps_done == False);
    773    hwcaps_done = True;
    774 
    775    // Whack default settings into vai, so that we only need to fill in
    776    // any interesting bits.
    777    LibVEX_default_VexArchInfo(&vai);
    778 
    779 #if defined(VGA_x86)
    780    { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
    781      UInt eax, ebx, ecx, edx, max_extended;
    782      HChar vstr[13];
    783      vstr[0] = 0;
    784 
    785      if (!VG_(has_cpuid)())
    786         /* we can't do cpuid at all.  Give up. */
    787         return False;
    788 
    789      VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
    790      if (eax < 1)
    791         /* we can't ask for cpuid(x) for x > 0.  Give up. */
    792         return False;
    793 
    794      /* Get processor ID string, and max basic/extended index
    795         values. */
    796      VG_(memcpy)(&vstr[0], &ebx, 4);
    797      VG_(memcpy)(&vstr[4], &edx, 4);
    798      VG_(memcpy)(&vstr[8], &ecx, 4);
    799      vstr[12] = 0;
    800 
    801      VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
    802      max_extended = eax;
    803 
    804      /* get capabilities bits into edx */
    805      VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
    806 
    807      have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
    808      have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
    809      have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
    810 
    811      /* cmpxchg8b is a minimum requirement now; if we don't have it we
    812         must simply give up.  But all CPUs since Pentium-I have it, so
    813         that doesn't seem like much of a restriction. */
    814      have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
    815      if (!have_cx8)
    816         return False;
    817 
    818      /* Figure out if this is an AMD that can do MMXEXT. */
    819      have_mmxext = False;
    820      if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
    821          && max_extended >= 0x80000001) {
    822         VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
    823         /* Some older AMD processors support a sse1 subset (Integer SSE). */
    824         have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
    825      }
    826 
    827      /* Figure out if this is an AMD or Intel that can do LZCNT. */
    828      have_lzcnt = False;
    829      if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
    830           || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
    831          && max_extended >= 0x80000001) {
    832         VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
    833         have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
    834      }
    835 
    836      /* Intel processors don't define the mmxext extension, but since it
    837         is just a sse1 subset always define it when we have sse1. */
    838      if (have_sse1)
    839         have_mmxext = True;
    840 
    841      va = VexArchX86;
    842      vai.endness = VexEndnessLE;
    843 
    844      if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
    845         vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
    846         vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
    847         vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
    848         vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
    849         if (have_lzcnt)
    850            vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
    851         VG_(machine_x86_have_mxcsr) = 1;
    852      } else if (have_sse2 && have_sse1 && have_mmxext) {
    853         vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
    854         vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
    855         vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
    856         if (have_lzcnt)
    857            vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
    858         VG_(machine_x86_have_mxcsr) = 1;
    859      } else if (have_sse1 && have_mmxext) {
    860         vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
    861         vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
    862         VG_(machine_x86_have_mxcsr) = 1;
    863      } else if (have_mmxext) {
    864         vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
    865         VG_(machine_x86_have_mxcsr) = 0;
    866      } else {
    867        vai.hwcaps = 0; /*baseline - no sse at all*/
    868        VG_(machine_x86_have_mxcsr) = 0;
    869      }
    870 
    871      VG_(machine_get_cache_info)(&vai);
    872 
    873      return True;
    874    }
    875 
    876 #elif defined(VGA_amd64)
    877    { Bool have_sse3, have_cx8, have_cx16;
    878      Bool have_lzcnt, have_avx, have_bmi, have_avx2;
    879      Bool have_rdtscp;
    880      UInt eax, ebx, ecx, edx, max_basic, max_extended;
    881      HChar vstr[13];
    882      vstr[0] = 0;
    883 
    884      if (!VG_(has_cpuid)())
    885         /* we can't do cpuid at all.  Give up. */
    886         return False;
    887 
    888      VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
    889      max_basic = eax;
    890      if (max_basic < 1)
    891         /* we can't ask for cpuid(x) for x > 0.  Give up. */
    892         return False;
    893 
    894      /* Get processor ID string, and max basic/extended index
    895         values. */
    896      VG_(memcpy)(&vstr[0], &ebx, 4);
    897      VG_(memcpy)(&vstr[4], &edx, 4);
    898      VG_(memcpy)(&vstr[8], &ecx, 4);
    899      vstr[12] = 0;
    900 
    901      VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
    902      max_extended = eax;
    903 
    904      /* get capabilities bits into edx */
    905      VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
    906 
    907      // we assume that SSE1 and SSE2 are available by default
    908      have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
    909      // ssse3   is ecx:9
    910      // sse41   is ecx:19
    911      // sse42   is ecx:20
    912 
    913      // osxsave is ecx:27
    914      // avx     is ecx:28
    915      // fma     is ecx:12
    916      have_avx = False;
    917      /* have_fma = False; */
    918      if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
    919         /* processor supports AVX instructions and XGETBV is enabled
    920            by OS */
    921         ULong w;
    922         __asm__ __volatile__("movq $0,%%rcx ; "
    923                              ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
    924                              "movq %%rax,%0"
    925                              :/*OUT*/"=r"(w) :/*IN*/
    926                              :/*TRASH*/"rdx","rcx");
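                 /* In XCR0, bit 1 (value 2) is XMM/SSE state and bit 2 (value
                    4) is YMM/AVX state; both must be OS-enabled for AVX to be
                    usable, hence the mask 6 below. */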
    927         if ((w & 6) == 6) {
    928            /* OS has enabled both XMM and YMM state support */
    929            have_avx = True;
    930            /* have_fma = (ecx & (1<<12)) != 0; */
    931            /* have_fma: Probably correct, but gcc complains due to
     932               unusedness. */
    933         }
    934      }
    935 
    936      /* cmpxchg8b is a minimum requirement now; if we don't have it we
    937         must simply give up.  But all CPUs since Pentium-I have it, so
    938         that doesn't seem like much of a restriction. */
    939      have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
    940      if (!have_cx8)
    941         return False;
    942 
    943      /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
    944      have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */
    945 
    946      /* Figure out if this CPU can do LZCNT. */
    947      have_lzcnt = False;
    948      if (max_extended >= 0x80000001) {
    949         VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
    950         have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
    951      }
    952 
    953      /* Can we do RDTSCP? */
    954      have_rdtscp = False;
    955      if (max_extended >= 0x80000001) {
    956         VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
     957         have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
    958      }
    959 
     960      /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS support). */
    961      have_bmi = False;
    962      have_avx2 = False;
    963      if (have_avx && max_basic >= 7) {
    964         VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
    965         have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */
    966         have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
    967      }
    968 
    969      va          = VexArchAMD64;
    970      vai.endness = VexEndnessLE;
    971      vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
    972                  | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
    973                  | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
    974                  | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
    975                  | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
    976                  | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
    977                  | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);
    978 
    979      VG_(machine_get_cache_info)(&vai);
    980 
    981      return True;
    982    }
    983 
    984 #elif defined(VGA_ppc32)
    985    {
    986      /* Find out which subset of the ppc32 instruction set is supported by
    987         verifying whether various ppc32 instructions generate a SIGILL
    988         or a SIGFPE. An alternative approach is to check the AT_HWCAP and
    989         AT_PLATFORM entries in the ELF auxiliary table -- see also
    990         the_iifii.client_auxv in m_main.c.
    991       */
    992      vki_sigset_t          saved_set, tmp_set;
    993      vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
    994      vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
    995 
    996      volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
    997      volatile Bool have_isa_2_07;
    998      Int r;
    999 
   1000      /* This is a kludge.  Really we ought to back-convert saved_act
   1001         into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
   1002         since that's a no-op on all ppc32 platforms so far supported,
    1003         it's not worth the typing effort.  At least include the most basic
   1004         sanity check: */
   1005      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
   1006 
   1007      VG_(sigemptyset)(&tmp_set);
   1008      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1009      VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
   1010 
   1011      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1012      vg_assert(r == 0);
   1013 
   1014      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1015      vg_assert(r == 0);
   1016      tmp_sigill_act = saved_sigill_act;
   1017 
   1018      r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
   1019      vg_assert(r == 0);
   1020      tmp_sigfpe_act = saved_sigfpe_act;
   1021 
   1022      /* NODEFER: signal handler does not return (from the kernel's point of
   1023         view), hence if it is to successfully catch a signal more than once,
   1024         we need the NODEFER flag. */
   1025      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1026      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1027      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1028      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1029      r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1030      vg_assert(r == 0);
   1031 
   1032      tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
   1033      tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
   1034      tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
   1035      tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
   1036      r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
   1037      vg_assert(r == 0);
   1038 
   1039      /* standard FP insns */
   1040      have_F = True;
   1041      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1042         have_F = False;
   1043      } else {
   1044         __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
   1045      }
   1046 
   1047      /* Altivec insns */
   1048      have_V = True;
   1049      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1050         have_V = False;
   1051      } else {
   1052         /* Unfortunately some older assemblers don't speak Altivec (or
   1053            choose not to), so to be safe we directly emit the 32-bit
   1054            word corresponding to "vor 0,0,0".  This fixes a build
   1055            problem that happens on Debian 3.1 (ppc32), and probably
   1056            various other places. */
   1057         __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
   1058      }
   1059 
   1060      /* General-Purpose optional (fsqrt, fsqrts) */
   1061      have_FX = True;
   1062      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1063         have_FX = False;
   1064      } else {
   1065         __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
   1066      }
   1067 
   1068      /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
   1069      have_GX = True;
   1070      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1071         have_GX = False;
   1072      } else {
   1073         __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
   1074      }
   1075 
   1076      /* VSX support implies Power ISA 2.06 */
   1077      have_VX = True;
   1078      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1079         have_VX = False;
   1080      } else {
   1081         __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
   1082      }
   1083 
   1084      /* Check for Decimal Floating Point (DFP) support. */
   1085      have_DFP = True;
   1086      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1087         have_DFP = False;
   1088      } else {
   1089         __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
   1090      }
   1091 
   1092      /* Check for ISA 2.07 support. */
   1093      have_isa_2_07 = True;
   1094      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1095         have_isa_2_07 = False;
   1096      } else {
   1097         __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
   1098      }
   1099 
   1100      /* determine dcbz/dcbzl sizes while we still have the signal
   1101       * handlers registered */
   1102      find_ppc_dcbz_sz(&vai);
   1103 
   1104      r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
   1105      vg_assert(r == 0);
   1106      r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
   1107      vg_assert(r == 0);
   1108      r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1109      vg_assert(r == 0);
   1110      VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
   1111                     (Int)have_F, (Int)have_V, (Int)have_FX,
   1112                     (Int)have_GX, (Int)have_VX, (Int)have_DFP,
   1113                     (Int)have_isa_2_07);
   1114      /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
   1115      if (have_V && !have_F)
   1116         have_V = False;
   1117      if (have_FX && !have_F)
   1118         have_FX = False;
   1119      if (have_GX && !have_F)
   1120         have_GX = False;
   1121 
   1122      VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
   1123      VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;
   1124 
   1125      va = VexArchPPC32;
   1126      vai.endness = VexEndnessBE;
   1127 
   1128      vai.hwcaps = 0;
   1129      if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
   1130      if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
   1131      if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
   1132      if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
   1133      if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
   1134      if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
   1135      if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
   1136 
   1137      VG_(machine_get_cache_info)(&vai);
   1138 
   1139      /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
   1140         called before we're ready to go. */
   1141      return True;
   1142    }
   1143 
    1144 #elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   1145    {
   1146      /* Same instruction set detection algorithm as for ppc32. */
   1147      vki_sigset_t          saved_set, tmp_set;
   1148      vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
   1149      vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
   1150 
   1151      volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
   1152      volatile Bool have_isa_2_07;
   1153      Int r;
   1154 
   1155      /* This is a kludge.  Really we ought to back-convert saved_act
   1156         into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
   1157         since that's a no-op on all ppc64 platforms so far supported,
    1158         it's not worth the typing effort.  At least include the most basic
   1159         sanity check: */
   1160      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
   1161 
   1162      VG_(sigemptyset)(&tmp_set);
   1163      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1164      VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
   1165 
   1166      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1167      vg_assert(r == 0);
   1168 
   1169      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1170      vg_assert(r == 0);
   1171      tmp_sigill_act = saved_sigill_act;
   1172 
   1173      VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
   1174      tmp_sigfpe_act = saved_sigfpe_act;
   1175 
   1176      /* NODEFER: signal handler does not return (from the kernel's point of
   1177         view), hence if it is to successfully catch a signal more than once,
   1178         we need the NODEFER flag. */
   1179      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1180      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1181      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1182      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1183      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1184 
   1185      tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
   1186      tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
   1187      tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
   1188      tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
   1189      VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
   1190 
   1191      /* standard FP insns */
   1192      have_F = True;
   1193      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1194         have_F = False;
   1195      } else {
   1196         __asm__ __volatile__("fmr 0,0");
   1197      }
   1198 
   1199      /* Altivec insns */
   1200      have_V = True;
   1201      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1202         have_V = False;
   1203      } else {
   1204         __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
   1205      }
   1206 
   1207      /* General-Purpose optional (fsqrt, fsqrts) */
   1208      have_FX = True;
   1209      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1210         have_FX = False;
   1211      } else {
   1212         __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
   1213      }
   1214 
   1215      /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
   1216      have_GX = True;
   1217      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1218         have_GX = False;
   1219      } else {
   1220         __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
   1221      }
   1222 
   1223      /* VSX support implies Power ISA 2.06 */
   1224      have_VX = True;
   1225      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1226         have_VX = False;
   1227      } else {
   1228         __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
   1229      }
   1230 
   1231      /* Check for Decimal Floating Point (DFP) support. */
   1232      have_DFP = True;
   1233      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1234         have_DFP = False;
   1235      } else {
   1236         __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
   1237      }
   1238 
   1239      /* Check for ISA 2.07 support. */
   1240      have_isa_2_07 = True;
   1241      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1242         have_isa_2_07 = False;
   1243      } else {
   1244         __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
   1245      }
   1246 
   1247      /* determine dcbz/dcbzl sizes while we still have the signal
   1248       * handlers registered */
   1249      find_ppc_dcbz_sz(&vai);
   1250 
   1251      VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
   1252      VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
   1253      VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1254      VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
   1255                     (Int)have_F, (Int)have_V, (Int)have_FX,
   1256                     (Int)have_GX, (Int)have_VX, (Int)have_DFP,
   1257                     (Int)have_isa_2_07);
    1258      /* on ppc64, if we don't even have FP, just give up. */
   1259      if (!have_F)
   1260         return False;
   1261 
   1262      VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;
   1263 
   1264      va = VexArchPPC64;
   1265 #    if defined(VKI_LITTLE_ENDIAN)
   1266      vai.endness = VexEndnessLE;
   1267 #    elif defined(VKI_BIG_ENDIAN)
   1268      vai.endness = VexEndnessBE;
   1269 #    else
   1270      vai.endness = VexEndness_INVALID;
   1271 #    endif
   1272 
   1273      vai.hwcaps = 0;
   1274      if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
   1275      if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
   1276      if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
   1277      if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
   1278      if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
   1279      if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
   1280 
   1281      VG_(machine_get_cache_info)(&vai);
   1282 
   1283      /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
   1284         called before we're ready to go. */
   1285      return True;
   1286    }
   1287 
   1288 #elif defined(VGA_s390x)
   1289 
   1290 #  include "libvex_s390x_common.h"
   1291 
   1292    {
   1293      /* Instruction set detection code borrowed from ppc above. */
   1294      vki_sigset_t          saved_set, tmp_set;
   1295      vki_sigaction_fromK_t saved_sigill_act;
   1296      vki_sigaction_toK_t     tmp_sigill_act;
   1297 
   1298      volatile Bool have_LDISP, have_STFLE;
   1299      Int i, r, model;
   1300 
   1301      /* If the model is "unknown" don't treat this as an error. Assume
   1302         this is a brand-new machine model for which we don't have the
   1303         identification yet. Keeping fingers crossed. */
   1304      model = VG_(get_machine_model)();
   1305 
   1306      /* Unblock SIGILL and stash away the old action for that signal */
   1307      VG_(sigemptyset)(&tmp_set);
   1308      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1309 
   1310      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1311      vg_assert(r == 0);
   1312 
   1313      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1314      vg_assert(r == 0);
   1315      tmp_sigill_act = saved_sigill_act;
   1316 
   1317      /* NODEFER: signal handler does not return (from the kernel's point of
   1318         view), hence if it is to successfully catch a signal more than once,
   1319         we need the NODEFER flag. */
   1320      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1321      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1322      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1323      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1324      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1325 
   1326      /* Determine hwcaps. Note: we cannot rely solely on the stfle
   1327         insn because it is not supported on z900. */
   1328 
   1329      have_LDISP = True;
   1330      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1331         have_LDISP = False;
   1332      } else {
   1333         /* BASR loads the address of the next insn into r1. Needed to
   1334            avoid a segfault in XY. */
   1335         __asm__ __volatile__("basr %%r1,%%r0\n\t"
   1336                              ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
   1337                              ".short 0x0057" : : : "r0", "r1", "cc", "memory");
   1338      }
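             /* Note: the long-displacement facility widens the 12-bit
                unsigned displacement of storage insns to a 20-bit signed
                displacement; the XY used above is the long-displacement
                (RXY-format) variant of X (exclusive or). */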
   1339 
   1340      /* Check availability of STFLE. If available store facility bits
   1341         in hoststfle. */
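             /* The stfle insn stores the facility list, one bit per
                facility with the most significant bit first, at the
                given address.  On entry, general register 0 holds the
                number of doublewords provided minus one; on completion
                it holds the number of doublewords needed minus one. */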
   1342      ULong hoststfle[S390_NUM_FACILITY_DW];
   1343 
   1344      for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
   1345         hoststfle[i] = 0;
   1346 
   1347      have_STFLE = True;
   1348      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1349         have_STFLE = False;
   1350      } else {
   1351          register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;
   1352 
   1353          __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
   1354                               : "=m" (hoststfle), "+d"(reg0)
   1355                               : : "cc", "memory");
   1356      }
   1357 
   1358      /* Restore signals */
   1359      r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
   1360      vg_assert(r == 0);
   1361      r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1362      vg_assert(r == 0);
   1363      va = VexArchS390X;
   1364      vai.endness = VexEndnessBE;
   1365 
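             /* By construction here, the machine model occupies the low
                bits of hwcaps; the facility flags are OR'd in on top. */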
   1366      vai.hwcaps = model;
   1367      if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
   1368      if (have_LDISP) {
   1369         /* Use long displacement only on machines >= z990. For all other
   1370            machines it is millicoded and therefore slow. */
   1371         if (model >= VEX_S390X_MODEL_Z990)
   1372            vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
   1373      }
   1374 
   1375      /* Detect presence of certain facilities using the STFLE insn.
   1376         Note that these facilities were introduced at the same time as
   1377         STFLE or later, so the absence of STFLE implies the absence of
   1378         the facility we're trying to detect. */
   1379      struct fac_hwcaps_map {
   1380         UInt installed;
   1381         UInt facility_bit;
   1382         UInt hwcaps_bit;
   1383         const HChar name[6];   // may need adjustment for new facility names
   1384      } fac_hwcaps[] = {
   1385         { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
   1386         { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
   1387         { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
   1388         { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
   1389         { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
   1390         { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
   1391         { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
   1392         { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
   1393         { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
   1394         { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
   1395      };
   1396 
   1397      /* Set hwcaps according to the detected facilities */
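             /* Facility bits are numbered starting from the most
                significant bit of the facility list, so facility bit N
                of the first doubleword corresponds to the host mask
                1ULL << (63 - N); e.g. facility bit 21 is tested with
                the mask 1ULL << 42. */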
   1398      for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
   1399         vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
   1400         if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
   1401            fac_hwcaps[i].installed = True;
   1402            vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
   1403         }
   1404      }
   1405 
   1406      /* Build up a string showing the probed-for facilities */
   1407      HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
   1408                    (sizeof fac_hwcaps[0].name + 3) + //  %s %d
   1409                    7 + 1 + 4 + 2  // machine %4d
   1410                    + 1];  // \0
   1411      HChar *p = fac_str;
   1412      p += VG_(sprintf)(p, "machine %4d  ", model);
   1413      for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
   1414         p += VG_(sprintf)(p, " %s %1d", fac_hwcaps[i].name,
   1415                           fac_hwcaps[i].installed);
   1416      }
   1417      *p++ = '\0';
   1418 
   1419      VG_(debugLog)(1, "machine", "%s\n", fac_str);
   1420      VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
   1421 
   1422      VG_(machine_get_cache_info)(&vai);
   1423 
   1424      return True;
   1425    }
   1426 
   1427 #elif defined(VGA_arm)
   1428    {
   1429      /* Same instruction set detection algorithm as for ppc32. */
   1430      vki_sigset_t          saved_set, tmp_set;
   1431      vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
   1432      vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
   1433 
   1434      volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
   1435      volatile Int archlevel;
   1436      Int r;
   1437 
   1438      /* This is a kludge.  Really we ought to back-convert saved_act
   1439         into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
   1440         since that's a no-op on all arm platforms so far supported,
   1441         it's not worth the typing effort.  At least include the most
   1442         basic sanity check: */
   1443      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
   1444 
   1445      VG_(sigemptyset)(&tmp_set);
   1446      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1447      VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
   1448 
   1449      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1450      vg_assert(r == 0);
   1451 
   1452      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1453      vg_assert(r == 0);
   1454      tmp_sigill_act = saved_sigill_act;
   1455 
   1456      VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
   1457      tmp_sigfpe_act = saved_sigfpe_act;
   1458 
   1459      /* NODEFER: signal handler does not return (from the kernel's point of
   1460         view), hence if it is to successfully catch a signal more than once,
   1461         we need the NODEFER flag. */
   1462      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1463      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1464      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1465      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1466      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1467 
   1468      tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
   1469      tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
   1470      tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
   1471      tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
   1472      VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
   1473 
   1474      /* VFP insns */
   1475      have_VFP = True;
   1476      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1477         have_VFP = False;
   1478      } else {
   1479         __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
   1480      }
   1481      /* There are several generations of the VFP extension, but they
   1482         differ very little, so for now we do not distinguish them. */
   1483      have_VFP2 = have_VFP;
   1484      have_VFP3 = have_VFP;
   1485 
   1486      /* NEON insns */
   1487      have_NEON = True;
   1488      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1489         have_NEON = False;
   1490      } else {
   1491         __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
   1492      }
   1493 
   1494      /* ARM architecture level */
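             /* Probe the level by executing insns specific to it and
                falling back on SIGILL: PLI exists only from v7 onwards
                and PKHBT only from v6 onwards.  The v6 probe below runs
                only if the v7 probe failed, since a successful v7 probe
                leaves archlevel at 7. */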
   1495      archlevel = 5; /* v5 will be base level */
   1496      if (archlevel < 7) {
   1497         archlevel = 7;
   1498         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1499            archlevel = 5;
   1500         } else {
   1501            __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
   1502         }
   1503      }
   1504      if (archlevel < 6) {
   1505         archlevel = 6;
   1506         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1507            archlevel = 5;
   1508         } else {
   1509            __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
   1510         }
   1511      }
   1512 
   1513      VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
   1514      VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
   1515      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1516      VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
   1517      VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1518 
   1519      VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
   1520            archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
   1521            (Int)have_NEON);
   1522 
   1523      VG_(machine_arm_archlevel) = archlevel;
   1524 
   1525      va = VexArchARM;
   1526      vai.endness = VexEndnessLE;
   1527 
   1528      vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
   1529      if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
   1530      if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
   1531      if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
   1532      if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   1533 
   1534      VG_(machine_get_cache_info)(&vai);
   1535 
   1536      return True;
   1537    }
   1538 
   1539 #elif defined(VGA_arm64)
   1540    {
   1541      va = VexArchARM64;
   1542      vai.endness = VexEndnessLE;
   1543 
   1544      /* So far there are no variants. */
   1545      vai.hwcaps = 0;
   1546 
   1547      VG_(machine_get_cache_info)(&vai);
   1548 
   1549      /* 0 denotes 'not set'.  The range of legitimate values here,
   1550         after being set that is, is 2 through 17 inclusive. */
   1551      vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
   1552      vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
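             /* CTR_EL0.DminLine (bits [19:16]) and CTR_EL0.IminLine
                (bits [3:0]) encode the smallest data and insn cache
                line sizes as log2 of the number of 4-byte words, so
                adding 2 gives log2 of the line size in bytes; e.g. a
                field value of 4 means 2^(4+2) = 64 byte lines. */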
   1553      ULong ctr_el0;
   1554      __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
   1555      vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
   1556      vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
   1557      VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
   1558                       "ctr_el0.iMinLine_szB = %d\n",
   1559                    1 << vai.arm64_dMinLine_lg2_szB,
   1560                    1 << vai.arm64_iMinLine_lg2_szB);
   1561 
   1562      return True;
   1563    }
   1564 
   1565 #elif defined(VGA_mips32)
   1566    {
   1567      /* Define the position of F64 bit in FIR register. */
   1568 #    define FP64 22
   1569      va = VexArchMIPS32;
   1570      UInt model = VG_(get_machine_model)();
   1571      if (model == -1)
   1572          return False;
   1573 
   1574      vai.hwcaps = model;
   1575 
   1576 #    if defined(VKI_LITTLE_ENDIAN)
   1577      vai.endness = VexEndnessLE;
   1578 #    elif defined(VKI_BIG_ENDIAN)
   1579      vai.endness = VexEndnessBE;
   1580 #    else
   1581      vai.endness = VexEndness_INVALID;
   1582 #    endif
   1583 
   1584      /* Same instruction set detection algorithm as for ppc32/arm... */
   1585      vki_sigset_t          saved_set, tmp_set;
   1586      vki_sigaction_fromK_t saved_sigill_act;
   1587      vki_sigaction_toK_t   tmp_sigill_act;
   1588 
   1589      volatile Bool have_DSP, have_DSPr2;
   1590      Int r;
   1591 
   1592      vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
   1593 
   1594      VG_(sigemptyset)(&tmp_set);
   1595      VG_(sigaddset)(&tmp_set, VKI_SIGILL);
   1596 
   1597      r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   1598      vg_assert(r == 0);
   1599 
   1600      r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   1601      vg_assert(r == 0);
   1602      tmp_sigill_act = saved_sigill_act;
   1603 
   1604      /* NODEFER: signal handler does not return (from the kernel's point of
   1605         view), hence if it is to successfully catch a signal more than once,
   1606         we need the NODEFER flag. */
   1607      tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   1608      tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   1609      tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   1610      tmp_sigill_act.ksa_handler = handler_unsup_insn;
   1611      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1612 
   1613      if (model == VEX_PRID_COMP_MIPS) {
   1614         /* DSPr2 instructions. */
   1615         have_DSPr2 = True;
   1616         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1617            have_DSPr2 = False;
   1618         } else {
   1619            __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
   1620         }
   1621         if (have_DSPr2) {
   1622            /* We assume it's 74K, since it can run DSPr2. */
   1623            vai.hwcaps |= VEX_PRID_IMP_74K;
   1624         } else {
   1625            /* DSP instructions. */
   1626            have_DSP = True;
   1627            if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1628               have_DSP = False;
   1629            } else {
   1630               __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
   1631            }
   1632            if (have_DSP) {
   1633               /* We assume it's 34K, since it has support for DSP. */
   1634               vai.hwcaps |= VEX_PRID_IMP_34K;
   1635            }
   1636         }
   1637      }
   1638 
   1639      /* Check if CPU has FPU and 32 dbl. prec. FP registers */
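             /* cfc1 with control register 0 reads the FIR (Floating-point
                Implementation Register) of coprocessor 1; the F64 bit
                defined above is set when the FPU has 64-bit registers,
                i.e. 32 double-precision FP registers. */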
   1640      int FIR = 0;
   1641      __asm__ __volatile__(
   1642         "cfc1 %0, $0"  "\n\t"
   1643         : "=r" (FIR)
   1644      );
   1645      if (FIR & (1 << FP64)) {
   1646         vai.hwcaps |= VEX_PRID_CPU_32FPR;
   1647      }
   1648 
   1649      VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
   1650      VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   1651      VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
   1652 
   1653      VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
   1654      VG_(machine_get_cache_info)(&vai);
   1655 
   1656      return True;
   1657    }
   1658 
   1659 #elif defined(VGA_mips64)
   1660    {
   1661      va = VexArchMIPS64;
   1662      UInt model = VG_(get_machine_model)();
   1663      if (model == -1)
   1664          return False;
   1665 
   1666      vai.hwcaps = model;
   1667 
   1668 #    if defined(VKI_LITTLE_ENDIAN)
   1669      vai.endness = VexEndnessLE;
   1670 #    elif defined(VKI_BIG_ENDIAN)
   1671      vai.endness = VexEndnessBE;
   1672 #    else
   1673      vai.endness = VexEndness_INVALID;
   1674 #    endif
   1675 
   1676      VG_(machine_get_cache_info)(&vai);
   1677 
   1678      return True;
   1679    }
   1680 
   1681 #elif defined(VGA_tilegx)
   1682    {
   1683      va = VexArchTILEGX;
   1684      vai.hwcaps = VEX_HWCAPS_TILEGX_BASE;
   1685      vai.endness = VexEndnessLE;
   1686 
   1687      VG_(machine_get_cache_info)(&vai);
   1688 
   1689      return True;
   1690    }
   1691 
   1692 #else
   1693 #  error "Unknown arch"
   1694 #endif
   1695 }
   1696 
   1697 /* Notify host cpu instruction cache line size. */
   1698 #if defined(VGA_ppc32)
   1699 void VG_(machine_ppc32_set_clszB)( Int szB )
   1700 {
   1701    vg_assert(hwcaps_done);
   1702 
   1703    /* Either the value must not have been set yet (zero) or we can
   1704       tolerate it being set to the same value multiple times, as the
   1705       stack scanning logic in m_main is a bit stupid. */
   1706    vg_assert(vai.ppc_icache_line_szB == 0
   1707              || vai.ppc_icache_line_szB == szB);
   1708 
   1709    vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   1710    vai.ppc_icache_line_szB = szB;
   1711 }
   1712 #endif
   1713 
   1714 
   1715 /* Notify host cpu instruction cache line size. */
   1716 #if defined(VGA_ppc64be)|| defined(VGA_ppc64le)
   1717 void VG_(machine_ppc64_set_clszB)( Int szB )
   1718 {
   1719    vg_assert(hwcaps_done);
   1720 
   1721    /* Either the value must not have been set yet (zero) or we can
   1722       tolerate it being set to the same value multiple times, as the
   1723       stack scanning logic in m_main is a bit stupid. */
   1724    vg_assert(vai.ppc_icache_line_szB == 0
   1725              || vai.ppc_icache_line_szB == szB);
   1726 
   1727    vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   1728    vai.ppc_icache_line_szB = szB;
   1729 }
   1730 #endif
   1731 
   1732 
   1733 /* Notify host's ability to handle NEON instructions. */
   1734 #if defined(VGA_arm)
   1735 void VG_(machine_arm_set_has_NEON)( Bool has_neon )
   1736 {
   1737    vg_assert(hwcaps_done);
   1738    /* There's nothing else we can sanity check. */
   1739 
   1740    if (has_neon) {
   1741       vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   1742    } else {
   1743       vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   1744    }
   1745 }
   1746 #endif
   1747 
   1748 
   1749 /* Fetch host cpu info, once established. */
   1750 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
   1751                                    /*OUT*/VexArchInfo* pVai )
   1752 {
   1753    vg_assert(hwcaps_done);
   1754    if (pVa)  *pVa  = va;
   1755    if (pVai) *pVai = vai;
   1756 }
   1757 
   1758 
   1759 /* Returns the size of the largest guest register that we will
   1760    simulate in this run.  This depends on both the guest architecture
   1761    and on the specific capabilities we are simulating for that guest
   1762    (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   1763    or 32.  General rule: if in doubt, return a value larger than
   1764    reality.
   1765 
   1766    This information is needed by Cachegrind and Callgrind to decide
   1767    what the minimum cache line size they are prepared to simulate is.
   1768    Basically require that the minimum cache line size is at least as
   1769    large as the largest register that might get transferred to/from
   1770    memory, so as to guarantee that any such transaction can straddle
   1771    at most 2 cache lines.
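           For example, with 16-byte cache lines a 16-byte vector transfer
           that starts 8 bytes into a line touches exactly two lines,
           whereas with 8-byte lines it could touch three.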
   1772 */
   1773 Int VG_(machine_get_size_of_largest_guest_register) ( void )
   1774 {
   1775    vg_assert(hwcaps_done);
   1776    /* Once hwcaps_done is True, we can fish around inside va/vai to
   1777       find the information we need. */
   1778 
   1779 #  if defined(VGA_x86)
   1780    vg_assert(va == VexArchX86);
   1781    /* We don't support AVX, so 32 is out.  At the other end, even if
   1782       we don't support any SSE, the X87 can generate 10 byte
   1783       transfers, so let's say 16 to be on the safe side.  Hence the
   1784       answer is always 16. */
   1785    return 16;
   1786 
   1787 #  elif defined(VGA_amd64)
   1788    /* if AVX then 32 else 16 */
   1789    return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
   1790 
   1791 #  elif defined(VGA_ppc32)
   1792    /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   1793    if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   1794    if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   1795    if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   1796    return 8;
   1797 
   1798 #  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   1799    /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   1800    if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   1801    if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   1802    if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   1803    return 8;
   1804 
   1805 #  elif defined(VGA_s390x)
   1806    return 8;
   1807 
   1808 #  elif defined(VGA_arm)
   1809    /* Really it depends whether or not we have NEON, but let's just
   1810       assume we always do. */
   1811    return 16;
   1812 
   1813 #  elif defined(VGA_arm64)
   1814    /* ARM64 always has Neon, AFAICS. */
   1815    return 16;
   1816 
   1817 #  elif defined(VGA_mips32)
   1818    /* The guest state implies 4, but that can't really be true, can
   1819       it? */
   1820    return 8;
   1821 
   1822 #  elif defined(VGA_mips64)
   1823    return 8;
   1824 
   1825 #  elif defined(VGA_tilegx)
   1826    return 8;
   1827 
   1828 #  else
   1829 #    error "Unknown arch"
   1830 #  endif
   1831 }
   1832 
   1833 
   1834 // Given a pointer to a function as obtained by "& functionname" in C,
   1835 // produce a pointer to the actual entry point for the function.
   1836 void* VG_(fnptr_to_fnentry)( void* f )
   1837 {
   1838 #  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
   1839       || defined(VGP_arm_linux) || defined(VGO_darwin)          \
   1840       || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
   1841       || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
   1842       || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
   1843       || defined(VGP_tilegx_linux)
   1844    return f;
   1845 #  elif defined(VGP_ppc64be_linux)
   1846    /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
   1847       3-word function descriptor, of which the first word is the entry
   1848       address. */
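           /* (The second word of the descriptor is the TOC pointer and
              the third the environment pointer; neither is needed to
              locate the entry point.) */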
   1849    UWord* descr = (UWord*)f;
   1850    return (void*)(descr[0]);
   1851 #  else
   1852 #    error "Unknown platform"
   1853 #  endif
   1854 }
   1855 
   1856 /*--------------------------------------------------------------------*/
   1857 /*--- end                                                          ---*/
   1858 /*--------------------------------------------------------------------*/
   1859