/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2012 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

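/* Capture the registers the stack unwinder needs in order to begin
   unwinding: always the PC and SP, plus whatever arch-specific state
   the unwinder may consult (frame pointer on x86/amd64/s390x, link
   register on ppc/s390x, a handful of candidate frame registers on
   arm and mips). */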
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}


void VG_(set_syscall_return_shadows) ( ThreadId tid,
                                       /* shadow vals for the result */
                                       UWord s1res, UWord s2res,
                                       /* shadow vals for the error val */
                                       UWord s1err, UWord s2err )
{
#  if defined(VGP_x86_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
#  elif defined(VGP_amd64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
#  elif defined(VGP_arm_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
#  elif defined(VGO_darwin)
   // GrP fixme darwin syscalls may return more values (2 registers plus error)
#  elif defined(VGP_s390x_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  elif defined(VGP_mips32_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  else
#    error "Unknown plat"
#  endif
}

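/* Copy a sub-range of one of a thread's guest register images out to
   DST (and, below, back in from SRC).  shadowNo selects the image:
   0 is the real guest state, 1 and 2 are the two shadow register sets
   that Valgrind maintains alongside it on behalf of tools. */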
void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId, HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R7" , vex->guest_R7 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)) {
         apply_to_GPs_of_tid(tid, f);
      }
   }
}

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}
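
/* A minimal sketch of how the two functions above are meant to be
   used together to visit every live thread stack:

      ThreadId tid;
      Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while (VG_(thread_stack_next)(&tid, &stack_min, &stack_max)) {
         ... process the stack spanning [stack_min, stack_max] ...
      }
*/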

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) reading the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/
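/* Rough expected ordering at startup (inferred from the comments
   above, not enforced here): VG_(machine_get_hwcaps) runs first, then
   on ppc the AT_SYSINFO-derived cache line size is pushed in via
   VG_(machine_ppc32_set_clszB) / VG_(machine_ppc64_set_clszB), and
   only after that does m_main set up the client's signal state. */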

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* fixs390: anything for s390x here ? */

/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) \
    || defined(VGA_arm) || defined(VGA_s390x)
#include "pub_tool_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
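
/* The detection pattern used repeatedly below: with handler_unsup_insn
   installed for SIGILL (and SIGFPE where relevant), take a
   VG_MINIMAL_SETJMP checkpoint and then execute a candidate
   instruction.  If the host lacks the instruction, the resulting
   signal lands in the handler, which LONGJMPs back, and the setjmp
   arm records "not supported". */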


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that, as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to the next multiple of the max block size; assumes
      MAX_DCBZL_SZB is a power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although on some machines it clears the
      native cache block size, whatever that is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
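   /* Exactly one aligned block inside test_block has been zeroed, so
      counting the zero bytes across the whole buffer yields the size
      of the block that dcbz clears. */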
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#ifdef VGA_mips32

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   char *search_MIPS_str = "MIPS";
   char *search_Broadcom_str = "Broadcom";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
       return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
       return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
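     /* CPUID leaf 0 returns the vendor string in EBX, EDX, ECX order;
        for example "GenuineIntel" arrives as EBX="Genu", EDX="ineI",
        ECX="ntel". */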
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     if (have_sse2 && have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     if (have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     va         = VexArchX86;
     vai.hwcaps = 0; /*baseline - no sse at all*/
     VG_(machine_x86_have_mxcsr) = 0;
     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx /*, have_fma*/;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
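        /* XGETBV with ECX=0 reads XCR0; bit 1 covers SSE (XMM) state
           and bit 2 covers AVX (YMM) state, so (w & 6) == 6 means the
           OS saves/restores both across context switches. */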
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }


     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va         = VexArchAMD64;
     vai.hwcaps = (have_sse3  ? VEX_HWCAPS_AMD64_SSE3  : 0)
                | (have_cx16  ? VEX_HWCAPS_AMD64_CX16  : 0)
                | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
                | (have_avx   ? VEX_HWCAPS_AMD64_AVX   : 0);
     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;


     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)
   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_EIMM, have_GIE, have_DFP, have_FGX;
     volatile Bool have_STFLE, have_ETF2, have_ETF3;
     Int r, model;

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot rely unconditionally on the stfle
        insn because it is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1. Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     have_EIMM = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_EIMM = False;
     } else {
        __asm__ __volatile__(".long  0xc0090000\n\t"  /* iilf r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_GIE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GIE = False;
     } else {
        __asm__ __volatile__(".long  0xc2010000\n\t"  /* msfi r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xb3d20000"
                               : : : "r0", "cc", "memory");  /* adtr r0,r0,r0 */
     }

     have_FGX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FGX = False;
     } else {
        __asm__ __volatile__(".long 0xb3cd0000" : : : "r0");  /* lgdr r0,f0 */
     }

     /* Detect presence of the ETF2-enhancement facility using the
        STFLE insn. Note that STFLE and ETF2 were introduced at the same
        time, so the absence of STFLE implies the absence of ETF2. */
     have_STFLE = True;
     have_ETF2 = False;
     have_ETF3 = False;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
         ULong hoststfle[1];
         register ULong reg0 asm("0") = 0; /* one double word available */

         __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                              : "=m" (hoststfle), "+d"(reg0)
                              : : "cc", "memory");
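         /* Facility bits use IBM MSB-0 numbering, hence the (63 - bit)
            shifts: bit 24 is the ETF2-enhancement facility and bit 30
            the ETF3-enhancement facility. */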
         if (hoststfle[0] & (1ULL << (63 - 24)))
             have_ETF2 = True;
         if (hoststfle[0] & (1ULL << (63 - 30)))
             have_ETF3 = True;
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;

     model = VG_(get_machine_model)();

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */

     VG_(debugLog)(1, "machine", "machine %d  LDISP %d EIMM %d GIE %d DFP %d "
                   "FGX %d STFLE %d ETF2 %d ETF3 %d\n", model, have_LDISP, have_EIMM,
                   have_GIE, have_DFP, have_FGX, have_STFLE, have_ETF2, have_ETF3);

     vai.hwcaps = model;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other machines
           it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }
     if (have_EIMM)  vai.hwcaps |= VEX_HWCAPS_S390X_EIMM;
     if (have_GIE)   vai.hwcaps |= VEX_HWCAPS_S390X_GIE;
     if (have_DFP)   vai.hwcaps |= VEX_HWCAPS_S390X_DFP;
     if (have_FGX)   vai.hwcaps |= VEX_HWCAPS_S390X_FGX;
     if (have_ETF2)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF2;
     if (have_ETF3)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF3;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
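     /* Probe insns from newest to oldest: PLI exists only from ARMv7
        and PKHBT only from ARMv6, so the first probe that does not
        raise SIGILL determines the architecture level. */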
   1314      archlevel = 5; /* v5 will be base level */
   1315      if (archlevel < 7) {
   1316         archlevel = 7;
   1317         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1318            archlevel = 5;
   1319         } else {
   1320            __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
   1321         }
   1322      }
   1323      if (archlevel < 6) {
   1324         archlevel = 6;
   1325         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
   1326            archlevel = 5;
   1327         } else {
   1328            __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
   1329         }
   1330      }
   1331 
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     return True;
   }

#elif defined(VGA_mips32)
   {
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
        return False;

     vai.hwcaps = model;
     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host cpu cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif
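
/* Illustrative usage, under the assumption that the caller (m_main's
   startup code) has discovered the line size elsewhere, for instance
   from the auxiliary vector -- that source is an assumption, not
   shown here:

      VG_(machine_ppc64_set_clszB)( 128 );

   Any size other than 32, 64 or 128 fails the assertion above, and a
   second call must pass the same value as the first. */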


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}
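
/* Typical use once hwcaps detection (VG_(machine_get_hwcaps)) has
   completed -- a hypothetical caller, for illustration only:

      VexArch     va_here;
      VexArchInfo vai_here;
      VG_(machine_get_VexArchInfo)( &va_here, &vai_here );
      // Either out-pointer may be NULL if that half is not wanted.
*/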


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   the minimum cache line size they are prepared to simulate.
   Basically we require that the minimum cache line size is at least
   as large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines. */
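/* Worked example of the "at most 2 cache lines" claim: for a line size
   L and a transfer of s <= L bytes starting at any address a, the
   bytes touched are [a, a+s-1], which span at most
   floor((a+s-1)/L) - floor(a/L) + 1 <= 2 lines.  If s could exceed L
   -- say a 16-byte transfer with 8-byte lines, starting at offset 7 in
   a line -- the transfer would touch 3 lines, which is exactly the
   case the requirement above excludes. */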
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends on whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  else
#    error "Unknown arch"
#  endif
}
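
/* A hypothetical consumer, sketching how the constraint described
   above might be enforced; Cachegrind's real check may differ in
   detail:

      Int reg_szB = VG_(machine_get_size_of_largest_guest_register)();
      if (line_szB < reg_szB)
         return False; // refuse lines smaller than the largest register
*/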


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux)                           \
      || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)
   return f;
#  elif defined(VGP_ppc64_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}
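
/* For illustration, the ppc64-linux (AIX-style) descriptor that f
   points at has this shape; the field names are descriptive choices,
   not taken from any Valgrind header:

      typedef struct {
         UWord entry;  // code entry point -- the word returned above
         UWord toc;    // TOC (r2) value the callee expects
         UWord env;    // environment pointer; unused for C code
      } FnDescr;

   so VG_(fnptr_to_fnentry) simply reads the first word. */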

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/