/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}

static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %d\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#else
#  error Unknown arch
#endif
}

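/* Illustrative sketch (hypothetical callback name): a tool that wants
   to inspect every live general-purpose register value, e.g. to treat
   it as a potential pointer, could do

      static void inspect_reg ( ThreadId tid, const HChar* name, UWord val )
      {
         ... look at 'val' ...
      }
      ...
      VG_(apply_to_GP_regs)( inspect_reg );

   The callback is invoked once per GP register of every thread that is
   live, or that has been told to die because some other thread called
   exit, as implemented below. */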
void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}
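
/* Sketch of the intended iteration protocol for the two functions
   above (hypothetical variable names):

      ThreadId tid;
      Addr     stack_min, stack_max;
      VG_(thread_stack_reset_iter)( &tid );
      while ( VG_(thread_stack_next)( &tid, &stack_min, &stack_max ) ) {
         ... [stack_min, stack_max] bounds the stack of thread 'tid' ...
      }
*/
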
Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF, identification = 0117C9, machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#if defined(VGA_mips32) || defined(VGA_mips64)

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   const char *search_MIPS_str = "MIPS";
   const char *search_Broadcom_str = "Broadcom";
   const char *search_Netlogic_str = "Netlogic";
   const char *search_Cavium_str = "Cavium";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file.
   */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
       return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_Netlogic_str) != NULL)
       return VEX_PRID_COMP_NETLOGIC;
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
       return VEX_PRID_COMP_CAVIUM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
       return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind.
   Also determine information about the caches on this host. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;
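
     /* Note: CPUID leaf 0 places the 12-character vendor identification
        string in EBX, EDX, ECX (in that order), so copying the registers
        in that order yields e.g. "GenuineIntel" or "AuthenticAMD". */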

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
        vai.hwcaps = 0; /*baseline - no sse at all*/
        VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
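        /* At this point w holds XCR0.  Bit 1 covers SSE/XMM state and
           bit 2 covers AVX/YMM state, so (w & 6) == 6 means the OS saves
           and restores both across context switches. */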
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS
        support). */
     have_bmi  = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va         = VexArchAMD64;
     vai.hwcaps = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                   (Int)have_isa_2_07);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                   (Int)have_isa_2_07);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
        /* BASR loads the address of the next insn into r1. Needed to avoid
           a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY 0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE. If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

        __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                             : "=m" (hoststfle), "+d"(reg0)
                             : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note, that these facilities were introduced at the same time or later
        as STFLE, so the absence of STFLE implies the absence of the facility
        we're trying to detect. */
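     /* Facility bits are numbered from the leftmost (most significant)
        bit of the first doubleword that stfle stores, which is why the
        loop below tests hoststfle[0] with (1ULL << (63 - facility_bit)). */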
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
     };

     /* Set hwcaps according to the detected facilities */
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
        if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) +    //  %s %d
                   7 + 1 + 4 + 2    // machine %4d
                   + 1];  // \0
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1d", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension but they differ
        very little, so for now we will not distinguish between them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
                   archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
                   (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm64)
   {
     va = VexArchARM64;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* 0 denotes 'not set'.  The range of legitimate values here,
        after being set that is, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                   "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);

     return True;
   }

#elif defined(VGA_mips32)
   {
     /* Define the position of F64 bit in FIR register. */
#    define FP64 22
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_DSP, have_DSPr2;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     if (model == VEX_PRID_COMP_MIPS) {
        /* DSPr2 instructions. */
        have_DSPr2 = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_DSPr2 = False;
        } else {
           __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
        }
        if (have_DSPr2) {
           /* We assume it's 74K, since it can run DSPr2. */
           vai.hwcaps |= VEX_PRID_IMP_74K;
        } else {
           /* DSP instructions. */
           have_DSP = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSP = False;
           } else {
              __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
           }
           if (have_DSP) {
              /* We assume it's 34K, since it has support for DSP. */
              vai.hwcaps |= VEX_PRID_IMP_34K;
           }
        }
     }

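     /* "cfc1 %0, $0" below reads FCR0, the Floating-point Implementation
        Register (FIR); the F64 bit (bit 22, FP64 above) being set means
        the FPU has 64-bit registers, i.e. 32 double-precision FPRs. */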
     /* Check if CPU has FPU and 32 dbl. prec. FP registers */
     int FIR = 0;
     __asm__ __volatile__(
        "cfc1 %0, $0"  "\n\t"
        : "=r" (FIR)
     );
     if (FIR & (1 << FP64)) {
        vai.hwcaps |= VEX_PRID_CPU_32FPR;
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
*/
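/* For example: on amd64 with AVX enabled this returns 32, so a cache
   simulator that insists on a line size of at least 32 bytes is
   guaranteed that even a 32-byte YMM load or store touches at most
   two lines. */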
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
      || defined(VGP_arm_linux) \
      || defined(VGP_ppc32_linux) || defined(VGO_darwin) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
   return f;
#  elif defined(VGP_ppc64_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/