1 /*--------------------------------------------------------------------*/ 2 /*--- Machine-related stuff. m_machine.c ---*/ 3 /*--------------------------------------------------------------------*/ 4 5 /* 6 This file is part of Valgrind, a dynamic binary instrumentation 7 framework. 8 9 Copyright (C) 2000-2017 Julian Seward 10 jseward (at) acm.org 11 12 This program is free software; you can redistribute it and/or 13 modify it under the terms of the GNU General Public License as 14 published by the Free Software Foundation; either version 2 of the 15 License, or (at your option) any later version. 16 17 This program is distributed in the hope that it will be useful, but 18 WITHOUT ANY WARRANTY; without even the implied warranty of 19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 General Public License for more details. 21 22 You should have received a copy of the GNU General Public License 23 along with this program; if not, write to the Free Software 24 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 25 02111-1307, USA. 26 27 The GNU General Public License is contained in the file COPYING. 
28 */ 29 30 #include "pub_core_basics.h" 31 #include "pub_core_vki.h" 32 #include "pub_core_threadstate.h" 33 #include "pub_core_libcassert.h" 34 #include "pub_core_libcbase.h" 35 #include "pub_core_libcfile.h" 36 #include "pub_core_libcprint.h" 37 #include "pub_core_libcproc.h" 38 #include "pub_core_mallocfree.h" 39 #include "pub_core_machine.h" 40 #include "pub_core_cpuid.h" 41 #include "pub_core_libcsignal.h" // for ppc32 messing with SIGILL and SIGFPE 42 #include "pub_core_debuglog.h" 43 44 45 #define INSTR_PTR(regs) ((regs).vex.VG_INSTR_PTR) 46 #define STACK_PTR(regs) ((regs).vex.VG_STACK_PTR) 47 #define FRAME_PTR(regs) ((regs).vex.VG_FRAME_PTR) 48 49 Addr VG_(get_IP) ( ThreadId tid ) { 50 return INSTR_PTR( VG_(threads)[tid].arch ); 51 } 52 Addr VG_(get_SP) ( ThreadId tid ) { 53 return STACK_PTR( VG_(threads)[tid].arch ); 54 } 55 Addr VG_(get_FP) ( ThreadId tid ) { 56 return FRAME_PTR( VG_(threads)[tid].arch ); 57 } 58 59 void VG_(set_IP) ( ThreadId tid, Addr ip ) { 60 INSTR_PTR( VG_(threads)[tid].arch ) = ip; 61 } 62 void VG_(set_SP) ( ThreadId tid, Addr sp ) { 63 STACK_PTR( VG_(threads)[tid].arch ) = sp; 64 } 65 66 void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs, 67 ThreadId tid ) 68 { 69 # if defined(VGA_x86) 70 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP; 71 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP; 72 regs->misc.X86.r_ebp 73 = VG_(threads)[tid].arch.vex.guest_EBP; 74 # elif defined(VGA_amd64) 75 regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP; 76 regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP; 77 regs->misc.AMD64.r_rbp 78 = VG_(threads)[tid].arch.vex.guest_RBP; 79 # elif defined(VGA_ppc32) 80 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA; 81 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1; 82 regs->misc.PPC32.r_lr 83 = VG_(threads)[tid].arch.vex.guest_LR; 84 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le) 85 regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA; 86 regs->r_sp = 
VG_(threads)[tid].arch.vex.guest_GPR1; 87 regs->misc.PPC64.r_lr 88 = VG_(threads)[tid].arch.vex.guest_LR; 89 # elif defined(VGA_arm) 90 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T; 91 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13; 92 regs->misc.ARM.r14 93 = VG_(threads)[tid].arch.vex.guest_R14; 94 regs->misc.ARM.r12 95 = VG_(threads)[tid].arch.vex.guest_R12; 96 regs->misc.ARM.r11 97 = VG_(threads)[tid].arch.vex.guest_R11; 98 regs->misc.ARM.r7 99 = VG_(threads)[tid].arch.vex.guest_R7; 100 # elif defined(VGA_arm64) 101 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC; 102 regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP; 103 regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29; 104 regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30; 105 # elif defined(VGA_s390x) 106 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA; 107 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP; 108 regs->misc.S390X.r_fp 109 = VG_(threads)[tid].arch.vex.guest_FP; 110 regs->misc.S390X.r_lr 111 = VG_(threads)[tid].arch.vex.guest_LR; 112 # elif defined(VGA_mips32) 113 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC; 114 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29; 115 regs->misc.MIPS32.r30 116 = VG_(threads)[tid].arch.vex.guest_r30; 117 regs->misc.MIPS32.r31 118 = VG_(threads)[tid].arch.vex.guest_r31; 119 regs->misc.MIPS32.r28 120 = VG_(threads)[tid].arch.vex.guest_r28; 121 # elif defined(VGA_mips64) 122 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC; 123 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29; 124 regs->misc.MIPS64.r30 125 = VG_(threads)[tid].arch.vex.guest_r30; 126 regs->misc.MIPS64.r31 127 = VG_(threads)[tid].arch.vex.guest_r31; 128 regs->misc.MIPS64.r28 129 = VG_(threads)[tid].arch.vex.guest_r28; 130 # else 131 # error "Unknown arch" 132 # endif 133 } 134 135 void 136 VG_(get_shadow_regs_area) ( ThreadId tid, 137 /*DST*/UChar* dst, 138 /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size ) 139 { 140 void* src; 141 
ThreadState* tst; 142 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2); 143 vg_assert(VG_(is_valid_tid)(tid)); 144 // Bounds check 145 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState)); 146 vg_assert(offset + size <= sizeof(VexGuestArchState)); 147 // Copy 148 tst = & VG_(threads)[tid]; 149 src = NULL; 150 switch (shadowNo) { 151 case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break; 152 case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break; 153 case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break; 154 } 155 vg_assert(src != NULL); 156 VG_(memcpy)( dst, src, size); 157 } 158 159 void 160 VG_(set_shadow_regs_area) ( ThreadId tid, 161 /*DST*/Int shadowNo, PtrdiffT offset, SizeT size, 162 /*SRC*/const UChar* src ) 163 { 164 void* dst; 165 ThreadState* tst; 166 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2); 167 vg_assert(VG_(is_valid_tid)(tid)); 168 // Bounds check 169 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState)); 170 vg_assert(offset + size <= sizeof(VexGuestArchState)); 171 // Copy 172 tst = & VG_(threads)[tid]; 173 dst = NULL; 174 switch (shadowNo) { 175 case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break; 176 case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break; 177 case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break; 178 } 179 vg_assert(dst != NULL); 180 VG_(memcpy)( dst, src, size); 181 } 182 183 184 static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId, 185 const HChar*, Addr)) 186 { 187 VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex); 188 VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid); 189 #if defined(VGA_x86) 190 (*f)(tid, "EAX", vex->guest_EAX); 191 (*f)(tid, "ECX", vex->guest_ECX); 192 (*f)(tid, "EDX", vex->guest_EDX); 193 (*f)(tid, "EBX", vex->guest_EBX); 194 (*f)(tid, "ESI", vex->guest_ESI); 195 (*f)(tid, "EDI", vex->guest_EDI); 196 (*f)(tid, "ESP", vex->guest_ESP); 197 
(*f)(tid, "EBP", vex->guest_EBP); 198 #elif defined(VGA_amd64) 199 (*f)(tid, "RAX", vex->guest_RAX); 200 (*f)(tid, "RCX", vex->guest_RCX); 201 (*f)(tid, "RDX", vex->guest_RDX); 202 (*f)(tid, "RBX", vex->guest_RBX); 203 (*f)(tid, "RSI", vex->guest_RSI); 204 (*f)(tid, "RDI", vex->guest_RDI); 205 (*f)(tid, "RSP", vex->guest_RSP); 206 (*f)(tid, "RBP", vex->guest_RBP); 207 (*f)(tid, "R8" , vex->guest_R8 ); 208 (*f)(tid, "R9" , vex->guest_R9 ); 209 (*f)(tid, "R10", vex->guest_R10); 210 (*f)(tid, "R11", vex->guest_R11); 211 (*f)(tid, "R12", vex->guest_R12); 212 (*f)(tid, "R13", vex->guest_R13); 213 (*f)(tid, "R14", vex->guest_R14); 214 (*f)(tid, "R15", vex->guest_R15); 215 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) 216 (*f)(tid, "GPR0" , vex->guest_GPR0 ); 217 (*f)(tid, "GPR1" , vex->guest_GPR1 ); 218 (*f)(tid, "GPR2" , vex->guest_GPR2 ); 219 (*f)(tid, "GPR3" , vex->guest_GPR3 ); 220 (*f)(tid, "GPR4" , vex->guest_GPR4 ); 221 (*f)(tid, "GPR5" , vex->guest_GPR5 ); 222 (*f)(tid, "GPR6" , vex->guest_GPR6 ); 223 (*f)(tid, "GPR7" , vex->guest_GPR7 ); 224 (*f)(tid, "GPR8" , vex->guest_GPR8 ); 225 (*f)(tid, "GPR9" , vex->guest_GPR9 ); 226 (*f)(tid, "GPR10", vex->guest_GPR10); 227 (*f)(tid, "GPR11", vex->guest_GPR11); 228 (*f)(tid, "GPR12", vex->guest_GPR12); 229 (*f)(tid, "GPR13", vex->guest_GPR13); 230 (*f)(tid, "GPR14", vex->guest_GPR14); 231 (*f)(tid, "GPR15", vex->guest_GPR15); 232 (*f)(tid, "GPR16", vex->guest_GPR16); 233 (*f)(tid, "GPR17", vex->guest_GPR17); 234 (*f)(tid, "GPR18", vex->guest_GPR18); 235 (*f)(tid, "GPR19", vex->guest_GPR19); 236 (*f)(tid, "GPR20", vex->guest_GPR20); 237 (*f)(tid, "GPR21", vex->guest_GPR21); 238 (*f)(tid, "GPR22", vex->guest_GPR22); 239 (*f)(tid, "GPR23", vex->guest_GPR23); 240 (*f)(tid, "GPR24", vex->guest_GPR24); 241 (*f)(tid, "GPR25", vex->guest_GPR25); 242 (*f)(tid, "GPR26", vex->guest_GPR26); 243 (*f)(tid, "GPR27", vex->guest_GPR27); 244 (*f)(tid, "GPR28", vex->guest_GPR28); 245 (*f)(tid, "GPR29", 
vex->guest_GPR29); 246 (*f)(tid, "GPR30", vex->guest_GPR30); 247 (*f)(tid, "GPR31", vex->guest_GPR31); 248 (*f)(tid, "CTR" , vex->guest_CTR ); 249 (*f)(tid, "LR" , vex->guest_LR ); 250 #elif defined(VGA_arm) 251 (*f)(tid, "R0" , vex->guest_R0 ); 252 (*f)(tid, "R1" , vex->guest_R1 ); 253 (*f)(tid, "R2" , vex->guest_R2 ); 254 (*f)(tid, "R3" , vex->guest_R3 ); 255 (*f)(tid, "R4" , vex->guest_R4 ); 256 (*f)(tid, "R5" , vex->guest_R5 ); 257 (*f)(tid, "R6" , vex->guest_R6 ); 258 (*f)(tid, "R8" , vex->guest_R8 ); 259 (*f)(tid, "R9" , vex->guest_R9 ); 260 (*f)(tid, "R10", vex->guest_R10); 261 (*f)(tid, "R11", vex->guest_R11); 262 (*f)(tid, "R12", vex->guest_R12); 263 (*f)(tid, "R13", vex->guest_R13); 264 (*f)(tid, "R14", vex->guest_R14); 265 #elif defined(VGA_s390x) 266 (*f)(tid, "r0" , vex->guest_r0 ); 267 (*f)(tid, "r1" , vex->guest_r1 ); 268 (*f)(tid, "r2" , vex->guest_r2 ); 269 (*f)(tid, "r3" , vex->guest_r3 ); 270 (*f)(tid, "r4" , vex->guest_r4 ); 271 (*f)(tid, "r5" , vex->guest_r5 ); 272 (*f)(tid, "r6" , vex->guest_r6 ); 273 (*f)(tid, "r7" , vex->guest_r7 ); 274 (*f)(tid, "r8" , vex->guest_r8 ); 275 (*f)(tid, "r9" , vex->guest_r9 ); 276 (*f)(tid, "r10", vex->guest_r10); 277 (*f)(tid, "r11", vex->guest_r11); 278 (*f)(tid, "r12", vex->guest_r12); 279 (*f)(tid, "r13", vex->guest_r13); 280 (*f)(tid, "r14", vex->guest_r14); 281 (*f)(tid, "r15", vex->guest_r15); 282 #elif defined(VGA_mips32) || defined(VGA_mips64) 283 (*f)(tid, "r0" , vex->guest_r0 ); 284 (*f)(tid, "r1" , vex->guest_r1 ); 285 (*f)(tid, "r2" , vex->guest_r2 ); 286 (*f)(tid, "r3" , vex->guest_r3 ); 287 (*f)(tid, "r4" , vex->guest_r4 ); 288 (*f)(tid, "r5" , vex->guest_r5 ); 289 (*f)(tid, "r6" , vex->guest_r6 ); 290 (*f)(tid, "r7" , vex->guest_r7 ); 291 (*f)(tid, "r8" , vex->guest_r8 ); 292 (*f)(tid, "r9" , vex->guest_r9 ); 293 (*f)(tid, "r10", vex->guest_r10); 294 (*f)(tid, "r11", vex->guest_r11); 295 (*f)(tid, "r12", vex->guest_r12); 296 (*f)(tid, "r13", vex->guest_r13); 297 (*f)(tid, "r14", vex->guest_r14); 
298 (*f)(tid, "r15", vex->guest_r15); 299 (*f)(tid, "r16", vex->guest_r16); 300 (*f)(tid, "r17", vex->guest_r17); 301 (*f)(tid, "r18", vex->guest_r18); 302 (*f)(tid, "r19", vex->guest_r19); 303 (*f)(tid, "r20", vex->guest_r20); 304 (*f)(tid, "r21", vex->guest_r21); 305 (*f)(tid, "r22", vex->guest_r22); 306 (*f)(tid, "r23", vex->guest_r23); 307 (*f)(tid, "r24", vex->guest_r24); 308 (*f)(tid, "r25", vex->guest_r25); 309 (*f)(tid, "r26", vex->guest_r26); 310 (*f)(tid, "r27", vex->guest_r27); 311 (*f)(tid, "r28", vex->guest_r28); 312 (*f)(tid, "r29", vex->guest_r29); 313 (*f)(tid, "r30", vex->guest_r30); 314 (*f)(tid, "r31", vex->guest_r31); 315 #elif defined(VGA_arm64) 316 (*f)(tid, "x0" , vex->guest_X0 ); 317 (*f)(tid, "x1" , vex->guest_X1 ); 318 (*f)(tid, "x2" , vex->guest_X2 ); 319 (*f)(tid, "x3" , vex->guest_X3 ); 320 (*f)(tid, "x4" , vex->guest_X4 ); 321 (*f)(tid, "x5" , vex->guest_X5 ); 322 (*f)(tid, "x6" , vex->guest_X6 ); 323 (*f)(tid, "x7" , vex->guest_X7 ); 324 (*f)(tid, "x8" , vex->guest_X8 ); 325 (*f)(tid, "x9" , vex->guest_X9 ); 326 (*f)(tid, "x10", vex->guest_X10); 327 (*f)(tid, "x11", vex->guest_X11); 328 (*f)(tid, "x12", vex->guest_X12); 329 (*f)(tid, "x13", vex->guest_X13); 330 (*f)(tid, "x14", vex->guest_X14); 331 (*f)(tid, "x15", vex->guest_X15); 332 (*f)(tid, "x16", vex->guest_X16); 333 (*f)(tid, "x17", vex->guest_X17); 334 (*f)(tid, "x18", vex->guest_X18); 335 (*f)(tid, "x19", vex->guest_X19); 336 (*f)(tid, "x20", vex->guest_X20); 337 (*f)(tid, "x21", vex->guest_X21); 338 (*f)(tid, "x22", vex->guest_X22); 339 (*f)(tid, "x23", vex->guest_X23); 340 (*f)(tid, "x24", vex->guest_X24); 341 (*f)(tid, "x25", vex->guest_X25); 342 (*f)(tid, "x26", vex->guest_X26); 343 (*f)(tid, "x27", vex->guest_X27); 344 (*f)(tid, "x28", vex->guest_X28); 345 (*f)(tid, "x29", vex->guest_X29); 346 (*f)(tid, "x30", vex->guest_X30); 347 #else 348 # error Unknown arch 349 #endif 350 } 351 352 353 void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord)) 354 { 355 
ThreadId tid; 356 357 for (tid = 1; tid < VG_N_THREADS; tid++) { 358 if (VG_(is_valid_tid)(tid) 359 || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) { 360 // live thread or thread instructed to die by another thread that 361 // called exit. 362 apply_to_GPs_of_tid(tid, f); 363 } 364 } 365 } 366 367 void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid) 368 { 369 *tid = (ThreadId)(-1); 370 } 371 372 Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid, 373 /*OUT*/Addr* stack_min, 374 /*OUT*/Addr* stack_max) 375 { 376 ThreadId i; 377 for (i = (*tid)+1; i < VG_N_THREADS; i++) { 378 if (i == VG_INVALID_THREADID) 379 continue; 380 if (VG_(threads)[i].status != VgTs_Empty) { 381 *tid = i; 382 *stack_min = VG_(get_SP)(i); 383 *stack_max = VG_(threads)[i].client_stack_highest_byte; 384 return True; 385 } 386 } 387 return False; 388 } 389 390 Addr VG_(thread_get_stack_max)(ThreadId tid) 391 { 392 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID); 393 vg_assert(VG_(threads)[tid].status != VgTs_Empty); 394 return VG_(threads)[tid].client_stack_highest_byte; 395 } 396 397 SizeT VG_(thread_get_stack_size)(ThreadId tid) 398 { 399 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID); 400 vg_assert(VG_(threads)[tid].status != VgTs_Empty); 401 return VG_(threads)[tid].client_stack_szB; 402 } 403 404 Addr VG_(thread_get_altstack_min)(ThreadId tid) 405 { 406 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID); 407 vg_assert(VG_(threads)[tid].status != VgTs_Empty); 408 return (Addr)VG_(threads)[tid].altstack.ss_sp; 409 } 410 411 SizeT VG_(thread_get_altstack_size)(ThreadId tid) 412 { 413 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID); 414 vg_assert(VG_(threads)[tid].status != VgTs_Empty); 415 return VG_(threads)[tid].altstack.ss_size; 416 } 417 418 //------------------------------------------------------------- 419 /* Details about the capabilities of the underlying (host) CPU. 
These details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
/* Set to True by VG_(machine_get_hwcaps), which asserts it is still
   False on entry -- i.e. detection is expected to run exactly once. */
static Bool hwcaps_done = False;

/* --- all archs --- */
/* Host architecture and its capability record; both are filled in by
   VG_(machine_get_hwcaps). */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
/* Set to 1 by VG_(machine_get_hwcaps) when SSE1 is detected, else 0. */
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, s390x, arm and mips32 we'll need to
   do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
/* Ignores its argument and long-jumps back to whoever last did
   VG_MINIMAL_SETJMP(env_unsup_insn).  Presumably installed as the
   handler for the probing signals -- the installation itself is not
   in this part of the file. */
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
469 * 470 * Not very defensive: assumes that as long as the dcbz/dcbzl 471 * instructions don't raise a SIGILL, that they will zero an aligned, 472 * contiguous block of memory of a sensible size. */ 473 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) 474 static void find_ppc_dcbz_sz(VexArchInfo *arch_info) 475 { 476 Int dcbz_szB = 0; 477 Int dcbzl_szB; 478 # define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */ 479 char test_block[4*MAX_DCBZL_SZB]; 480 char *aligned = test_block; 481 Int i; 482 483 /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */ 484 aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1)); 485 vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]); 486 487 /* dcbz often clears 32B, although sometimes whatever the native cache 488 * block size is */ 489 VG_(memset)(test_block, 0xff, sizeof(test_block)); 490 __asm__ __volatile__("dcbz 0,%0" 491 : /*out*/ 492 : "r" (aligned) /*in*/ 493 : "memory" /*clobber*/); 494 for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) { 495 if (!test_block[i]) 496 ++dcbz_szB; 497 } 498 vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128); 499 500 /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */ 501 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 502 dcbzl_szB = 0; /* indicates unsupported */ 503 } 504 else { 505 VG_(memset)(test_block, 0xff, sizeof(test_block)); 506 /* some older assemblers won't understand the dcbzl instruction 507 * variant, so we directly emit the instruction ourselves */ 508 __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/ 509 : /*out*/ 510 : "r" (aligned) /*in*/ 511 : "memory", "r9" /*clobber*/); 512 for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) { 513 if (!test_block[i]) 514 ++dcbzl_szB; 515 } 516 vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128); 517 } 518 519 arch_info->ppc_dcbz_szB = dcbz_szB; 520 
arch_info->ppc_dcbzl_szB = dcbzl_szB; 521 522 VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n", 523 dcbz_szB, dcbzl_szB); 524 # undef MAX_DCBZL_SZB 525 } 526 #endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */ 527 528 #ifdef VGA_s390x 529 530 /* Read /proc/cpuinfo. Look for lines like these 531 532 processor 0: version = FF, identification = 0117C9, machine = 2064 533 534 and return the machine model. If the machine model could not be determined 535 or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */ 536 537 static UInt VG_(get_machine_model)(void) 538 { 539 static struct model_map { 540 const HChar name[5]; 541 UInt id; 542 } model_map[] = { 543 { "2064", VEX_S390X_MODEL_Z900 }, 544 { "2066", VEX_S390X_MODEL_Z800 }, 545 { "2084", VEX_S390X_MODEL_Z990 }, 546 { "2086", VEX_S390X_MODEL_Z890 }, 547 { "2094", VEX_S390X_MODEL_Z9_EC }, 548 { "2096", VEX_S390X_MODEL_Z9_BC }, 549 { "2097", VEX_S390X_MODEL_Z10_EC }, 550 { "2098", VEX_S390X_MODEL_Z10_BC }, 551 { "2817", VEX_S390X_MODEL_Z196 }, 552 { "2818", VEX_S390X_MODEL_Z114 }, 553 { "2827", VEX_S390X_MODEL_ZEC12 }, 554 { "2828", VEX_S390X_MODEL_ZBC12 }, 555 { "2964", VEX_S390X_MODEL_Z13 }, 556 { "2965", VEX_S390X_MODEL_Z13S }, 557 }; 558 559 Int model, n, fh; 560 SysRes fd; 561 SizeT num_bytes, file_buf_size; 562 HChar *p, *m, *model_name, *file_buf; 563 564 /* Slurp contents of /proc/cpuinfo into FILE_BUF */ 565 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR ); 566 if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN; 567 568 fh = sr_Res(fd); 569 570 /* Determine the size of /proc/cpuinfo. 571 Work around broken-ness in /proc file system implementation. 572 fstat returns a zero size for /proc/cpuinfo although it is 573 claimed to be a regular file. 
*/ 574 num_bytes = 0; 575 file_buf_size = 1000; 576 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1); 577 while (42) { 578 n = VG_(read)(fh, file_buf, file_buf_size); 579 if (n < 0) break; 580 581 num_bytes += n; 582 if (n < file_buf_size) break; /* reached EOF */ 583 } 584 585 if (n < 0) num_bytes = 0; /* read error; ignore contents */ 586 587 if (num_bytes > file_buf_size) { 588 VG_(free)( file_buf ); 589 VG_(lseek)( fh, 0, VKI_SEEK_SET ); 590 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 ); 591 n = VG_(read)( fh, file_buf, num_bytes ); 592 if (n < 0) num_bytes = 0; 593 } 594 595 file_buf[num_bytes] = '\0'; 596 VG_(close)(fh); 597 598 /* Parse file */ 599 model = VEX_S390X_MODEL_UNKNOWN; 600 for (p = file_buf; *p; ++p) { 601 /* Beginning of line */ 602 if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue; 603 604 m = VG_(strstr)( p, "machine" ); 605 if (m == NULL) continue; 606 607 p = m + sizeof "machine" - 1; 608 while ( VG_(isspace)( *p ) || *p == '=') { 609 if (*p == '\n') goto next_line; 610 ++p; 611 } 612 613 model_name = p; 614 for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) { 615 struct model_map *mm = model_map + n; 616 SizeT len = VG_(strlen)( mm->name ); 617 if ( VG_(strncmp)( mm->name, model_name, len ) == 0 && 618 VG_(isspace)( model_name[len] )) { 619 if (mm->id < model) model = mm->id; 620 p = model_name + len; 621 break; 622 } 623 } 624 /* Skip until end-of-line */ 625 while (*p != '\n') 626 ++p; 627 next_line: ; 628 } 629 630 VG_(free)( file_buf ); 631 VG_(debugLog)(1, "machine", "model = %s\n", 632 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN" 633 : model_map[model].name); 634 return model; 635 } 636 637 #endif /* defined(VGA_s390x) */ 638 639 #if defined(VGA_mips32) || defined(VGA_mips64) 640 641 /* 642 * Initialize hwcaps by parsing /proc/cpuinfo . Returns False if it can not 643 * determine what CPU it is (it searches only for the models that are or may be 644 * supported by Valgrind). 
645 */ 646 static Bool VG_(parse_cpuinfo)(void) 647 { 648 const char *search_Broadcom_str = "cpu model\t\t: Broadcom"; 649 const char *search_Cavium_str= "cpu model\t\t: Cavium"; 650 const char *search_Ingenic_str= "cpu model\t\t: Ingenic"; 651 const char *search_Loongson_str= "cpu model\t\t: ICT Loongson"; 652 const char *search_MIPS_str = "cpu model\t\t: MIPS"; 653 const char *search_Netlogic_str = "cpu model\t\t: Netlogic"; 654 655 Int n, fh; 656 SysRes fd; 657 SizeT num_bytes, file_buf_size; 658 HChar *file_buf, *isa; 659 660 /* Slurp contents of /proc/cpuinfo into FILE_BUF */ 661 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR ); 662 if ( sr_isError(fd) ) return False; 663 664 fh = sr_Res(fd); 665 666 /* Determine the size of /proc/cpuinfo. 667 Work around broken-ness in /proc file system implementation. 668 fstat returns a zero size for /proc/cpuinfo although it is 669 claimed to be a regular file. */ 670 num_bytes = 0; 671 file_buf_size = 1000; 672 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1); 673 while (42) { 674 n = VG_(read)(fh, file_buf, file_buf_size); 675 if (n < 0) break; 676 677 num_bytes += n; 678 if (n < file_buf_size) break; /* reached EOF */ 679 } 680 681 if (n < 0) num_bytes = 0; /* read error; ignore contents */ 682 683 if (num_bytes > file_buf_size) { 684 VG_(free)( file_buf ); 685 VG_(lseek)( fh, 0, VKI_SEEK_SET ); 686 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 ); 687 n = VG_(read)( fh, file_buf, num_bytes ); 688 if (n < 0) num_bytes = 0; 689 } 690 691 file_buf[num_bytes] = '\0'; 692 VG_(close)(fh); 693 694 /* Parse file */ 695 if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL) 696 vai.hwcaps = VEX_PRID_COMP_BROADCOM; 697 else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL) 698 vai.hwcaps = VEX_PRID_COMP_NETLOGIC; 699 else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL) 700 vai.hwcaps = VEX_PRID_COMP_CAVIUM; 701 else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL) 702 vai.hwcaps = VEX_PRID_COMP_MIPS; 703 
else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL) 704 vai.hwcaps = VEX_PRID_COMP_INGENIC_E1; 705 else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL) 706 vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64); 707 else { 708 /* Did not find string in the proc file. */ 709 vai.hwcaps = 0; 710 VG_(free)(file_buf); 711 return False; 712 } 713 714 isa = VG_(strstr)(file_buf, "isa\t\t\t: "); 715 716 if (NULL != isa) { 717 if (VG_(strstr) (isa, "mips32r1") != NULL) 718 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1; 719 if (VG_(strstr) (isa, "mips32r2") != NULL) 720 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2; 721 if (VG_(strstr) (isa, "mips32r6") != NULL) 722 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6; 723 if (VG_(strstr) (isa, "mips64r1") != NULL) 724 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1; 725 if (VG_(strstr) (isa, "mips64r2") != NULL) 726 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2; 727 if (VG_(strstr) (isa, "mips64r6") != NULL) 728 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6; 729 730 /* 731 * TODO(petarj): Remove this Cavium workaround once Linux kernel folks 732 * decide to change incorrect settings in 733 * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h. 734 * The current settings show mips32r1, mips32r2 and mips64r1 as 735 * unsupported ISAs by Cavium MIPS CPUs. 736 */ 737 if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) { 738 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 | 739 VEX_MIPS_CPU_ISA_M64R1; 740 } 741 } else { 742 /* 743 * Kernel does not provide information about supported ISAs. 744 * Populate the isa level flags based on the CPU model. That is our 745 * best guess. 
746 */ 747 switch VEX_MIPS_COMP_ID(vai.hwcaps) { 748 case VEX_PRID_COMP_CAVIUM: 749 case VEX_PRID_COMP_NETLOGIC: 750 vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1); 751 case VEX_PRID_COMP_INGENIC_E1: 752 case VEX_PRID_COMP_MIPS: 753 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2; 754 case VEX_PRID_COMP_BROADCOM: 755 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1; 756 break; 757 case VEX_PRID_COMP_LEGACY: 758 if ((VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64)) 759 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 | 760 VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1; 761 break; 762 default: 763 break; 764 } 765 } 766 VG_(free)(file_buf); 767 return True; 768 } 769 770 #endif /* defined(VGA_mips32) || defined(VGA_mips64) */ 771 772 #if defined(VGP_arm64_linux) 773 774 /* Check to see whether we are running on a Cavium core, and if so auto-enable 775 the fallback LLSC implementation. See #369459. */ 776 777 static Bool VG_(parse_cpuinfo)(void) 778 { 779 const char *search_Cavium_str = "CPU implementer\t: 0x43"; 780 781 Int n, fh; 782 SysRes fd; 783 SizeT num_bytes, file_buf_size; 784 HChar *file_buf; 785 786 /* Slurp contents of /proc/cpuinfo into FILE_BUF */ 787 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR ); 788 if ( sr_isError(fd) ) return False; 789 790 fh = sr_Res(fd); 791 792 /* Determine the size of /proc/cpuinfo. 793 Work around broken-ness in /proc file system implementation. 794 fstat returns a zero size for /proc/cpuinfo although it is 795 claimed to be a regular file. 
*/ 796 num_bytes = 0; 797 file_buf_size = 1000; 798 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1); 799 while (42) { 800 n = VG_(read)(fh, file_buf, file_buf_size); 801 if (n < 0) break; 802 803 num_bytes += n; 804 if (n < file_buf_size) break; /* reached EOF */ 805 } 806 807 if (n < 0) num_bytes = 0; /* read error; ignore contents */ 808 809 if (num_bytes > file_buf_size) { 810 VG_(free)( file_buf ); 811 VG_(lseek)( fh, 0, VKI_SEEK_SET ); 812 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 ); 813 n = VG_(read)( fh, file_buf, num_bytes ); 814 if (n < 0) num_bytes = 0; 815 } 816 817 file_buf[num_bytes] = '\0'; 818 VG_(close)(fh); 819 820 /* Parse file */ 821 if (VG_(strstr)(file_buf, search_Cavium_str) != NULL) 822 vai.arm64_requires_fallback_LLSC = True; 823 824 VG_(free)(file_buf); 825 return True; 826 } 827 828 #endif /* defined(VGP_arm64_linux) */ 829 830 Bool VG_(machine_get_hwcaps)( void ) 831 { 832 vg_assert(hwcaps_done == False); 833 hwcaps_done = True; 834 835 // Whack default settings into vai, so that we only need to fill in 836 // any interesting bits. 837 LibVEX_default_VexArchInfo(&vai); 838 839 #if defined(VGA_x86) 840 { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext; 841 UInt eax, ebx, ecx, edx, max_extended; 842 HChar vstr[13]; 843 vstr[0] = 0; 844 845 if (!VG_(has_cpuid)()) 846 /* we can't do cpuid at all. Give up. */ 847 return False; 848 849 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx); 850 if (eax < 1) 851 /* we can't ask for cpuid(x) for x > 0. Give up. */ 852 return False; 853 854 /* Get processor ID string, and max basic/extended index 855 values. 
*/ 856 VG_(memcpy)(&vstr[0], &ebx, 4); 857 VG_(memcpy)(&vstr[4], &edx, 4); 858 VG_(memcpy)(&vstr[8], &ecx, 4); 859 vstr[12] = 0; 860 861 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx); 862 max_extended = eax; 863 864 /* get capabilities bits into edx */ 865 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx); 866 867 have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */ 868 have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */ 869 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */ 870 871 /* cmpxchg8b is a minimum requirement now; if we don't have it we 872 must simply give up. But all CPUs since Pentium-I have it, so 873 that doesn't seem like much of a restriction. */ 874 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */ 875 if (!have_cx8) 876 return False; 877 878 /* Figure out if this is an AMD that can do MMXEXT. */ 879 have_mmxext = False; 880 if (0 == VG_(strcmp)(vstr, "AuthenticAMD") 881 && max_extended >= 0x80000001) { 882 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx); 883 /* Some older AMD processors support a sse1 subset (Integer SSE). */ 884 have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0); 885 } 886 887 /* Figure out if this is an AMD or Intel that can do LZCNT. */ 888 have_lzcnt = False; 889 if ((0 == VG_(strcmp)(vstr, "AuthenticAMD") 890 || 0 == VG_(strcmp)(vstr, "GenuineIntel")) 891 && max_extended >= 0x80000001) { 892 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx); 893 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */ 894 } 895 896 /* Intel processors don't define the mmxext extension, but since it 897 is just a sse1 subset always define it when we have sse1. 
*/ 898 if (have_sse1) 899 have_mmxext = True; 900 901 va = VexArchX86; 902 vai.endness = VexEndnessLE; 903 904 if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) { 905 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; 906 vai.hwcaps |= VEX_HWCAPS_X86_SSE1; 907 vai.hwcaps |= VEX_HWCAPS_X86_SSE2; 908 vai.hwcaps |= VEX_HWCAPS_X86_SSE3; 909 if (have_lzcnt) 910 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT; 911 VG_(machine_x86_have_mxcsr) = 1; 912 } else if (have_sse2 && have_sse1 && have_mmxext) { 913 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; 914 vai.hwcaps |= VEX_HWCAPS_X86_SSE1; 915 vai.hwcaps |= VEX_HWCAPS_X86_SSE2; 916 if (have_lzcnt) 917 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT; 918 VG_(machine_x86_have_mxcsr) = 1; 919 } else if (have_sse1 && have_mmxext) { 920 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; 921 vai.hwcaps |= VEX_HWCAPS_X86_SSE1; 922 VG_(machine_x86_have_mxcsr) = 1; 923 } else if (have_mmxext) { 924 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/ 925 VG_(machine_x86_have_mxcsr) = 0; 926 } else { 927 vai.hwcaps = 0; /*baseline - no sse at all*/ 928 VG_(machine_x86_have_mxcsr) = 0; 929 } 930 931 VG_(machine_get_cache_info)(&vai); 932 933 return True; 934 } 935 936 #elif defined(VGA_amd64) 937 { Bool have_sse3, have_cx8, have_cx16; 938 Bool have_lzcnt, have_avx, have_bmi, have_avx2; 939 Bool have_rdtscp; 940 UInt eax, ebx, ecx, edx, max_basic, max_extended; 941 ULong xgetbv_0 = 0; 942 HChar vstr[13]; 943 vstr[0] = 0; 944 945 if (!VG_(has_cpuid)()) 946 /* we can't do cpuid at all. Give up. */ 947 return False; 948 949 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx); 950 max_basic = eax; 951 if (max_basic < 1) 952 /* we can't ask for cpuid(x) for x > 0. Give up. */ 953 return False; 954 955 /* Get processor ID string, and max basic/extended index 956 values. 
*/ 957 VG_(memcpy)(&vstr[0], &ebx, 4); 958 VG_(memcpy)(&vstr[4], &edx, 4); 959 VG_(memcpy)(&vstr[8], &ecx, 4); 960 vstr[12] = 0; 961 962 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx); 963 max_extended = eax; 964 965 /* get capabilities bits into edx */ 966 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx); 967 968 // we assume that SSE1 and SSE2 are available by default 969 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */ 970 // ssse3 is ecx:9 971 // sse41 is ecx:19 972 // sse42 is ecx:20 973 974 // xsave is ecx:26 975 // osxsave is ecx:27 976 // avx is ecx:28 977 // fma is ecx:12 978 have_avx = False; 979 /* have_fma = False; */ 980 if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) { 981 /* Processor supports AVX instructions and XGETBV is enabled 982 by OS and AVX instructions are enabled by the OS. */ 983 ULong w; 984 __asm__ __volatile__("movq $0,%%rcx ; " 985 ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */ 986 "movq %%rax,%0" 987 :/*OUT*/"=r"(w) :/*IN*/ 988 :/*TRASH*/"rdx","rcx","rax"); 989 xgetbv_0 = w; 990 if ((xgetbv_0 & 7) == 7) { 991 /* Only say we have AVX if the XSAVE-allowable 992 bitfield-mask allows x87, SSE and AVX state. We could 993 actually run with a more restrictive XGETBV(0) value, 994 but VEX's implementation of XSAVE and XRSTOR assumes 995 that all 3 bits are enabled. 996 997 Also, the VEX implementation of XSAVE/XRSTOR assumes that 998 state component [2] (the YMM high halves) are located in 999 the XSAVE image at offsets 576 .. 831. So we have to 1000 check that here before declaring AVX to be supported. */ 1001 UInt eax2, ebx2, ecx2, edx2; 1002 VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2); 1003 if (ebx2 == 576 && eax2 == 256) { 1004 have_avx = True; 1005 } 1006 /* have_fma = (ecx & (1<<12)) != 0; */ 1007 /* have_fma: Probably correct, but gcc complains due to 1008 unusedness. */ 1009 } 1010 } 1011 1012 /* cmpxchg8b is a minimum requirement now; if we don't have it we 1013 must simply give up. 
But all CPUs since Pentium-I have it, so 1014 that doesn't seem like much of a restriction. */ 1015 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */ 1016 if (!have_cx8) 1017 return False; 1018 1019 /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */ 1020 have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */ 1021 1022 /* Figure out if this CPU can do LZCNT. */ 1023 have_lzcnt = False; 1024 if (max_extended >= 0x80000001) { 1025 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx); 1026 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */ 1027 } 1028 1029 /* Can we do RDTSCP? */ 1030 have_rdtscp = False; 1031 if (max_extended >= 0x80000001) { 1032 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx); 1033 have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSVCP */ 1034 } 1035 1036 /* Check for BMI1 and AVX2. If we have AVX1 (plus OS support). */ 1037 have_bmi = False; 1038 have_avx2 = False; 1039 if (have_avx && max_basic >= 7) { 1040 VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx); 1041 have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */ 1042 have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */ 1043 } 1044 1045 va = VexArchAMD64; 1046 vai.endness = VexEndnessLE; 1047 vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0) 1048 | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0) 1049 | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0) 1050 | (have_avx ? VEX_HWCAPS_AMD64_AVX : 0) 1051 | (have_bmi ? VEX_HWCAPS_AMD64_BMI : 0) 1052 | (have_avx2 ? VEX_HWCAPS_AMD64_AVX2 : 0) 1053 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0); 1054 1055 VG_(machine_get_cache_info)(&vai); 1056 1057 return True; 1058 } 1059 1060 #elif defined(VGA_ppc32) 1061 { 1062 /* Find out which subset of the ppc32 instruction set is supported by 1063 verifying whether various ppc32 instructions generate a SIGILL 1064 or a SIGFPE. An alternative approach is to check the AT_HWCAP and 1065 AT_PLATFORM entries in the ELF auxiliary table -- see also 1066 the_iifii.client_auxv in m_main.c. 
1067 */ 1068 vki_sigset_t saved_set, tmp_set; 1069 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act; 1070 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act; 1071 1072 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP; 1073 volatile Bool have_isa_2_07, have_isa_3_0; 1074 Int r; 1075 1076 /* This is a kludge. Really we ought to back-convert saved_act 1077 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but 1078 since that's a no-op on all ppc32 platforms so far supported, 1079 it's not worth the typing effort. At least include most basic 1080 sanity check: */ 1081 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); 1082 1083 VG_(sigemptyset)(&tmp_set); 1084 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 1085 VG_(sigaddset)(&tmp_set, VKI_SIGFPE); 1086 1087 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 1088 vg_assert(r == 0); 1089 1090 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 1091 vg_assert(r == 0); 1092 tmp_sigill_act = saved_sigill_act; 1093 1094 r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act); 1095 vg_assert(r == 0); 1096 tmp_sigfpe_act = saved_sigfpe_act; 1097 1098 /* NODEFER: signal handler does not return (from the kernel's point of 1099 view), hence if it is to successfully catch a signal more than once, 1100 we need the NODEFER flag. 
*/ 1101 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 1102 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 1103 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 1104 tmp_sigill_act.ksa_handler = handler_unsup_insn; 1105 r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1106 vg_assert(r == 0); 1107 1108 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND; 1109 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO; 1110 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER; 1111 tmp_sigfpe_act.ksa_handler = handler_unsup_insn; 1112 r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL); 1113 vg_assert(r == 0); 1114 1115 /* standard FP insns */ 1116 have_F = True; 1117 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1118 have_F = False; 1119 } else { 1120 __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */ 1121 } 1122 1123 /* Altivec insns */ 1124 have_V = True; 1125 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1126 have_V = False; 1127 } else { 1128 /* Unfortunately some older assemblers don't speak Altivec (or 1129 choose not to), so to be safe we directly emit the 32-bit 1130 word corresponding to "vor 0,0,0". This fixes a build 1131 problem that happens on Debian 3.1 (ppc32), and probably 1132 various other places. 
*/ 1133 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/ 1134 } 1135 1136 /* General-Purpose optional (fsqrt, fsqrts) */ 1137 have_FX = True; 1138 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1139 have_FX = False; 1140 } else { 1141 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */ 1142 } 1143 1144 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */ 1145 have_GX = True; 1146 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1147 have_GX = False; 1148 } else { 1149 __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */ 1150 } 1151 1152 /* VSX support implies Power ISA 2.06 */ 1153 have_VX = True; 1154 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1155 have_VX = False; 1156 } else { 1157 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */ 1158 } 1159 1160 /* Check for Decimal Floating Point (DFP) support. */ 1161 have_DFP = True; 1162 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1163 have_DFP = False; 1164 } else { 1165 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */ 1166 } 1167 1168 /* Check for ISA 2.07 support. */ 1169 have_isa_2_07 = True; 1170 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1171 have_isa_2_07 = False; 1172 } else { 1173 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */ 1174 } 1175 1176 /* Check for ISA 3.0 support. 
*/ 1177 have_isa_3_0 = True; 1178 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1179 have_isa_3_0 = False; 1180 } else { 1181 __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */ 1182 } 1183 1184 /* determine dcbz/dcbzl sizes while we still have the signal 1185 * handlers registered */ 1186 find_ppc_dcbz_sz(&vai); 1187 1188 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL); 1189 vg_assert(r == 0); 1190 r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL); 1191 vg_assert(r == 0); 1192 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 1193 vg_assert(r == 0); 1194 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n", 1195 (Int)have_F, (Int)have_V, (Int)have_FX, 1196 (Int)have_GX, (Int)have_VX, (Int)have_DFP, 1197 (Int)have_isa_2_07, (Int)have_isa_3_0); 1198 /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */ 1199 if (have_V && !have_F) 1200 have_V = False; 1201 if (have_FX && !have_F) 1202 have_FX = False; 1203 if (have_GX && !have_F) 1204 have_GX = False; 1205 1206 VG_(machine_ppc32_has_FP) = have_F ? 1 : 0; 1207 VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0; 1208 1209 va = VexArchPPC32; 1210 vai.endness = VexEndnessBE; 1211 1212 vai.hwcaps = 0; 1213 if (have_F) vai.hwcaps |= VEX_HWCAPS_PPC32_F; 1214 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC32_V; 1215 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX; 1216 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX; 1217 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX; 1218 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP; 1219 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07; 1220 if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0; 1221 1222 VG_(machine_get_cache_info)(&vai); 1223 1224 /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be 1225 called before we're ready to go. */ 1226 return True; 1227 } 1228 1229 #elif defined(VGA_ppc64be)|| defined(VGA_ppc64le) 1230 { 1231 /* Same instruction set detection algorithm as for ppc32. 
*/ 1232 vki_sigset_t saved_set, tmp_set; 1233 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act; 1234 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act; 1235 1236 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP; 1237 volatile Bool have_isa_2_07, have_isa_3_0; 1238 Int r; 1239 1240 /* This is a kludge. Really we ought to back-convert saved_act 1241 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but 1242 since that's a no-op on all ppc64 platforms so far supported, 1243 it's not worth the typing effort. At least include most basic 1244 sanity check: */ 1245 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); 1246 1247 VG_(sigemptyset)(&tmp_set); 1248 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 1249 VG_(sigaddset)(&tmp_set, VKI_SIGFPE); 1250 1251 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 1252 vg_assert(r == 0); 1253 1254 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 1255 vg_assert(r == 0); 1256 tmp_sigill_act = saved_sigill_act; 1257 1258 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act); 1259 tmp_sigfpe_act = saved_sigfpe_act; 1260 1261 /* NODEFER: signal handler does not return (from the kernel's point of 1262 view), hence if it is to successfully catch a signal more than once, 1263 we need the NODEFER flag. 
*/ 1264 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 1265 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 1266 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 1267 tmp_sigill_act.ksa_handler = handler_unsup_insn; 1268 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1269 1270 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND; 1271 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO; 1272 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER; 1273 tmp_sigfpe_act.ksa_handler = handler_unsup_insn; 1274 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL); 1275 1276 /* standard FP insns */ 1277 have_F = True; 1278 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1279 have_F = False; 1280 } else { 1281 __asm__ __volatile__("fmr 0,0"); 1282 } 1283 1284 /* Altivec insns */ 1285 have_V = True; 1286 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1287 have_V = False; 1288 } else { 1289 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/ 1290 } 1291 1292 /* General-Purpose optional (fsqrt, fsqrts) */ 1293 have_FX = True; 1294 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1295 have_FX = False; 1296 } else { 1297 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/ 1298 } 1299 1300 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */ 1301 have_GX = True; 1302 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1303 have_GX = False; 1304 } else { 1305 __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/ 1306 } 1307 1308 /* VSX support implies Power ISA 2.06 */ 1309 have_VX = True; 1310 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1311 have_VX = False; 1312 } else { 1313 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */ 1314 } 1315 1316 /* Check for Decimal Floating Point (DFP) support. */ 1317 have_DFP = True; 1318 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1319 have_DFP = False; 1320 } else { 1321 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */ 1322 } 1323 1324 /* Check for ISA 2.07 support. 
*/ 1325 have_isa_2_07 = True; 1326 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1327 have_isa_2_07 = False; 1328 } else { 1329 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */ 1330 } 1331 1332 /* Check for ISA 3.0 support. */ 1333 have_isa_3_0 = True; 1334 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1335 have_isa_3_0 = False; 1336 } else { 1337 __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */ 1338 } 1339 1340 /* determine dcbz/dcbzl sizes while we still have the signal 1341 * handlers registered */ 1342 find_ppc_dcbz_sz(&vai); 1343 1344 VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL); 1345 VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL); 1346 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 1347 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n", 1348 (Int)have_F, (Int)have_V, (Int)have_FX, 1349 (Int)have_GX, (Int)have_VX, (Int)have_DFP, 1350 (Int)have_isa_2_07, (int)have_isa_3_0); 1351 /* on ppc64be, if we don't even have FP, just give up. */ 1352 if (!have_F) 1353 return False; 1354 1355 VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0; 1356 1357 va = VexArchPPC64; 1358 # if defined(VKI_LITTLE_ENDIAN) 1359 vai.endness = VexEndnessLE; 1360 # elif defined(VKI_BIG_ENDIAN) 1361 vai.endness = VexEndnessBE; 1362 # else 1363 vai.endness = VexEndness_INVALID; 1364 # endif 1365 1366 vai.hwcaps = 0; 1367 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC64_V; 1368 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX; 1369 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX; 1370 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX; 1371 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP; 1372 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07; 1373 if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0; 1374 1375 VG_(machine_get_cache_info)(&vai); 1376 1377 /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be 1378 called before we're ready to go. 
*/ 1379 return True; 1380 } 1381 1382 #elif defined(VGA_s390x) 1383 1384 # include "libvex_s390x_common.h" 1385 1386 { 1387 /* Instruction set detection code borrowed from ppc above. */ 1388 vki_sigset_t saved_set, tmp_set; 1389 vki_sigaction_fromK_t saved_sigill_act; 1390 vki_sigaction_toK_t tmp_sigill_act; 1391 1392 volatile Bool have_LDISP, have_STFLE; 1393 Int i, r, model; 1394 1395 /* If the model is "unknown" don't treat this as an error. Assume 1396 this is a brand-new machine model for which we don't have the 1397 identification yet. Keeping fingers crossed. */ 1398 model = VG_(get_machine_model)(); 1399 1400 /* Unblock SIGILL and stash away the old action for that signal */ 1401 VG_(sigemptyset)(&tmp_set); 1402 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 1403 1404 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 1405 vg_assert(r == 0); 1406 1407 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 1408 vg_assert(r == 0); 1409 tmp_sigill_act = saved_sigill_act; 1410 1411 /* NODEFER: signal handler does not return (from the kernel's point of 1412 view), hence if it is to successfully catch a signal more than once, 1413 we need the NODEFER flag. */ 1414 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 1415 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 1416 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 1417 tmp_sigill_act.ksa_handler = handler_unsup_insn; 1418 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1419 1420 /* Determine hwcaps. Note, we cannot use the stfle insn because it 1421 is not supported on z900. */ 1422 1423 have_LDISP = True; 1424 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1425 have_LDISP = False; 1426 } else { 1427 /* BASR loads the address of the next insn into r1. Needed to avoid 1428 a segfault in XY. */ 1429 __asm__ __volatile__("basr %%r1,%%r0\n\t" 1430 ".long 0xe3001000\n\t" /* XY 0,0(%r1) */ 1431 ".short 0x0057" : : : "r0", "r1", "cc", "memory"); 1432 } 1433 1434 /* Check availability of STFLE. 
If available store facility bits 1435 in hoststfle. */ 1436 ULong hoststfle[S390_NUM_FACILITY_DW]; 1437 1438 for (i = 0; i < S390_NUM_FACILITY_DW; ++i) 1439 hoststfle[i] = 0; 1440 1441 have_STFLE = True; 1442 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1443 have_STFLE = False; 1444 } else { 1445 register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1; 1446 1447 __asm__ __volatile__(" .insn s,0xb2b00000,%0\n" /* stfle */ 1448 : "=m" (hoststfle), "+d"(reg0) 1449 : : "cc", "memory"); 1450 } 1451 1452 /* Restore signals */ 1453 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL); 1454 vg_assert(r == 0); 1455 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 1456 vg_assert(r == 0); 1457 va = VexArchS390X; 1458 vai.endness = VexEndnessBE; 1459 1460 vai.hwcaps = model; 1461 if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE; 1462 if (have_LDISP) { 1463 /* Use long displacement only on machines >= z990. For all other 1464 machines it is millicoded and therefore slow. */ 1465 if (model >= VEX_S390X_MODEL_Z990) 1466 vai.hwcaps |= VEX_HWCAPS_S390X_LDISP; 1467 } 1468 1469 /* Detect presence of certain facilities using the STFLE insn. 1470 Note, that these facilities were introduced at the same time or later 1471 as STFLE, so the absence of STLFE implies the absence of the facility 1472 we're trying to detect. 
*/ 1473 struct fac_hwcaps_map { 1474 UInt installed; 1475 UInt facility_bit; 1476 UInt hwcaps_bit; 1477 const HChar name[6]; // may need adjustment for new facility names 1478 } fac_hwcaps[] = { 1479 { False, S390_FAC_EIMM, VEX_HWCAPS_S390X_EIMM, "EIMM" }, 1480 { False, S390_FAC_GIE, VEX_HWCAPS_S390X_GIE, "GIE" }, 1481 { False, S390_FAC_DFP, VEX_HWCAPS_S390X_DFP, "DFP" }, 1482 { False, S390_FAC_FPSE, VEX_HWCAPS_S390X_FGX, "FGX" }, 1483 { False, S390_FAC_ETF2, VEX_HWCAPS_S390X_ETF2, "ETF2" }, 1484 { False, S390_FAC_ETF3, VEX_HWCAPS_S390X_ETF3, "ETF3" }, 1485 { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" }, 1486 { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" }, 1487 { False, S390_FAC_LSC, VEX_HWCAPS_S390X_LSC, "LSC" }, 1488 { False, S390_FAC_PFPO, VEX_HWCAPS_S390X_PFPO, "PFPO" }, 1489 }; 1490 1491 /* Set hwcaps according to the detected facilities */ 1492 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) { 1493 vg_assert(fac_hwcaps[i].facility_bit <= 63); // for now 1494 if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) { 1495 fac_hwcaps[i].installed = True; 1496 vai.hwcaps |= fac_hwcaps[i].hwcaps_bit; 1497 } 1498 } 1499 1500 /* Build up a string showing the probed-for facilities */ 1501 HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) * 1502 (sizeof fac_hwcaps[0].name + 3) + // %s %d 1503 7 + 1 + 4 + 2 // machine %4d 1504 + 1]; // \0 1505 HChar *p = fac_str; 1506 p += VG_(sprintf)(p, "machine %4d ", model); 1507 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) { 1508 p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name, 1509 fac_hwcaps[i].installed); 1510 } 1511 *p++ = '\0'; 1512 1513 VG_(debugLog)(1, "machine", "%s\n", fac_str); 1514 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps); 1515 1516 VG_(machine_get_cache_info)(&vai); 1517 1518 return True; 1519 } 1520 1521 #elif defined(VGA_arm) 1522 { 1523 /* Same instruction set detection algorithm as for ppc32. 
*/ 1524 vki_sigset_t saved_set, tmp_set; 1525 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act; 1526 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act; 1527 1528 volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8; 1529 volatile Int archlevel; 1530 Int r; 1531 1532 /* This is a kludge. Really we ought to back-convert saved_act 1533 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but 1534 since that's a no-op on all ppc64 platforms so far supported, 1535 it's not worth the typing effort. At least include most basic 1536 sanity check: */ 1537 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); 1538 1539 VG_(sigemptyset)(&tmp_set); 1540 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 1541 VG_(sigaddset)(&tmp_set, VKI_SIGFPE); 1542 1543 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 1544 vg_assert(r == 0); 1545 1546 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 1547 vg_assert(r == 0); 1548 tmp_sigill_act = saved_sigill_act; 1549 1550 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act); 1551 tmp_sigfpe_act = saved_sigfpe_act; 1552 1553 /* NODEFER: signal handler does not return (from the kernel's point of 1554 view), hence if it is to successfully catch a signal more than once, 1555 we need the NODEFER flag. 
*/ 1556 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 1557 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 1558 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 1559 tmp_sigill_act.ksa_handler = handler_unsup_insn; 1560 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1561 1562 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND; 1563 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO; 1564 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER; 1565 tmp_sigfpe_act.ksa_handler = handler_unsup_insn; 1566 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL); 1567 1568 /* VFP insns */ 1569 have_VFP = True; 1570 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1571 have_VFP = False; 1572 } else { 1573 __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */ 1574 } 1575 /* There are several generation of VFP extension but they differs very 1576 little so for now we will not distinguish them. */ 1577 have_VFP2 = have_VFP; 1578 have_VFP3 = have_VFP; 1579 1580 /* NEON insns */ 1581 have_NEON = True; 1582 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1583 have_NEON = False; 1584 } else { 1585 __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */ 1586 } 1587 1588 /* ARM architecture level */ 1589 archlevel = 5; /* v5 will be base level */ 1590 if (archlevel < 7) { 1591 archlevel = 7; 1592 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1593 archlevel = 5; 1594 } else { 1595 __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */ 1596 } 1597 } 1598 if (archlevel < 6) { 1599 archlevel = 6; 1600 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1601 archlevel = 5; 1602 } else { 1603 __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */ 1604 } 1605 } 1606 1607 /* ARMv8 insns */ 1608 have_V8 = True; 1609 if (archlevel == 7) { 1610 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1611 have_V8 = False; 1612 } else { 1613 __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */ 1614 } 1615 if (have_V8 && have_NEON && have_VFP3) { 1616 archlevel = 8; 1617 } 1618 } 1619 1620 
VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act); 1621 VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act); 1622 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1623 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL); 1624 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 1625 1626 VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n", 1627 archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3, 1628 (Int)have_NEON); 1629 1630 VG_(machine_arm_archlevel) = archlevel; 1631 1632 va = VexArchARM; 1633 vai.endness = VexEndnessLE; 1634 1635 vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel); 1636 if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3; 1637 if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2; 1638 if (have_VFP) vai.hwcaps |= VEX_HWCAPS_ARM_VFP; 1639 if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON; 1640 1641 VG_(machine_get_cache_info)(&vai); 1642 1643 return True; 1644 } 1645 1646 #elif defined(VGA_arm64) 1647 { 1648 va = VexArchARM64; 1649 vai.endness = VexEndnessLE; 1650 1651 /* So far there are no variants. */ 1652 vai.hwcaps = 0; 1653 1654 VG_(machine_get_cache_info)(&vai); 1655 1656 /* Check whether we need to use the fallback LLSC implementation. 1657 If the check fails, give up. */ 1658 if (! VG_(parse_cpuinfo)()) 1659 return False; 1660 1661 /* 0 denotes 'not set'. The range of legitimate values here, 1662 after being set that is, is 2 though 17 inclusive. 
*/ 1663 vg_assert(vai.arm64_dMinLine_lg2_szB == 0); 1664 vg_assert(vai.arm64_iMinLine_lg2_szB == 0); 1665 ULong ctr_el0; 1666 __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0)); 1667 vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2; 1668 vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >> 0) & 0xF) + 2; 1669 VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, " 1670 "ctr_el0.iMinLine_szB = %d\n", 1671 1 << vai.arm64_dMinLine_lg2_szB, 1672 1 << vai.arm64_iMinLine_lg2_szB); 1673 VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n", 1674 vai.arm64_requires_fallback_LLSC ? "yes" : "no"); 1675 1676 return True; 1677 } 1678 1679 #elif defined(VGA_mips32) 1680 { 1681 /* Define the position of F64 bit in FIR register. */ 1682 # define FP64 22 1683 va = VexArchMIPS32; 1684 if (!VG_(parse_cpuinfo)()) 1685 return False; 1686 1687 # if defined(VKI_LITTLE_ENDIAN) 1688 vai.endness = VexEndnessLE; 1689 # elif defined(VKI_BIG_ENDIAN) 1690 vai.endness = VexEndnessBE; 1691 # else 1692 vai.endness = VexEndness_INVALID; 1693 # endif 1694 1695 /* Same instruction set detection algorithm as for ppc32/arm... */ 1696 vki_sigset_t saved_set, tmp_set; 1697 vki_sigaction_fromK_t saved_sigill_act; 1698 vki_sigaction_toK_t tmp_sigill_act; 1699 1700 volatile Bool have_DSP, have_DSPr2; 1701 Int r; 1702 1703 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); 1704 1705 VG_(sigemptyset)(&tmp_set); 1706 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 1707 1708 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 1709 vg_assert(r == 0); 1710 1711 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 1712 vg_assert(r == 0); 1713 tmp_sigill_act = saved_sigill_act; 1714 1715 /* NODEFER: signal handler does not return (from the kernel's point of 1716 view), hence if it is to successfully catch a signal more than once, 1717 we need the NODEFER flag. 
*/ 1718 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 1719 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 1720 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 1721 tmp_sigill_act.ksa_handler = handler_unsup_insn; 1722 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1723 1724 if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) { 1725 /* DSPr2 instructions. */ 1726 have_DSPr2 = True; 1727 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1728 have_DSPr2 = False; 1729 } else { 1730 __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */ 1731 } 1732 if (have_DSPr2) { 1733 /* We assume it's 74K, since it can run DSPr2. */ 1734 vai.hwcaps |= VEX_PRID_IMP_74K; 1735 } else { 1736 /* DSP instructions. */ 1737 have_DSP = True; 1738 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1739 have_DSP = False; 1740 } else { 1741 __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */ 1742 } 1743 if (have_DSP) { 1744 /* We assume it's 34K, since it has support for DSP. */ 1745 vai.hwcaps |= VEX_PRID_IMP_34K; 1746 } 1747 } 1748 } 1749 1750 # if defined(VGP_mips32_linux) 1751 Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0); 1752 # else 1753 Int fpmode = -1; 1754 # endif 1755 1756 if (fpmode < 0) { 1757 /* prctl(PR_GET_FP_MODE) is not supported by Kernel, 1758 we are using alternative way to determine FP mode */ 1759 ULong result = 0; 1760 1761 if (!VG_MINIMAL_SETJMP(env_unsup_insn)) { 1762 __asm__ volatile ( 1763 ".set push\n\t" 1764 ".set noreorder\n\t" 1765 ".set oddspreg\n\t" 1766 ".set hardfloat\n\t" 1767 "lui $t0, 0x3FF0\n\t" 1768 "ldc1 $f0, %0\n\t" 1769 "mtc1 $t0, $f1\n\t" 1770 "sdc1 $f0, %0\n\t" 1771 ".set pop\n\t" 1772 : "+m"(result) 1773 : 1774 : "t0", "$f0", "$f1", "memory"); 1775 1776 fpmode = (result != 0x3FF0000000000000ull); 1777 } 1778 } 1779 1780 if (fpmode != 0) 1781 vai.hwcaps |= VEX_MIPS_HOST_FR; 1782 1783 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act); 1784 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1785 
VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 1786 1787 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps); 1788 VG_(machine_get_cache_info)(&vai); 1789 1790 return True; 1791 } 1792 1793 #elif defined(VGA_mips64) 1794 { 1795 va = VexArchMIPS64; 1796 if (!VG_(parse_cpuinfo)()) 1797 return False; 1798 1799 # if defined(VKI_LITTLE_ENDIAN) 1800 vai.endness = VexEndnessLE; 1801 # elif defined(VKI_BIG_ENDIAN) 1802 vai.endness = VexEndnessBE; 1803 # else 1804 vai.endness = VexEndness_INVALID; 1805 # endif 1806 1807 vai.hwcaps |= VEX_MIPS_HOST_FR; 1808 1809 VG_(machine_get_cache_info)(&vai); 1810 1811 return True; 1812 } 1813 1814 #else 1815 # error "Unknown arch" 1816 #endif 1817 } 1818 1819 /* Notify host cpu instruction cache line size. */ 1820 #if defined(VGA_ppc32) 1821 void VG_(machine_ppc32_set_clszB)( Int szB ) 1822 { 1823 vg_assert(hwcaps_done); 1824 1825 /* Either the value must not have been set yet (zero) or we can 1826 tolerate it being set to the same value multiple times, as the 1827 stack scanning logic in m_main is a bit stupid. */ 1828 vg_assert(vai.ppc_icache_line_szB == 0 1829 || vai.ppc_icache_line_szB == szB); 1830 1831 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128); 1832 vai.ppc_icache_line_szB = szB; 1833 } 1834 #endif 1835 1836 1837 /* Notify host cpu instruction cache line size. */ 1838 #if defined(VGA_ppc64be)|| defined(VGA_ppc64le) 1839 void VG_(machine_ppc64_set_clszB)( Int szB ) 1840 { 1841 vg_assert(hwcaps_done); 1842 1843 /* Either the value must not have been set yet (zero) or we can 1844 tolerate it being set to the same value multiple times, as the 1845 stack scanning logic in m_main is a bit stupid. */ 1846 vg_assert(vai.ppc_icache_line_szB == 0 1847 || vai.ppc_icache_line_szB == szB); 1848 1849 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128); 1850 vai.ppc_icache_line_szB = szB; 1851 } 1852 #endif 1853 1854 1855 /* Notify host's ability to handle NEON instructions. 
*/ 1856 #if defined(VGA_arm) 1857 void VG_(machine_arm_set_has_NEON)( Bool has_neon ) 1858 { 1859 vg_assert(hwcaps_done); 1860 /* There's nothing else we can sanity check. */ 1861 1862 if (has_neon) { 1863 vai.hwcaps |= VEX_HWCAPS_ARM_NEON; 1864 } else { 1865 vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON; 1866 } 1867 } 1868 #endif 1869 1870 1871 /* Fetch host cpu info, once established. */ 1872 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa, 1873 /*OUT*/VexArchInfo* pVai ) 1874 { 1875 vg_assert(hwcaps_done); 1876 if (pVa) *pVa = va; 1877 if (pVai) *pVai = vai; 1878 } 1879 1880 1881 /* Returns the size of the largest guest register that we will 1882 simulate in this run. This depends on both the guest architecture 1883 and on the specific capabilities we are simulating for that guest 1884 (eg, AVX or non-AVX ?, for amd64). Should return either 4, 8, 16 1885 or 32. General rule: if in doubt, return a value larger than 1886 reality. 1887 1888 This information is needed by Cachegrind and Callgrind to decide 1889 what the minimum cache line size they are prepared to simulate is. 1890 Basically require that the minimum cache line size is at least as 1891 large as the largest register that might get transferred to/from 1892 memory, so as to guarantee that any such transaction can straddle 1893 at most 2 cache lines. 1894 */ 1895 Int VG_(machine_get_size_of_largest_guest_register) ( void ) 1896 { 1897 vg_assert(hwcaps_done); 1898 /* Once hwcaps_done is True, we can fish around inside va/vai to 1899 find the information we need. */ 1900 1901 # if defined(VGA_x86) 1902 vg_assert(va == VexArchX86); 1903 /* We don't support AVX, so 32 is out. At the other end, even if 1904 we don't support any SSE, the X87 can generate 10 byte 1905 transfers, so let's say 16 to be on the safe side. Hence the 1906 answer is always 16. */ 1907 return 16; 1908 1909 # elif defined(VGA_amd64) 1910 /* if AVX then 32 else 16 */ 1911 return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 
32 : 16; 1912 1913 # elif defined(VGA_ppc32) 1914 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */ 1915 if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16; 1916 if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16; 1917 if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16; 1918 return 8; 1919 1920 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le) 1921 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */ 1922 if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16; 1923 if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16; 1924 if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16; 1925 return 8; 1926 1927 # elif defined(VGA_s390x) 1928 return 8; 1929 1930 # elif defined(VGA_arm) 1931 /* Really it depends whether or not we have NEON, but let's just 1932 assume we always do. */ 1933 return 16; 1934 1935 # elif defined(VGA_arm64) 1936 /* ARM64 always has Neon, AFAICS. */ 1937 return 16; 1938 1939 # elif defined(VGA_mips32) 1940 /* The guest state implies 4, but that can't really be true, can 1941 it? */ 1942 return 8; 1943 1944 # elif defined(VGA_mips64) 1945 return 8; 1946 1947 # else 1948 # error "Unknown arch" 1949 # endif 1950 } 1951 1952 1953 // Given a pointer to a function as obtained by "& functionname" in C, 1954 // produce a pointer to the actual entry point for the function. 1955 void* VG_(fnptr_to_fnentry)( void* f ) 1956 { 1957 # if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \ 1958 || defined(VGP_arm_linux) || defined(VGO_darwin) \ 1959 || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \ 1960 || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \ 1961 || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ 1962 || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) 1963 return f; 1964 # elif defined(VGP_ppc64be_linux) 1965 /* ppc64-linux uses the AIX scheme, in which f is a pointer to a 1966 3-word function descriptor, of which the first word is the entry 1967 address. 
*/ 1968 UWord* descr = (UWord*)f; 1969 return (void*)(descr[0]); 1970 # else 1971 # error "Unknown platform" 1972 # endif 1973 } 1974 1975 /*--------------------------------------------------------------------*/ 1976 /*--- end ---*/ 1977 /*--------------------------------------------------------------------*/ 1978