/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2012 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}


void VG_(set_syscall_return_shadows) ( ThreadId tid,
                                       /* shadow vals for the result */
                                       UWord s1res, UWord s2res,
                                       /* shadow vals for the error val */
                                       UWord s1err, UWord s2err )
{
#  if defined(VGP_x86_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
#  elif defined(VGP_amd64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
#  elif defined(VGP_arm_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
#  elif defined(VGO_darwin)
   // GrP fixme darwin syscalls may return more values (2 registers plus error)
#  elif defined(VGP_s390x_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  elif defined(VGP_mips32_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  else
#    error "Unknown plat"
#  endif
}

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}
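
/* Illustrative sketch (not compiled): how a tool might read the
   shadow1 value of a single guest register via the wrapper above,
   which is exported to tools through pub_tool_machine.h.  Assumes an
   amd64 guest and a valid 'tid' in scope; indexing the guest state
   with offsetof is one conventional way to name a register's slice
   of it. */
#if 0
   UWord rax_shadow1;
   VG_(get_shadow_regs_area)( tid, (UChar*)&rax_shadow1, 1/*shadowNo*/,
                              offsetof(VexGuestAMD64State, guest_RAX),
                              sizeof(UWord) );
#endif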

static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId, HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)) {
         apply_to_GPs_of_tid(tid, f);
      }
   }
}

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}
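
/* Illustrative sketch (not compiled): the intended usage of the
   iterator pair above for walking all live thread stacks.
   VG_(thread_stack_reset_iter) primes 'tid'; each call to
   VG_(thread_stack_next) advances it to the next non-empty thread. */
#if 0
   ThreadId tid;
   Addr     stack_min, stack_max;
   VG_(thread_stack_reset_iter)(&tid);
   while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
      /* [stack_min .. stack_max] delimits the current stack of tid. */
   }
#endif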

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* fixs390: anything for s390x here ? */

/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) \
    || defined(VGA_arm) || defined(VGA_s390x)
#include "pub_tool_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
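
/* The SIGILL-probe pattern used throughout VG_(machine_get_hwcaps)
   below, in outline (a sketch for orientation only, not extra logic):

      have_FOO = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FOO = False;   // the insn trapped; the handler longjmp'd
      } else {
         __asm__ __volatile__( ...candidate FOO instruction... );
      }

   handler_unsup_insn never returns to the faulting instruction; it
   longjmps straight back to the setjmp point. */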

/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by the dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that, as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128)   /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size; assumes MAX_DCBZL_SZB is a power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0;   /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF, identification = 0117C9, machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#ifdef VGA_mips32

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   char *search_MIPS_str = "MIPS";
   char *search_Broadcom_str = "Broadcom";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
      return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
      return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     if (have_sse2 && have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     if (have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     va         = VexArchX86;
     vai.hwcaps = 0; /*baseline - no sse at all*/
     VG_(machine_x86_have_mxcsr) = 0;
     return True;
   }
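
   /* For reference (well-known cpuid facts, noted here for the x86
      case above and the amd64 case below): leaf 1 EDX bit 8 is CX8
      (cmpxchg8b), bit 25 is SSE and bit 26 is SSE2; leaf 1 ECX bit 0
      is SSE3, bit 13 is CX16 (cmpxchg16b), bit 27 is OSXSAVE and
      bit 28 is AVX; extended leaf 0x80000001 ECX bit 5 is LZCNT/ABM
      on AMD parts. */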

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx /*, have_fma*/;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3  is ecx:9
     // sse41  is ecx:19
     // sse42  is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }
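
     /* (For reference: XGETBV with ECX=0 reads XCR0, whose bit 1 is
        SSE/XMM state and bit 2 is AVX/YMM state; the mask 6 above
        therefore checks that the OS has enabled both.) */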

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va         = VexArchAMD64;
     vai.hwcaps = (have_sse3  ? VEX_HWCAPS_AMD64_SSE3  : 0)
                | (have_cx16  ? VEX_HWCAPS_AMD64_CX16  : 0)
                | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
                | (have_avx   ? VEX_HWCAPS_AMD64_AVX   : 0);
     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t   tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA,FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)   vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)   vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX)  vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX)  vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX)  vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t   tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA,FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)   vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX)  vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX)  vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX)  vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)
   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_LDISP, have_EIMM, have_GIE, have_DFP, have_FGX;
     volatile Bool have_STFLE, have_ETF2, have_ETF3;
     Int r, model;

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
        /* BASR loads the address of the next insn into r1.  Needed to avoid
           a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY 0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     have_EIMM = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_EIMM = False;
     } else {
        __asm__ __volatile__(".long  0xc0090000\n\t"  /* iilf r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_GIE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GIE = False;
     } else {
        __asm__ __volatile__(".long  0xc2010000\n\t"  /* msfi r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xb3d20000"
                             : : : "r0", "cc", "memory");  /* adtr r0,r0,r0 */
     }

     have_FGX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FGX = False;
     } else {
        __asm__ __volatile__(".long 0xb3cd0000" : : : "r0");  /* lgdr r0,f0 */
     }

     /* Detect presence of the ETF2-enhancement facility using the
        STFLE insn. Note that STFLE and ETF2 were introduced at the same
        time, so the absence of STFLE implies the absence of ETF2. */
     have_STFLE = True;
     have_ETF2 = False;
     have_ETF3 = False;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        ULong hoststfle[1];
        register ULong reg0 asm("0") = 0; /* one double word available */

        __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                             : "=m" (hoststfle), "+d"(reg0)
                             : : "cc", "memory");
        if (hoststfle[0] & (1ULL << (63 - 24)))
           have_ETF2 = True;
        if (hoststfle[0] & (1ULL << (63 - 30)))
           have_ETF3 = True;
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;

     model = VG_(get_machine_model)();

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */

     VG_(debugLog)(1, "machine", "machine %d  LDISP %d EIMM %d GIE %d DFP %d "
                   "FGX %d STFLE %d ETF2 %d ETF3 %d\n", model, have_LDISP,
                   have_EIMM, have_GIE, have_DFP, have_FGX, have_STFLE,
                   have_ETF2, have_ETF3);

     vai.hwcaps = model;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }
     if (have_EIMM)  vai.hwcaps |= VEX_HWCAPS_S390X_EIMM;
     if (have_GIE)   vai.hwcaps |= VEX_HWCAPS_S390X_GIE;
     if (have_DFP)   vai.hwcaps |= VEX_HWCAPS_S390X_DFP;
     if (have_FGX)   vai.hwcaps |= VEX_HWCAPS_S390X_FGX;
     if (have_ETF2)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF2;
     if (have_ETF3)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF3;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t   tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish between
        them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
                   archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
                   (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     return True;
   }

#elif defined(VGA_mips32)
   {
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;
     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host cpu cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif
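
/* Illustrative sketch (not compiled): the usual startup-time
   consumption pattern for this module.  VG_(machine_get_hwcaps) is
   intended to be called exactly once (it asserts on re-entry);
   translation code then pulls the results out with
   VG_(machine_get_VexArchInfo) and hands them to LibVEX. */
#if 0
   VexArch     arch;
   VexArchInfo archinfo;
   if (!VG_(machine_get_hwcaps)()) {
      /* CPU lacks the minimum feature set; Valgrind cannot run. */
   }
   VG_(machine_get_VexArchInfo)( &arch, &archinfo );
   /* arch/archinfo then parameterise LibVEX_Translate. */
#endif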

/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
*/
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
      || defined(VGP_arm_linux) \
      || defined(VGP_ppc32_linux) || defined(VGO_darwin) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)
   return f;
#  elif defined(VGP_ppc64_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/