1 2 /*--------------------------------------------------------------------*/ 3 /*--- Machine-related stuff. m_machine.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2000-2010 Julian Seward 11 jseward (at) acm.org 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 */ 30 31 #include "pub_core_basics.h" 32 #include "pub_core_vki.h" 33 #include "pub_core_threadstate.h" 34 #include "pub_core_libcassert.h" 35 #include "pub_core_libcbase.h" 36 #include "pub_core_machine.h" 37 #include "pub_core_cpuid.h" 38 #include "pub_core_libcsignal.h" // for ppc32 messing with SIGILL and SIGFPE 39 #include "pub_core_debuglog.h" 40 41 42 #define INSTR_PTR(regs) ((regs).vex.VG_INSTR_PTR) 43 #define STACK_PTR(regs) ((regs).vex.VG_STACK_PTR) 44 #define FRAME_PTR(regs) ((regs).vex.VG_FRAME_PTR) 45 46 Addr VG_(get_IP) ( ThreadId tid ) { 47 return INSTR_PTR( VG_(threads)[tid].arch ); 48 } 49 Addr VG_(get_SP) ( ThreadId tid ) { 50 return STACK_PTR( VG_(threads)[tid].arch ); 51 } 52 Addr VG_(get_FP) ( ThreadId tid ) { 53 return FRAME_PTR( VG_(threads)[tid].arch ); 54 } 55 56 void VG_(set_IP) ( ThreadId tid, Addr ip ) { 57 INSTR_PTR( VG_(threads)[tid].arch ) = ip; 58 } 59 void VG_(set_SP) ( ThreadId tid, Addr sp ) { 60 STACK_PTR( VG_(threads)[tid].arch ) = sp; 61 } 62 63 void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs, 64 ThreadId tid ) 65 { 66 # if defined(VGA_x86) 67 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP; 68 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP; 69 regs->misc.X86.r_ebp 70 = VG_(threads)[tid].arch.vex.guest_EBP; 71 # elif defined(VGA_amd64) 72 regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP; 73 regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP; 74 regs->misc.AMD64.r_rbp 75 = VG_(threads)[tid].arch.vex.guest_RBP; 76 # elif defined(VGA_ppc32) 77 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA; 78 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1; 79 regs->misc.PPC32.r_lr 80 = VG_(threads)[tid].arch.vex.guest_LR; 81 # elif defined(VGA_ppc64) 82 regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA; 83 regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1; 84 regs->misc.PPC64.r_lr 85 = VG_(threads)[tid].arch.vex.guest_LR; 86 # elif defined(VGA_arm) 87 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T; 88 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13; 89 regs->misc.ARM.r14 90 = VG_(threads)[tid].arch.vex.guest_R14; 91 regs->misc.ARM.r12 92 = VG_(threads)[tid].arch.vex.guest_R12; 93 regs->misc.ARM.r11 94 = VG_(threads)[tid].arch.vex.guest_R11; 95 regs->misc.ARM.r7 96 = VG_(threads)[tid].arch.vex.guest_R7; 97 # else 98 # error "Unknown arch" 99 # endif 100 } 101 102 103 void VG_(set_syscall_return_shadows) ( ThreadId tid, 104 /* shadow vals for the result */ 105 UWord s1res, UWord s2res, 106 /* shadow vals for the error val */ 107 UWord s1err, UWord s2err ) 108 { 109 # if defined(VGP_x86_linux) 110 VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res; 111 VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res; 112 # elif defined(VGP_amd64_linux) 113 VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res; 114 VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res; 115 # elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux) 116 VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res; 117 VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res; 118 # elif defined(VGP_arm_linux) 119 VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res; 120 VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res; 121 # elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5) 122 VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res; 123 VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res; 124 VG_(threads)[tid].arch.vex_shadow1.guest_GPR4 = s1err; 125 VG_(threads)[tid].arch.vex_shadow2.guest_GPR4 = s2err; 126 # elif defined(VGO_darwin) 127 // GrP fixme darwin syscalls may return more values (2 registers plus error) 128 # else 129 # error "Unknown plat" 130 # endif 131 } 132 133 void 134 VG_(get_shadow_regs_area) ( ThreadId tid, 135 /*DST*/UChar* dst, 136 /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size ) 137 { 138 void* src; 139 ThreadState* tst; 140 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2); 141 vg_assert(VG_(is_valid_tid)(tid)); 142 // Bounds check 143 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState)); 144 vg_assert(offset + size <= sizeof(VexGuestArchState)); 145 // Copy 146 tst = & VG_(threads)[tid]; 147 src = NULL; 148 switch (shadowNo) { 149 case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break; 150 case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break; 151 case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break; 152 } 153 tl_assert(src != NULL); 154 VG_(memcpy)( dst, src, size); 155 } 156 157 void 158 VG_(set_shadow_regs_area) ( ThreadId tid, 159 /*DST*/Int shadowNo, PtrdiffT offset, SizeT size, 160 /*SRC*/const UChar* src ) 161 { 162 void* dst; 163 ThreadState* tst; 164 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2); 165 vg_assert(VG_(is_valid_tid)(tid)); 166 // Bounds check 167 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState)); 168 vg_assert(offset + size <= sizeof(VexGuestArchState)); 169 // Copy 170 tst = & VG_(threads)[tid]; 171 dst = NULL; 172 switch (shadowNo) { 173 case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break; 174 case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break; 175 case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break; 176 } 177 tl_assert(dst != NULL); 178 VG_(memcpy)( dst, src, size); 179 } 180 181 182 static void apply_to_GPs_of_tid(VexGuestArchState* vex, void (*f)(Addr)) 183 { 184 #if defined(VGA_x86) 185 (*f)(vex->guest_EAX); 186 (*f)(vex->guest_ECX); 187 (*f)(vex->guest_EDX); 188 (*f)(vex->guest_EBX); 189 (*f)(vex->guest_ESI); 190 (*f)(vex->guest_EDI); 191 (*f)(vex->guest_ESP); 192 (*f)(vex->guest_EBP); 193 #elif defined(VGA_amd64) 194 (*f)(vex->guest_RAX); 195 (*f)(vex->guest_RCX); 196 (*f)(vex->guest_RDX); 197 (*f)(vex->guest_RBX); 198 (*f)(vex->guest_RSI); 199 (*f)(vex->guest_RDI); 200 (*f)(vex->guest_RSP); 201 (*f)(vex->guest_RBP); 202 (*f)(vex->guest_R8); 203 (*f)(vex->guest_R9); 204 (*f)(vex->guest_R10); 205 (*f)(vex->guest_R11); 206 (*f)(vex->guest_R12); 207 (*f)(vex->guest_R13); 208 (*f)(vex->guest_R14); 209 (*f)(vex->guest_R15); 210 #elif defined(VGA_ppc32) || defined(VGA_ppc64) 211 (*f)(vex->guest_GPR0); 212 (*f)(vex->guest_GPR1); 213 (*f)(vex->guest_GPR2); 214 (*f)(vex->guest_GPR3); 215 (*f)(vex->guest_GPR4); 216 (*f)(vex->guest_GPR5); 217 (*f)(vex->guest_GPR6); 218 (*f)(vex->guest_GPR7); 219 (*f)(vex->guest_GPR8); 220 (*f)(vex->guest_GPR9); 221 (*f)(vex->guest_GPR10); 222 (*f)(vex->guest_GPR11); 223 (*f)(vex->guest_GPR12); 224 (*f)(vex->guest_GPR13); 225 (*f)(vex->guest_GPR14); 226 (*f)(vex->guest_GPR15); 227 (*f)(vex->guest_GPR16); 228 (*f)(vex->guest_GPR17); 229 (*f)(vex->guest_GPR18); 230 (*f)(vex->guest_GPR19); 231 (*f)(vex->guest_GPR20); 232 (*f)(vex->guest_GPR21); 233 (*f)(vex->guest_GPR22); 234 (*f)(vex->guest_GPR23); 235 (*f)(vex->guest_GPR24); 236 (*f)(vex->guest_GPR25); 237 (*f)(vex->guest_GPR26); 238 (*f)(vex->guest_GPR27); 239 (*f)(vex->guest_GPR28); 240 (*f)(vex->guest_GPR29); 241 (*f)(vex->guest_GPR30); 242 (*f)(vex->guest_GPR31); 243 (*f)(vex->guest_CTR); 244 (*f)(vex->guest_LR); 245 #elif defined(VGA_arm) 246 (*f)(vex->guest_R0); 247 (*f)(vex->guest_R1); 248 (*f)(vex->guest_R2); 249 (*f)(vex->guest_R3); 250 (*f)(vex->guest_R4); 251 (*f)(vex->guest_R5); 252 (*f)(vex->guest_R6); 253 (*f)(vex->guest_R8); 254 (*f)(vex->guest_R9); 255 (*f)(vex->guest_R10); 256 (*f)(vex->guest_R11); 257 (*f)(vex->guest_R12); 258 (*f)(vex->guest_R13); 259 (*f)(vex->guest_R14); 260 #else 261 # error Unknown arch 262 #endif 263 } 264 265 266 void VG_(apply_to_GP_regs)(void (*f)(UWord)) 267 { 268 ThreadId tid; 269 270 for (tid = 1; tid < VG_N_THREADS; tid++) { 271 if (VG_(is_valid_tid)(tid)) { 272 ThreadState* tst = VG_(get_ThreadState)(tid); 273 apply_to_GPs_of_tid(&(tst->arch.vex), f); 274 } 275 } 276 } 277 278 void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid) 279 { 280 *tid = (ThreadId)(-1); 281 } 282 283 Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid, 284 /*OUT*/Addr* stack_min, 285 /*OUT*/Addr* stack_max) 286 { 287 ThreadId i; 288 for (i = (*tid)+1; i < VG_N_THREADS; i++) { 289 if (i == VG_INVALID_THREADID) 290 continue; 291 if (VG_(threads)[i].status != VgTs_Empty) { 292 *tid = i; 293 *stack_min = VG_(get_SP)(i); 294 *stack_max = VG_(threads)[i].client_stack_highest_word; 295 return True; 296 } 297 } 298 return False; 299 } 300 301 Addr VG_(thread_get_stack_max)(ThreadId tid) 302 { 303 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID); 304 vg_assert(VG_(threads)[tid].status != VgTs_Empty); 305 return VG_(threads)[tid].client_stack_highest_word; 306 } 307 308 SizeT VG_(thread_get_stack_size)(ThreadId tid) 309 { 310 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID); 311 vg_assert(VG_(threads)[tid].status != VgTs_Empty); 312 return VG_(threads)[tid].client_stack_szB; 313 } 314 315 Addr VG_(thread_get_altstack_min)(ThreadId tid) 316 { 317 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID); 318 vg_assert(VG_(threads)[tid].status != VgTs_Empty); 319 return (Addr)VG_(threads)[tid].altstack.ss_sp; 320 } 321 322 SizeT VG_(thread_get_altstack_size)(ThreadId tid) 323 { 324 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID); 325 vg_assert(VG_(threads)[tid].status != VgTs_Empty); 326 return VG_(threads)[tid].altstack.ss_size; 327 } 328 329 //------------------------------------------------------------- 330 /* Details about the capabilities of the underlying (host) CPU. These 331 details are acquired by (1) enquiring with the CPU at startup, or 332 (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache 333 line size). It's a bit nasty in the sense that there's no obvious 334 way to stop uses of some of this info before it's ready to go. 335 336 Current dependencies are: 337 338 x86: initially: call VG_(machine_get_hwcaps) 339 340 then safe to use VG_(machine_get_VexArchInfo) 341 and VG_(machine_x86_have_mxcsr) 342 ------------- 343 amd64: initially: call VG_(machine_get_hwcaps) 344 345 then safe to use VG_(machine_get_VexArchInfo) 346 ------------- 347 ppc32: initially: call VG_(machine_get_hwcaps) 348 call VG_(machine_ppc32_set_clszB) 349 350 then safe to use VG_(machine_get_VexArchInfo) 351 and VG_(machine_ppc32_has_FP) 352 and VG_(machine_ppc32_has_VMX) 353 ------------- 354 ppc64: initially: call VG_(machine_get_hwcaps) 355 call VG_(machine_ppc64_set_clszB) 356 357 then safe to use VG_(machine_get_VexArchInfo) 358 and VG_(machine_ppc64_has_VMX) 359 360 VG_(machine_get_hwcaps) may use signals (although it attempts to 361 leave signal state unchanged) and therefore should only be 362 called before m_main sets up the client's signal state. 363 */ 364 365 /* --------- State --------- */ 366 static Bool hwcaps_done = False; 367 368 /* --- all archs --- */ 369 static VexArch va; 370 static VexArchInfo vai; 371 372 #if defined(VGA_x86) 373 UInt VG_(machine_x86_have_mxcsr) = 0; 374 #endif 375 #if defined(VGA_ppc32) 376 UInt VG_(machine_ppc32_has_FP) = 0; 377 UInt VG_(machine_ppc32_has_VMX) = 0; 378 #endif 379 #if defined(VGA_ppc64) 380 ULong VG_(machine_ppc64_has_VMX) = 0; 381 #endif 382 #if defined(VGA_arm) 383 Int VG_(machine_arm_archlevel) = 4; 384 #endif 385 386 387 /* For hwcaps detection on ppc32/64 and arm we'll need to do SIGILL 388 testing, so we need a jmp_buf. */ 389 #if defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm) 390 #include <setjmp.h> // For jmp_buf 391 static jmp_buf env_unsup_insn; 392 static void handler_unsup_insn ( Int x ) { __builtin_longjmp(env_unsup_insn,1); } 393 #endif 394 395 396 /* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc 397 * handlers are installed. Determines the the sizes affected by dcbz 398 * and dcbzl instructions and updates the given VexArchInfo structure 399 * accordingly. 400 * 401 * Not very defensive: assumes that as long as the dcbz/dcbzl 402 * instructions don't raise a SIGILL, that they will zero an aligned, 403 * contiguous block of memory of a sensible size. */ 404 #if defined(VGA_ppc32) || defined(VGA_ppc64) 405 static void find_ppc_dcbz_sz(VexArchInfo *arch_info) 406 { 407 Int dcbz_szB = 0; 408 Int dcbzl_szB; 409 # define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */ 410 char test_block[4*MAX_DCBZL_SZB]; 411 char *aligned = test_block; 412 Int i; 413 414 /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */ 415 aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1)); 416 vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]); 417 418 /* dcbz often clears 32B, although sometimes whatever the native cache 419 * block size is */ 420 VG_(memset)(test_block, 0xff, sizeof(test_block)); 421 __asm__ __volatile__("dcbz 0,%0" 422 : /*out*/ 423 : "r" (aligned) /*in*/ 424 : "memory" /*clobber*/); 425 for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) { 426 if (!test_block[i]) 427 ++dcbz_szB; 428 } 429 vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128); 430 431 /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */ 432 if (__builtin_setjmp(env_unsup_insn)) { 433 dcbzl_szB = 0; /* indicates unsupported */ 434 } 435 else { 436 VG_(memset)(test_block, 0xff, sizeof(test_block)); 437 /* some older assemblers won't understand the dcbzl instruction 438 * variant, so we directly emit the instruction ourselves */ 439 __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/ 440 : /*out*/ 441 : "r" (aligned) /*in*/ 442 : "memory", "r9" /*clobber*/); 443 for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) { 444 if (!test_block[i]) 445 ++dcbzl_szB; 446 } 447 vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128); 448 } 449 450 arch_info->ppc_dcbz_szB = dcbz_szB; 451 arch_info->ppc_dcbzl_szB = dcbzl_szB; 452 453 VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n", 454 dcbz_szB, dcbzl_szB); 455 # undef MAX_DCBZL_SZB 456 } 457 #endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */ 458 459 460 461 /* Determine what insn set and insn set variant the host has, and 462 record it. To be called once at system startup. Returns False if 463 this a CPU incapable of running Valgrind. */ 464 465 Bool VG_(machine_get_hwcaps)( void ) 466 { 467 vg_assert(hwcaps_done == False); 468 hwcaps_done = True; 469 470 // Whack default settings into vai, so that we only need to fill in 471 // any interesting bits. 472 LibVEX_default_VexArchInfo(&vai); 473 474 #if defined(VGA_x86) 475 { Bool have_sse1, have_sse2, have_cx8, have_lzcnt; 476 UInt eax, ebx, ecx, edx, max_basic, max_extended; 477 UChar vstr[13]; 478 vstr[0] = 0; 479 480 if (!VG_(has_cpuid)()) 481 /* we can't do cpuid at all. Give up. */ 482 return False; 483 484 VG_(cpuid)(0, &eax, &ebx, &ecx, &edx); 485 if (eax < 1) 486 /* we can't ask for cpuid(x) for x > 0. Give up. */ 487 return False; 488 489 /* Get processor ID string, and max basic/extended index 490 values. */ 491 max_basic = eax; 492 VG_(memcpy)(&vstr[0], &ebx, 4); 493 VG_(memcpy)(&vstr[4], &edx, 4); 494 VG_(memcpy)(&vstr[8], &ecx, 4); 495 vstr[12] = 0; 496 497 VG_(cpuid)(0x80000000, &eax, &ebx, &ecx, &edx); 498 max_extended = eax; 499 500 /* get capabilities bits into edx */ 501 VG_(cpuid)(1, &eax, &ebx, &ecx, &edx); 502 503 have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */ 504 have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */ 505 506 /* cmpxchg8b is a minimum requirement now; if we don't have it we 507 must simply give up. But all CPUs since Pentium-I have it, so 508 that doesn't seem like much of a restriction. */ 509 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */ 510 if (!have_cx8) 511 return False; 512 513 /* Figure out if this is an AMD that can do LZCNT. */ 514 have_lzcnt = False; 515 if (0 == VG_(strcmp)(vstr, "AuthenticAMD") 516 && max_extended >= 0x80000001) { 517 VG_(cpuid)(0x80000001, &eax, &ebx, &ecx, &edx); 518 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */ 519 } 520 521 if (have_sse2 && have_sse1) { 522 va = VexArchX86; 523 vai.hwcaps = VEX_HWCAPS_X86_SSE1; 524 vai.hwcaps |= VEX_HWCAPS_X86_SSE2; 525 if (have_lzcnt) 526 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT; 527 VG_(machine_x86_have_mxcsr) = 1; 528 return True; 529 } 530 531 if (have_sse1) { 532 va = VexArchX86; 533 vai.hwcaps = VEX_HWCAPS_X86_SSE1; 534 VG_(machine_x86_have_mxcsr) = 1; 535 return True; 536 } 537 538 va = VexArchX86; 539 vai.hwcaps = 0; /*baseline - no sse at all*/ 540 VG_(machine_x86_have_mxcsr) = 0; 541 return True; 542 } 543 544 #elif defined(VGA_amd64) 545 { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_cx16; 546 Bool have_lzcnt; 547 UInt eax, ebx, ecx, edx, max_basic, max_extended; 548 UChar vstr[13]; 549 vstr[0] = 0; 550 551 if (!VG_(has_cpuid)()) 552 /* we can't do cpuid at all. Give up. */ 553 return False; 554 555 VG_(cpuid)(0, &eax, &ebx, &ecx, &edx); 556 if (eax < 1) 557 /* we can't ask for cpuid(x) for x > 0. Give up. */ 558 return False; 559 560 /* Get processor ID string, and max basic/extended index 561 values. */ 562 max_basic = eax; 563 VG_(memcpy)(&vstr[0], &ebx, 4); 564 VG_(memcpy)(&vstr[4], &edx, 4); 565 VG_(memcpy)(&vstr[8], &ecx, 4); 566 vstr[12] = 0; 567 568 VG_(cpuid)(0x80000000, &eax, &ebx, &ecx, &edx); 569 max_extended = eax; 570 571 /* get capabilities bits into edx */ 572 VG_(cpuid)(1, &eax, &ebx, &ecx, &edx); 573 574 have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */ 575 have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */ 576 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */ 577 // ssse3 is ecx:9 578 // sse41 is ecx:19 579 // sse42 is ecx:20 580 581 /* cmpxchg8b is a minimum requirement now; if we don't have it we 582 must simply give up. But all CPUs since Pentium-I have it, so 583 that doesn't seem like much of a restriction. */ 584 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */ 585 if (!have_cx8) 586 return False; 587 588 /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */ 589 have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */ 590 591 /* Figure out if this is an AMD that can do LZCNT. */ 592 have_lzcnt = False; 593 if (0 == VG_(strcmp)(vstr, "AuthenticAMD") 594 && max_extended >= 0x80000001) { 595 VG_(cpuid)(0x80000001, &eax, &ebx, &ecx, &edx); 596 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */ 597 } 598 599 va = VexArchAMD64; 600 vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0) 601 | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0) 602 | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0); 603 return True; 604 } 605 606 #elif defined(VGA_ppc32) 607 { 608 /* Find out which subset of the ppc32 instruction set is supported by 609 verifying whether various ppc32 instructions generate a SIGILL 610 or a SIGFPE. An alternative approach is to check the AT_HWCAP and 611 AT_PLATFORM entries in the ELF auxiliary table -- see also 612 the_iifii.client_auxv in m_main.c. 613 */ 614 vki_sigset_t saved_set, tmp_set; 615 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act; 616 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act; 617 618 volatile Bool have_F, have_V, have_FX, have_GX; 619 Int r; 620 621 /* This is a kludge. Really we ought to back-convert saved_act 622 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but 623 since that's a no-op on all ppc32 platforms so far supported, 624 it's not worth the typing effort. At least include most basic 625 sanity check: */ 626 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); 627 628 VG_(sigemptyset)(&tmp_set); 629 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 630 VG_(sigaddset)(&tmp_set, VKI_SIGFPE); 631 632 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 633 vg_assert(r == 0); 634 635 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 636 vg_assert(r == 0); 637 tmp_sigill_act = saved_sigill_act; 638 639 r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act); 640 vg_assert(r == 0); 641 tmp_sigfpe_act = saved_sigfpe_act; 642 643 /* NODEFER: signal handler does not return (from the kernel's point of 644 view), hence if it is to successfully catch a signal more than once, 645 we need the NODEFER flag. */ 646 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 647 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 648 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 649 tmp_sigill_act.ksa_handler = handler_unsup_insn; 650 r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 651 vg_assert(r == 0); 652 653 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND; 654 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO; 655 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER; 656 tmp_sigfpe_act.ksa_handler = handler_unsup_insn; 657 r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL); 658 vg_assert(r == 0); 659 660 /* standard FP insns */ 661 have_F = True; 662 if (__builtin_setjmp(env_unsup_insn)) { 663 have_F = False; 664 } else { 665 __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */ 666 } 667 668 /* Altivec insns */ 669 have_V = True; 670 if (__builtin_setjmp(env_unsup_insn)) { 671 have_V = False; 672 } else { 673 /* Unfortunately some older assemblers don't speak Altivec (or 674 choose not to), so to be safe we directly emit the 32-bit 675 word corresponding to "vor 0,0,0". This fixes a build 676 problem that happens on Debian 3.1 (ppc32), and probably 677 various other places. */ 678 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/ 679 } 680 681 /* General-Purpose optional (fsqrt, fsqrts) */ 682 have_FX = True; 683 if (__builtin_setjmp(env_unsup_insn)) { 684 have_FX = False; 685 } else { 686 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */ 687 } 688 689 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */ 690 have_GX = True; 691 if (__builtin_setjmp(env_unsup_insn)) { 692 have_GX = False; 693 } else { 694 __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */ 695 } 696 697 /* determine dcbz/dcbzl sizes while we still have the signal 698 * handlers registered */ 699 find_ppc_dcbz_sz(&vai); 700 701 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL); 702 vg_assert(r == 0); 703 r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL); 704 vg_assert(r == 0); 705 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 706 vg_assert(r == 0); 707 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d\n", 708 (Int)have_F, (Int)have_V, (Int)have_FX, (Int)have_GX); 709 /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */ 710 if (have_V && !have_F) 711 have_V = False; 712 if (have_FX && !have_F) 713 have_FX = False; 714 if (have_GX && !have_F) 715 have_GX = False; 716 717 VG_(machine_ppc32_has_FP) = have_F ? 1 : 0; 718 VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0; 719 720 va = VexArchPPC32; 721 722 vai.hwcaps = 0; 723 if (have_F) vai.hwcaps |= VEX_HWCAPS_PPC32_F; 724 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC32_V; 725 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX; 726 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX; 727 728 /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be 729 called before we're ready to go. */ 730 return True; 731 } 732 733 #elif defined(VGA_ppc64) 734 { 735 /* Same instruction set detection algorithm as for ppc32. */ 736 vki_sigset_t saved_set, tmp_set; 737 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act; 738 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act; 739 740 volatile Bool have_F, have_V, have_FX, have_GX; 741 Int r; 742 743 /* This is a kludge. Really we ought to back-convert saved_act 744 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but 745 since that's a no-op on all ppc64 platforms so far supported, 746 it's not worth the typing effort. At least include most basic 747 sanity check: */ 748 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); 749 750 VG_(sigemptyset)(&tmp_set); 751 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 752 VG_(sigaddset)(&tmp_set, VKI_SIGFPE); 753 754 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 755 vg_assert(r == 0); 756 757 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 758 vg_assert(r == 0); 759 tmp_sigill_act = saved_sigill_act; 760 761 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act); 762 tmp_sigfpe_act = saved_sigfpe_act; 763 764 /* NODEFER: signal handler does not return (from the kernel's point of 765 view), hence if it is to successfully catch a signal more than once, 766 we need the NODEFER flag. */ 767 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 768 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 769 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 770 tmp_sigill_act.ksa_handler = handler_unsup_insn; 771 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 772 773 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND; 774 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO; 775 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER; 776 tmp_sigfpe_act.ksa_handler = handler_unsup_insn; 777 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL); 778 779 /* standard FP insns */ 780 have_F = True; 781 if (__builtin_setjmp(env_unsup_insn)) { 782 have_F = False; 783 } else { 784 __asm__ __volatile__("fmr 0,0"); 785 } 786 787 /* Altivec insns */ 788 have_V = True; 789 if (__builtin_setjmp(env_unsup_insn)) { 790 have_V = False; 791 } else { 792 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/ 793 } 794 795 /* General-Purpose optional (fsqrt, fsqrts) */ 796 have_FX = True; 797 if (__builtin_setjmp(env_unsup_insn)) { 798 have_FX = False; 799 } else { 800 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/ 801 } 802 803 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */ 804 have_GX = True; 805 if (__builtin_setjmp(env_unsup_insn)) { 806 have_GX = False; 807 } else { 808 __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/ 809 } 810 811 /* determine dcbz/dcbzl sizes while we still have the signal 812 * handlers registered */ 813 find_ppc_dcbz_sz(&vai); 814 815 VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL); 816 VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL); 817 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 818 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d\n", 819 (Int)have_F, (Int)have_V, (Int)have_FX, (Int)have_GX); 820 /* on ppc64, if we don't even have FP, just give up. */ 821 if (!have_F) 822 return False; 823 824 VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0; 825 826 va = VexArchPPC64; 827 828 vai.hwcaps = 0; 829 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC64_V; 830 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX; 831 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX; 832 833 /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be 834 called before we're ready to go. */ 835 return True; 836 } 837 838 #elif defined(VGA_arm) 839 { 840 /* Same instruction set detection algorithm as for ppc32. */ 841 vki_sigset_t saved_set, tmp_set; 842 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act; 843 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act; 844 845 volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON; 846 volatile Int archlevel; 847 Int r; 848 849 /* This is a kludge. Really we ought to back-convert saved_act 850 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but 851 since that's a no-op on all ppc64 platforms so far supported, 852 it's not worth the typing effort. At least include most basic 853 sanity check: */ 854 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); 855 856 VG_(sigemptyset)(&tmp_set); 857 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 858 VG_(sigaddset)(&tmp_set, VKI_SIGFPE); 859 860 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 861 vg_assert(r == 0); 862 863 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 864 vg_assert(r == 0); 865 tmp_sigill_act = saved_sigill_act; 866 867 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act); 868 tmp_sigfpe_act = saved_sigfpe_act; 869 870 /* NODEFER: signal handler does not return (from the kernel's point of 871 view), hence if it is to successfully catch a signal more than once, 872 we need the NODEFER flag. */ 873 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 874 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 875 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 876 tmp_sigill_act.ksa_handler = handler_unsup_insn; 877 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 878 879 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND; 880 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO; 881 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER; 882 tmp_sigfpe_act.ksa_handler = handler_unsup_insn; 883 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL); 884 885 /* VFP insns */ 886 have_VFP = True; 887 if (__builtin_setjmp(env_unsup_insn)) { 888 have_VFP = False; 889 } else { 890 __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */ 891 } 892 /* There are several generation of VFP extension but they differs very 893 little so for now we will not distinguish them. */ 894 have_VFP2 = have_VFP; 895 have_VFP3 = have_VFP; 896 897 /* NEON insns */ 898 have_NEON = True; 899 if (__builtin_setjmp(env_unsup_insn)) { 900 have_NEON = False; 901 } else { 902 __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */ 903 } 904 905 /* ARM architecture level */ 906 archlevel = 5; /* v5 will be base level */ 907 if (archlevel < 7) { 908 archlevel = 7; 909 if (__builtin_setjmp(env_unsup_insn)) { 910 archlevel = 5; 911 } else { 912 __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */ 913 } 914 } 915 if (archlevel < 6) { 916 archlevel = 6; 917 if (__builtin_setjmp(env_unsup_insn)) { 918 archlevel = 5; 919 } else { 920 __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */ 921 } 922 } 923 924 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act); 925 VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act); 926 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 927 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL); 928 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 929 930 VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n", 931 archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3, 932 (Int)have_NEON); 933 934 VG_(machine_arm_archlevel) = archlevel; 935 936 va = VexArchARM; 937 938 vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel); 939 if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3; 940 if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2; 941 if (have_VFP) vai.hwcaps |= VEX_HWCAPS_ARM_VFP; 942 if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON; 943 944 return True; 945 } 946 947 #else 948 # error "Unknown arch" 949 #endif 950 } 951 952 /* Notify host cpu cache line size. */ 953 #if defined(VGA_ppc32) 954 void VG_(machine_ppc32_set_clszB)( Int szB ) 955 { 956 vg_assert(hwcaps_done); 957 958 /* Either the value must not have been set yet (zero) or we can 959 tolerate it being set to the same value multiple times, as the 960 stack scanning logic in m_main is a bit stupid. */ 961 vg_assert(vai.ppc_cache_line_szB == 0 962 || vai.ppc_cache_line_szB == szB); 963 964 vg_assert(szB == 32 || szB == 64 || szB == 128); 965 vai.ppc_cache_line_szB = szB; 966 } 967 #endif 968 969 970 /* Notify host cpu cache line size. */ 971 #if defined(VGA_ppc64) 972 void VG_(machine_ppc64_set_clszB)( Int szB ) 973 { 974 vg_assert(hwcaps_done); 975 976 /* Either the value must not have been set yet (zero) or we can 977 tolerate it being set to the same value multiple times, as the 978 stack scanning logic in m_main is a bit stupid. */ 979 vg_assert(vai.ppc_cache_line_szB == 0 980 || vai.ppc_cache_line_szB == szB); 981 982 vg_assert(szB == 32 || szB == 64 || szB == 128); 983 vai.ppc_cache_line_szB = szB; 984 } 985 #endif 986 987 988 /* Notify host's ability to handle NEON instructions. */ 989 #if defined(VGA_arm) 990 void VG_(machine_arm_set_has_NEON)( Bool has_neon ) 991 { 992 vg_assert(hwcaps_done); 993 /* There's nothing else we can sanity check. */ 994 995 if (has_neon) { 996 vai.hwcaps |= VEX_HWCAPS_ARM_NEON; 997 } else { 998 vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON; 999 } 1000 } 1001 #endif 1002 1003 1004 /* Fetch host cpu info, once established. */ 1005 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa, 1006 /*OUT*/VexArchInfo* pVai ) 1007 { 1008 vg_assert(hwcaps_done); 1009 if (pVa) *pVa = va; 1010 if (pVai) *pVai = vai; 1011 } 1012 1013 1014 // Given a pointer to a function as obtained by "& functionname" in C, 1015 // produce a pointer to the actual entry point for the function. 1016 void* VG_(fnptr_to_fnentry)( void* f ) 1017 { 1018 #if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \ 1019 || defined(VGP_arm_linux) \ 1020 || defined(VGP_ppc32_linux) || defined(VGO_darwin) 1021 return f; 1022 #elif defined(VGP_ppc64_linux) || defined(VGP_ppc32_aix5) \ 1023 || defined(VGP_ppc64_aix5) 1024 /* All other ppc variants use the AIX scheme, in which f is a 1025 pointer to a 3-word function descriptor, of which the first word 1026 is the entry address. */ 1027 UWord* descr = (UWord*)f; 1028 return (void*)(descr[0]); 1029 #else 1030 # error "Unknown platform" 1031 #endif 1032 } 1033 1034 /*--------------------------------------------------------------------*/ 1035 /*--- end ---*/ 1036 /*--------------------------------------------------------------------*/ 1037