1 /*--------------------------------------------------------------------*/ 2 /*--- Machine-related stuff. m_machine.c ---*/ 3 /*--------------------------------------------------------------------*/ 4 5 /* 6 This file is part of Valgrind, a dynamic binary instrumentation 7 framework. 8 9 Copyright (C) 2000-2015 Julian Seward 10 jseward (at) acm.org 11 12 This program is free software; you can redistribute it and/or 13 modify it under the terms of the GNU General Public License as 14 published by the Free Software Foundation; either version 2 of the 15 License, or (at your option) any later version. 16 17 This program is distributed in the hope that it will be useful, but 18 WITHOUT ANY WARRANTY; without even the implied warranty of 19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 General Public License for more details. 21 22 You should have received a copy of the GNU General Public License 23 along with this program; if not, write to the Free Software 24 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 25 02111-1307, USA. 26 27 The GNU General Public License is contained in the file COPYING. 
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


/* Shorthands for the guest program counter, stack pointer and frame
   pointer slots of a ThreadArchState.  VG_INSTR_PTR etc are macros
   naming the arch-specific guest_XXX field of the vex guest state. */
#define INSTR_PTR(regs)   ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)   ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)   ((regs).vex.VG_FRAME_PTR)

/* Return the guest program counter of thread TID. */
Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
/* Return the guest stack pointer of thread TID. */
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
/* Return the guest frame pointer of thread TID. */
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

/* Set the guest program counter of thread TID to IP. */
void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
/* Set the guest stack pointer of thread TID to SP. */
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

/* Fill *REGS with the registers needed to begin stack unwinding for
   thread TID: the pc and sp, plus whichever extra registers the
   unwinder wants on this architecture (frame pointer, link register,
   GOT pointer, ...).  Registers narrower than 64 bits are widened to
   ULong with an explicit cast. */
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_tilegx)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_pc;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r54;
   regs->misc.TILEGX.r52
      = VG_(threads)[tid].arch.vex.guest_r52;
   regs->misc.TILEGX.r55
      = VG_(threads)[tid].arch.vex.guest_r55;
#  else
#    error "Unknown arch"
#  endif
}

/* Copy SIZE bytes, starting at byte OFFSET of thread TID's real guest
   state (shadowNo == 0) or of one of its two shadow states
   (shadowNo == 1 or 2), into DST.  The [OFFSET, OFFSET+SIZE) range
   must lie entirely inside VexGuestArchState. */
void
VG_(get_shadow_regs_area) ( ThreadId tid, 
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

/* Counterpart of VG_(get_shadow_regs_area): copy SIZE bytes from SRC
   into byte OFFSET of thread TID's real guest state (shadowNo == 0)
   or one of its shadow states (shadowNo == 1 or 2). */
void
VG_(set_shadow_regs_area) ( ThreadId tid, 
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}


/* Invoke F once for each integer general-purpose register of thread
   TID, passing the register's conventional name and its current guest
   value.  NOTE(review): the callback here takes Addr while
   VG_(apply_to_GP_regs) below declares it with UWord -- presumably the
   same underlying word type on all supported platforms; confirm.
   NOTE(review): on ARM, R7 is not visited even though
   VG_(get_UnwindStartRegs) reports it -- confirm this is intentional. */
static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#elif defined(VGA_tilegx)
   (*f)(tid, "r0",  vex->guest_r0 );
   (*f)(tid, "r1",  vex->guest_r1 );
   (*f)(tid, "r2",  vex->guest_r2 );
   (*f)(tid, "r3",  vex->guest_r3 );
   (*f)(tid, "r4",  vex->guest_r4 );
   (*f)(tid, "r5",  vex->guest_r5 );
   (*f)(tid, "r6",  vex->guest_r6 );
   (*f)(tid, "r7",  vex->guest_r7 );
   (*f)(tid, "r8",  vex->guest_r8 );
   (*f)(tid, "r9",  vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
   (*f)(tid, "r32", vex->guest_r32);
   (*f)(tid, "r33", vex->guest_r33);
   (*f)(tid, "r34", vex->guest_r34);
   (*f)(tid, "r35", vex->guest_r35);
   (*f)(tid, "r36", vex->guest_r36);
   (*f)(tid, "r37", vex->guest_r37);
   (*f)(tid, "r38", vex->guest_r38);
   (*f)(tid, "r39", vex->guest_r39);
   (*f)(tid, "r40", vex->guest_r40);
   (*f)(tid, "r41", vex->guest_r41);
   (*f)(tid, "r42", vex->guest_r42);
   (*f)(tid, "r43", vex->guest_r43);
   (*f)(tid, "r44", vex->guest_r44);
   (*f)(tid, "r45", vex->guest_r45);
   (*f)(tid, "r46", vex->guest_r46);
   (*f)(tid, "r47", vex->guest_r47);
   (*f)(tid, "r48", vex->guest_r48);
   (*f)(tid, "r49", vex->guest_r49);
   (*f)(tid, "r50", vex->guest_r50);
   (*f)(tid, "r51", vex->guest_r51);
   (*f)(tid, "r52", vex->guest_r52);
   (*f)(tid, "r53", vex->guest_r53);
   (*f)(tid, "r54", vex->guest_r54);
   (*f)(tid, "r55", vex->guest_r55);
#else
#  error Unknown arch
#endif
}


/* Invoke F on the general-purpose registers of every thread that is
   either currently valid or has been told to die because some other
   thread called exit. */
void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}

/* Initialise the thread-stack iterator used with
   VG_(thread_stack_next).  Setting *tid to (ThreadId)(-1) makes the
   first call to the iterator start the scan from slot 0. */
void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

/* Advance the iterator to the next non-empty thread slot after *TID,
   skipping VG_INVALID_THREADID.  On success, update *TID and report
   the thread's current stack extent [*stack_min, *stack_max]; return
   False when no further threads exist. */
Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}

/* Return the highest byte of thread TID's client stack.  TID must be
   a valid, non-empty slot. */
Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

/* Return the size in bytes of thread TID's client stack.  TID must be
   a valid, non-empty slot. */
SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

/* Return the base address of thread TID's signal alternate stack.
   TID must be a valid, non-empty slot. */
Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}
/* Return the size in bytes of thread TID's signal alternate stack.
   TID must be a valid, non-empty slot. */
SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
/* Set once VG_(machine_get_hwcaps) has run; guards against a second
   invocation. */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
/* SIGILL/SIGFPE handler for the probe instructions: long-jump back to
   the probing code, which interprets the jump as "unsupported". */
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   /* Count how many bytes the instruction zeroed; that is the
      effective dcbz block size. */
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF, identification = 0117C9, machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN.
*/ 599 600 static UInt VG_(get_machine_model)(void) 601 { 602 static struct model_map { 603 const HChar name[5]; 604 UInt id; 605 } model_map[] = { 606 { "2064", VEX_S390X_MODEL_Z900 }, 607 { "2066", VEX_S390X_MODEL_Z800 }, 608 { "2084", VEX_S390X_MODEL_Z990 }, 609 { "2086", VEX_S390X_MODEL_Z890 }, 610 { "2094", VEX_S390X_MODEL_Z9_EC }, 611 { "2096", VEX_S390X_MODEL_Z9_BC }, 612 { "2097", VEX_S390X_MODEL_Z10_EC }, 613 { "2098", VEX_S390X_MODEL_Z10_BC }, 614 { "2817", VEX_S390X_MODEL_Z196 }, 615 { "2818", VEX_S390X_MODEL_Z114 }, 616 { "2827", VEX_S390X_MODEL_ZEC12 }, 617 { "2828", VEX_S390X_MODEL_ZBC12 }, 618 { "2964", VEX_S390X_MODEL_Z13 }, 619 }; 620 621 Int model, n, fh; 622 SysRes fd; 623 SizeT num_bytes, file_buf_size; 624 HChar *p, *m, *model_name, *file_buf; 625 626 /* Slurp contents of /proc/cpuinfo into FILE_BUF */ 627 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR ); 628 if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN; 629 630 fh = sr_Res(fd); 631 632 /* Determine the size of /proc/cpuinfo. 633 Work around broken-ness in /proc file system implementation. 634 fstat returns a zero size for /proc/cpuinfo although it is 635 claimed to be a regular file. 
*/ 636 num_bytes = 0; 637 file_buf_size = 1000; 638 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1); 639 while (42) { 640 n = VG_(read)(fh, file_buf, file_buf_size); 641 if (n < 0) break; 642 643 num_bytes += n; 644 if (n < file_buf_size) break; /* reached EOF */ 645 } 646 647 if (n < 0) num_bytes = 0; /* read error; ignore contents */ 648 649 if (num_bytes > file_buf_size) { 650 VG_(free)( file_buf ); 651 VG_(lseek)( fh, 0, VKI_SEEK_SET ); 652 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 ); 653 n = VG_(read)( fh, file_buf, num_bytes ); 654 if (n < 0) num_bytes = 0; 655 } 656 657 file_buf[num_bytes] = '\0'; 658 VG_(close)(fh); 659 660 /* Parse file */ 661 model = VEX_S390X_MODEL_UNKNOWN; 662 for (p = file_buf; *p; ++p) { 663 /* Beginning of line */ 664 if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue; 665 666 m = VG_(strstr)( p, "machine" ); 667 if (m == NULL) continue; 668 669 p = m + sizeof "machine" - 1; 670 while ( VG_(isspace)( *p ) || *p == '=') { 671 if (*p == '\n') goto next_line; 672 ++p; 673 } 674 675 model_name = p; 676 for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) { 677 struct model_map *mm = model_map + n; 678 SizeT len = VG_(strlen)( mm->name ); 679 if ( VG_(strncmp)( mm->name, model_name, len ) == 0 && 680 VG_(isspace)( model_name[len] )) { 681 if (mm->id < model) model = mm->id; 682 p = model_name + len; 683 break; 684 } 685 } 686 /* Skip until end-of-line */ 687 while (*p != '\n') 688 ++p; 689 next_line: ; 690 } 691 692 VG_(free)( file_buf ); 693 VG_(debugLog)(1, "machine", "model = %s\n", 694 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN" 695 : model_map[model].name); 696 return model; 697 } 698 699 #endif /* VGA_s390x */ 700 701 #if defined(VGA_mips32) || defined(VGA_mips64) 702 703 /* Read /proc/cpuinfo and return the machine model. 
*/ 704 static UInt VG_(get_machine_model)(void) 705 { 706 const char *search_MIPS_str = "MIPS"; 707 const char *search_Broadcom_str = "Broadcom"; 708 const char *search_Netlogic_str = "Netlogic"; 709 const char *search_Cavium_str= "Cavium"; 710 Int n, fh; 711 SysRes fd; 712 SizeT num_bytes, file_buf_size; 713 HChar *file_buf; 714 715 /* Slurp contents of /proc/cpuinfo into FILE_BUF */ 716 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR ); 717 if ( sr_isError(fd) ) return -1; 718 719 fh = sr_Res(fd); 720 721 /* Determine the size of /proc/cpuinfo. 722 Work around broken-ness in /proc file system implementation. 723 fstat returns a zero size for /proc/cpuinfo although it is 724 claimed to be a regular file. */ 725 num_bytes = 0; 726 file_buf_size = 1000; 727 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1); 728 while (42) { 729 n = VG_(read)(fh, file_buf, file_buf_size); 730 if (n < 0) break; 731 732 num_bytes += n; 733 if (n < file_buf_size) break; /* reached EOF */ 734 } 735 736 if (n < 0) num_bytes = 0; /* read error; ignore contents */ 737 738 if (num_bytes > file_buf_size) { 739 VG_(free)( file_buf ); 740 VG_(lseek)( fh, 0, VKI_SEEK_SET ); 741 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 ); 742 n = VG_(read)( fh, file_buf, num_bytes ); 743 if (n < 0) num_bytes = 0; 744 } 745 746 file_buf[num_bytes] = '\0'; 747 VG_(close)(fh); 748 749 /* Parse file */ 750 if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL) 751 return VEX_PRID_COMP_BROADCOM; 752 if (VG_(strstr) (file_buf, search_Netlogic_str) != NULL) 753 return VEX_PRID_COMP_NETLOGIC; 754 if (VG_(strstr)(file_buf, search_Cavium_str) != NULL) 755 return VEX_PRID_COMP_CAVIUM; 756 if (VG_(strstr) (file_buf, search_MIPS_str) != NULL) 757 return VEX_PRID_COMP_MIPS; 758 759 /* Did not find string in the proc file. */ 760 return -1; 761 } 762 763 #endif 764 765 /* Determine what insn set and insn set variant the host has, and 766 record it. To be called once at system startup. 
Returns False if 767 this a CPU incapable of running Valgrind. 768 Also determine information about the caches on this host. */ 769 770 Bool VG_(machine_get_hwcaps)( void ) 771 { 772 vg_assert(hwcaps_done == False); 773 hwcaps_done = True; 774 775 // Whack default settings into vai, so that we only need to fill in 776 // any interesting bits. 777 LibVEX_default_VexArchInfo(&vai); 778 779 #if defined(VGA_x86) 780 { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext; 781 UInt eax, ebx, ecx, edx, max_extended; 782 HChar vstr[13]; 783 vstr[0] = 0; 784 785 if (!VG_(has_cpuid)()) 786 /* we can't do cpuid at all. Give up. */ 787 return False; 788 789 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx); 790 if (eax < 1) 791 /* we can't ask for cpuid(x) for x > 0. Give up. */ 792 return False; 793 794 /* Get processor ID string, and max basic/extended index 795 values. */ 796 VG_(memcpy)(&vstr[0], &ebx, 4); 797 VG_(memcpy)(&vstr[4], &edx, 4); 798 VG_(memcpy)(&vstr[8], &ecx, 4); 799 vstr[12] = 0; 800 801 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx); 802 max_extended = eax; 803 804 /* get capabilities bits into edx */ 805 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx); 806 807 have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */ 808 have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */ 809 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */ 810 811 /* cmpxchg8b is a minimum requirement now; if we don't have it we 812 must simply give up. But all CPUs since Pentium-I have it, so 813 that doesn't seem like much of a restriction. */ 814 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */ 815 if (!have_cx8) 816 return False; 817 818 /* Figure out if this is an AMD that can do MMXEXT. */ 819 have_mmxext = False; 820 if (0 == VG_(strcmp)(vstr, "AuthenticAMD") 821 && max_extended >= 0x80000001) { 822 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx); 823 /* Some older AMD processors support a sse1 subset (Integer SSE). 
*/ 824 have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0); 825 } 826 827 /* Figure out if this is an AMD or Intel that can do LZCNT. */ 828 have_lzcnt = False; 829 if ((0 == VG_(strcmp)(vstr, "AuthenticAMD") 830 || 0 == VG_(strcmp)(vstr, "GenuineIntel")) 831 && max_extended >= 0x80000001) { 832 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx); 833 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */ 834 } 835 836 /* Intel processors don't define the mmxext extension, but since it 837 is just a sse1 subset always define it when we have sse1. */ 838 if (have_sse1) 839 have_mmxext = True; 840 841 va = VexArchX86; 842 vai.endness = VexEndnessLE; 843 844 if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) { 845 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; 846 vai.hwcaps |= VEX_HWCAPS_X86_SSE1; 847 vai.hwcaps |= VEX_HWCAPS_X86_SSE2; 848 vai.hwcaps |= VEX_HWCAPS_X86_SSE3; 849 if (have_lzcnt) 850 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT; 851 VG_(machine_x86_have_mxcsr) = 1; 852 } else if (have_sse2 && have_sse1 && have_mmxext) { 853 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; 854 vai.hwcaps |= VEX_HWCAPS_X86_SSE1; 855 vai.hwcaps |= VEX_HWCAPS_X86_SSE2; 856 if (have_lzcnt) 857 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT; 858 VG_(machine_x86_have_mxcsr) = 1; 859 } else if (have_sse1 && have_mmxext) { 860 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; 861 vai.hwcaps |= VEX_HWCAPS_X86_SSE1; 862 VG_(machine_x86_have_mxcsr) = 1; 863 } else if (have_mmxext) { 864 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/ 865 VG_(machine_x86_have_mxcsr) = 0; 866 } else { 867 vai.hwcaps = 0; /*baseline - no sse at all*/ 868 VG_(machine_x86_have_mxcsr) = 0; 869 } 870 871 VG_(machine_get_cache_info)(&vai); 872 873 return True; 874 } 875 876 #elif defined(VGA_amd64) 877 { Bool have_sse3, have_cx8, have_cx16; 878 Bool have_lzcnt, have_avx, have_bmi, have_avx2; 879 Bool have_rdtscp; 880 UInt eax, ebx, ecx, edx, max_basic, max_extended; 881 ULong xgetbv_0 = 0; 882 HChar vstr[13]; 883 vstr[0] = 0; 884 885 if 
(!VG_(has_cpuid)()) 886 /* we can't do cpuid at all. Give up. */ 887 return False; 888 889 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx); 890 max_basic = eax; 891 if (max_basic < 1) 892 /* we can't ask for cpuid(x) for x > 0. Give up. */ 893 return False; 894 895 /* Get processor ID string, and max basic/extended index 896 values. */ 897 VG_(memcpy)(&vstr[0], &ebx, 4); 898 VG_(memcpy)(&vstr[4], &edx, 4); 899 VG_(memcpy)(&vstr[8], &ecx, 4); 900 vstr[12] = 0; 901 902 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx); 903 max_extended = eax; 904 905 /* get capabilities bits into edx */ 906 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx); 907 908 // we assume that SSE1 and SSE2 are available by default 909 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */ 910 // ssse3 is ecx:9 911 // sse41 is ecx:19 912 // sse42 is ecx:20 913 914 // xsave is ecx:26 915 // osxsave is ecx:27 916 // avx is ecx:28 917 // fma is ecx:12 918 have_avx = False; 919 /* have_fma = False; */ 920 if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) { 921 /* Processor supports AVX instructions and XGETBV is enabled 922 by OS and AVX instructions are enabled by the OS. */ 923 ULong w; 924 __asm__ __volatile__("movq $0,%%rcx ; " 925 ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */ 926 "movq %%rax,%0" 927 :/*OUT*/"=r"(w) :/*IN*/ 928 :/*TRASH*/"rdx","rcx","rax"); 929 xgetbv_0 = w; 930 if ((xgetbv_0 & 7) == 7) { 931 /* Only say we have AVX if the XSAVE-allowable 932 bitfield-mask allows x87, SSE and AVX state. We could 933 actually run with a more restrictive XGETBV(0) value, 934 but VEX's implementation of XSAVE and XRSTOR assumes 935 that all 3 bits are enabled. 936 937 Also, the VEX implementation of XSAVE/XRSTOR assumes that 938 state component [2] (the YMM high halves) are located in 939 the XSAVE image at offsets 576 .. 831. So we have to 940 check that here before declaring AVX to be supported. 
*/ 941 UInt eax2, ebx2, ecx2, edx2; 942 VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2); 943 if (ebx2 == 576 && eax2 == 256) { 944 have_avx = True; 945 } 946 /* have_fma = (ecx & (1<<12)) != 0; */ 947 /* have_fma: Probably correct, but gcc complains due to 948 unusedness. */ 949 } 950 } 951 952 /* cmpxchg8b is a minimum requirement now; if we don't have it we 953 must simply give up. But all CPUs since Pentium-I have it, so 954 that doesn't seem like much of a restriction. */ 955 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */ 956 if (!have_cx8) 957 return False; 958 959 /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */ 960 have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */ 961 962 /* Figure out if this CPU can do LZCNT. */ 963 have_lzcnt = False; 964 if (max_extended >= 0x80000001) { 965 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx); 966 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */ 967 } 968 969 /* Can we do RDTSCP? */ 970 have_rdtscp = False; 971 if (max_extended >= 0x80000001) { 972 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx); 973 have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSVCP */ 974 } 975 976 /* Check for BMI1 and AVX2. If we have AVX1 (plus OS support). */ 977 have_bmi = False; 978 have_avx2 = False; 979 if (have_avx && max_basic >= 7) { 980 VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx); 981 have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */ 982 have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */ 983 } 984 985 va = VexArchAMD64; 986 vai.endness = VexEndnessLE; 987 vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0) 988 | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0) 989 | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0) 990 | (have_avx ? VEX_HWCAPS_AMD64_AVX : 0) 991 | (have_bmi ? VEX_HWCAPS_AMD64_BMI : 0) 992 | (have_avx2 ? VEX_HWCAPS_AMD64_AVX2 : 0) 993 | (have_rdtscp ? 
VEX_HWCAPS_AMD64_RDTSCP : 0); 994 995 VG_(machine_get_cache_info)(&vai); 996 997 return True; 998 } 999 1000 #elif defined(VGA_ppc32) 1001 { 1002 /* Find out which subset of the ppc32 instruction set is supported by 1003 verifying whether various ppc32 instructions generate a SIGILL 1004 or a SIGFPE. An alternative approach is to check the AT_HWCAP and 1005 AT_PLATFORM entries in the ELF auxiliary table -- see also 1006 the_iifii.client_auxv in m_main.c. 1007 */ 1008 vki_sigset_t saved_set, tmp_set; 1009 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act; 1010 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act; 1011 1012 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP; 1013 volatile Bool have_isa_2_07; 1014 Int r; 1015 1016 /* This is a kludge. Really we ought to back-convert saved_act 1017 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but 1018 since that's a no-op on all ppc32 platforms so far supported, 1019 it's not worth the typing effort. At least include most basic 1020 sanity check: */ 1021 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); 1022 1023 VG_(sigemptyset)(&tmp_set); 1024 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 1025 VG_(sigaddset)(&tmp_set, VKI_SIGFPE); 1026 1027 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 1028 vg_assert(r == 0); 1029 1030 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 1031 vg_assert(r == 0); 1032 tmp_sigill_act = saved_sigill_act; 1033 1034 r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act); 1035 vg_assert(r == 0); 1036 tmp_sigfpe_act = saved_sigfpe_act; 1037 1038 /* NODEFER: signal handler does not return (from the kernel's point of 1039 view), hence if it is to successfully catch a signal more than once, 1040 we need the NODEFER flag. 
*/ 1041 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 1042 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 1043 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 1044 tmp_sigill_act.ksa_handler = handler_unsup_insn; 1045 r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1046 vg_assert(r == 0); 1047 1048 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND; 1049 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO; 1050 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER; 1051 tmp_sigfpe_act.ksa_handler = handler_unsup_insn; 1052 r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL); 1053 vg_assert(r == 0); 1054 1055 /* standard FP insns */ 1056 have_F = True; 1057 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1058 have_F = False; 1059 } else { 1060 __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */ 1061 } 1062 1063 /* Altivec insns */ 1064 have_V = True; 1065 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1066 have_V = False; 1067 } else { 1068 /* Unfortunately some older assemblers don't speak Altivec (or 1069 choose not to), so to be safe we directly emit the 32-bit 1070 word corresponding to "vor 0,0,0". This fixes a build 1071 problem that happens on Debian 3.1 (ppc32), and probably 1072 various other places. 
*/ 1073 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/ 1074 } 1075 1076 /* General-Purpose optional (fsqrt, fsqrts) */ 1077 have_FX = True; 1078 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1079 have_FX = False; 1080 } else { 1081 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */ 1082 } 1083 1084 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */ 1085 have_GX = True; 1086 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1087 have_GX = False; 1088 } else { 1089 __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */ 1090 } 1091 1092 /* VSX support implies Power ISA 2.06 */ 1093 have_VX = True; 1094 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1095 have_VX = False; 1096 } else { 1097 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */ 1098 } 1099 1100 /* Check for Decimal Floating Point (DFP) support. */ 1101 have_DFP = True; 1102 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1103 have_DFP = False; 1104 } else { 1105 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */ 1106 } 1107 1108 /* Check for ISA 2.07 support. */ 1109 have_isa_2_07 = True; 1110 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1111 have_isa_2_07 = False; 1112 } else { 1113 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */ 1114 } 1115 1116 /* determine dcbz/dcbzl sizes while we still have the signal 1117 * handlers registered */ 1118 find_ppc_dcbz_sz(&vai); 1119 1120 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL); 1121 vg_assert(r == 0); 1122 r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL); 1123 vg_assert(r == 0); 1124 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 1125 vg_assert(r == 0); 1126 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n", 1127 (Int)have_F, (Int)have_V, (Int)have_FX, 1128 (Int)have_GX, (Int)have_VX, (Int)have_DFP, 1129 (Int)have_isa_2_07); 1130 /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. 
*/ 1131 if (have_V && !have_F) 1132 have_V = False; 1133 if (have_FX && !have_F) 1134 have_FX = False; 1135 if (have_GX && !have_F) 1136 have_GX = False; 1137 1138 VG_(machine_ppc32_has_FP) = have_F ? 1 : 0; 1139 VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0; 1140 1141 va = VexArchPPC32; 1142 vai.endness = VexEndnessBE; 1143 1144 vai.hwcaps = 0; 1145 if (have_F) vai.hwcaps |= VEX_HWCAPS_PPC32_F; 1146 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC32_V; 1147 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX; 1148 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX; 1149 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX; 1150 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP; 1151 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07; 1152 1153 VG_(machine_get_cache_info)(&vai); 1154 1155 /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be 1156 called before we're ready to go. */ 1157 return True; 1158 } 1159 1160 #elif defined(VGA_ppc64be)|| defined(VGA_ppc64le) 1161 { 1162 /* Same instruction set detection algorithm as for ppc32. */ 1163 vki_sigset_t saved_set, tmp_set; 1164 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act; 1165 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act; 1166 1167 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP; 1168 volatile Bool have_isa_2_07; 1169 Int r; 1170 1171 /* This is a kludge. Really we ought to back-convert saved_act 1172 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but 1173 since that's a no-op on all ppc64 platforms so far supported, 1174 it's not worth the typing effort. 
At least include most basic 1175 sanity check: */ 1176 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); 1177 1178 VG_(sigemptyset)(&tmp_set); 1179 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 1180 VG_(sigaddset)(&tmp_set, VKI_SIGFPE); 1181 1182 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 1183 vg_assert(r == 0); 1184 1185 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 1186 vg_assert(r == 0); 1187 tmp_sigill_act = saved_sigill_act; 1188 1189 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act); 1190 tmp_sigfpe_act = saved_sigfpe_act; 1191 1192 /* NODEFER: signal handler does not return (from the kernel's point of 1193 view), hence if it is to successfully catch a signal more than once, 1194 we need the NODEFER flag. */ 1195 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 1196 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 1197 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 1198 tmp_sigill_act.ksa_handler = handler_unsup_insn; 1199 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1200 1201 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND; 1202 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO; 1203 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER; 1204 tmp_sigfpe_act.ksa_handler = handler_unsup_insn; 1205 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL); 1206 1207 /* standard FP insns */ 1208 have_F = True; 1209 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1210 have_F = False; 1211 } else { 1212 __asm__ __volatile__("fmr 0,0"); 1213 } 1214 1215 /* Altivec insns */ 1216 have_V = True; 1217 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1218 have_V = False; 1219 } else { 1220 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/ 1221 } 1222 1223 /* General-Purpose optional (fsqrt, fsqrts) */ 1224 have_FX = True; 1225 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1226 have_FX = False; 1227 } else { 1228 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/ 1229 } 1230 1231 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */ 1232 have_GX = True; 1233 if 
(VG_MINIMAL_SETJMP(env_unsup_insn)) { 1234 have_GX = False; 1235 } else { 1236 __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/ 1237 } 1238 1239 /* VSX support implies Power ISA 2.06 */ 1240 have_VX = True; 1241 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1242 have_VX = False; 1243 } else { 1244 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */ 1245 } 1246 1247 /* Check for Decimal Floating Point (DFP) support. */ 1248 have_DFP = True; 1249 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1250 have_DFP = False; 1251 } else { 1252 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */ 1253 } 1254 1255 /* Check for ISA 2.07 support. */ 1256 have_isa_2_07 = True; 1257 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1258 have_isa_2_07 = False; 1259 } else { 1260 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */ 1261 } 1262 1263 /* determine dcbz/dcbzl sizes while we still have the signal 1264 * handlers registered */ 1265 find_ppc_dcbz_sz(&vai); 1266 1267 VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL); 1268 VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL); 1269 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 1270 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n", 1271 (Int)have_F, (Int)have_V, (Int)have_FX, 1272 (Int)have_GX, (Int)have_VX, (Int)have_DFP, 1273 (Int)have_isa_2_07); 1274 /* on ppc64be, if we don't even have FP, just give up. */ 1275 if (!have_F) 1276 return False; 1277 1278 VG_(machine_ppc64_has_VMX) = have_V ? 
1 : 0; 1279 1280 va = VexArchPPC64; 1281 # if defined(VKI_LITTLE_ENDIAN) 1282 vai.endness = VexEndnessLE; 1283 # elif defined(VKI_BIG_ENDIAN) 1284 vai.endness = VexEndnessBE; 1285 # else 1286 vai.endness = VexEndness_INVALID; 1287 # endif 1288 1289 vai.hwcaps = 0; 1290 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC64_V; 1291 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX; 1292 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX; 1293 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX; 1294 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP; 1295 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07; 1296 1297 VG_(machine_get_cache_info)(&vai); 1298 1299 /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be 1300 called before we're ready to go. */ 1301 return True; 1302 } 1303 1304 #elif defined(VGA_s390x) 1305 1306 # include "libvex_s390x_common.h" 1307 1308 { 1309 /* Instruction set detection code borrowed from ppc above. */ 1310 vki_sigset_t saved_set, tmp_set; 1311 vki_sigaction_fromK_t saved_sigill_act; 1312 vki_sigaction_toK_t tmp_sigill_act; 1313 1314 volatile Bool have_LDISP, have_STFLE; 1315 Int i, r, model; 1316 1317 /* If the model is "unknown" don't treat this as an error. Assume 1318 this is a brand-new machine model for which we don't have the 1319 identification yet. Keeping fingers crossed. */ 1320 model = VG_(get_machine_model)(); 1321 1322 /* Unblock SIGILL and stash away the old action for that signal */ 1323 VG_(sigemptyset)(&tmp_set); 1324 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 1325 1326 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 1327 vg_assert(r == 0); 1328 1329 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 1330 vg_assert(r == 0); 1331 tmp_sigill_act = saved_sigill_act; 1332 1333 /* NODEFER: signal handler does not return (from the kernel's point of 1334 view), hence if it is to successfully catch a signal more than once, 1335 we need the NODEFER flag. 
*/ 1336 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 1337 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 1338 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 1339 tmp_sigill_act.ksa_handler = handler_unsup_insn; 1340 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1341 1342 /* Determine hwcaps. Note, we cannot use the stfle insn because it 1343 is not supported on z900. */ 1344 1345 have_LDISP = True; 1346 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1347 have_LDISP = False; 1348 } else { 1349 /* BASR loads the address of the next insn into r1. Needed to avoid 1350 a segfault in XY. */ 1351 __asm__ __volatile__("basr %%r1,%%r0\n\t" 1352 ".long 0xe3001000\n\t" /* XY 0,0(%r1) */ 1353 ".short 0x0057" : : : "r0", "r1", "cc", "memory"); 1354 } 1355 1356 /* Check availability of STFLE. If available store facility bits 1357 in hoststfle. */ 1358 ULong hoststfle[S390_NUM_FACILITY_DW]; 1359 1360 for (i = 0; i < S390_NUM_FACILITY_DW; ++i) 1361 hoststfle[i] = 0; 1362 1363 have_STFLE = True; 1364 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1365 have_STFLE = False; 1366 } else { 1367 register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1; 1368 1369 __asm__ __volatile__(" .insn s,0xb2b00000,%0\n" /* stfle */ 1370 : "=m" (hoststfle), "+d"(reg0) 1371 : : "cc", "memory"); 1372 } 1373 1374 /* Restore signals */ 1375 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL); 1376 vg_assert(r == 0); 1377 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 1378 vg_assert(r == 0); 1379 va = VexArchS390X; 1380 vai.endness = VexEndnessBE; 1381 1382 vai.hwcaps = model; 1383 if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE; 1384 if (have_LDISP) { 1385 /* Use long displacement only on machines >= z990. For all other 1386 machines it is millicoded and therefore slow. */ 1387 if (model >= VEX_S390X_MODEL_Z990) 1388 vai.hwcaps |= VEX_HWCAPS_S390X_LDISP; 1389 } 1390 1391 /* Detect presence of certain facilities using the STFLE insn. 
1392 Note, that these facilities were introduced at the same time or later 1393 as STFLE, so the absence of STLFE implies the absence of the facility 1394 we're trying to detect. */ 1395 struct fac_hwcaps_map { 1396 UInt installed; 1397 UInt facility_bit; 1398 UInt hwcaps_bit; 1399 const HChar name[6]; // may need adjustment for new facility names 1400 } fac_hwcaps[] = { 1401 { False, S390_FAC_EIMM, VEX_HWCAPS_S390X_EIMM, "EIMM" }, 1402 { False, S390_FAC_GIE, VEX_HWCAPS_S390X_GIE, "GIE" }, 1403 { False, S390_FAC_DFP, VEX_HWCAPS_S390X_DFP, "DFP" }, 1404 { False, S390_FAC_FPSE, VEX_HWCAPS_S390X_FGX, "FGX" }, 1405 { False, S390_FAC_ETF2, VEX_HWCAPS_S390X_ETF2, "ETF2" }, 1406 { False, S390_FAC_ETF3, VEX_HWCAPS_S390X_ETF3, "ETF3" }, 1407 { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" }, 1408 { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" }, 1409 { False, S390_FAC_LSC, VEX_HWCAPS_S390X_LSC, "LSC" }, 1410 { False, S390_FAC_PFPO, VEX_HWCAPS_S390X_PFPO, "PFPO" }, 1411 }; 1412 1413 /* Set hwcaps according to the detected facilities */ 1414 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) { 1415 vg_assert(fac_hwcaps[i].facility_bit <= 63); // for now 1416 if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) { 1417 fac_hwcaps[i].installed = True; 1418 vai.hwcaps |= fac_hwcaps[i].hwcaps_bit; 1419 } 1420 } 1421 1422 /* Build up a string showing the probed-for facilities */ 1423 HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) * 1424 (sizeof fac_hwcaps[0].name + 3) + // %s %d 1425 7 + 1 + 4 + 2 // machine %4d 1426 + 1]; // \0 1427 HChar *p = fac_str; 1428 p += VG_(sprintf)(p, "machine %4d ", model); 1429 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) { 1430 p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name, 1431 fac_hwcaps[i].installed); 1432 } 1433 *p++ = '\0'; 1434 1435 VG_(debugLog)(1, "machine", "%s\n", fac_str); 1436 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps); 1437 1438 
VG_(machine_get_cache_info)(&vai); 1439 1440 return True; 1441 } 1442 1443 #elif defined(VGA_arm) 1444 { 1445 /* Same instruction set detection algorithm as for ppc32. */ 1446 vki_sigset_t saved_set, tmp_set; 1447 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act; 1448 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act; 1449 1450 volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON; 1451 volatile Int archlevel; 1452 Int r; 1453 1454 /* This is a kludge. Really we ought to back-convert saved_act 1455 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but 1456 since that's a no-op on all ppc64 platforms so far supported, 1457 it's not worth the typing effort. At least include most basic 1458 sanity check: */ 1459 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); 1460 1461 VG_(sigemptyset)(&tmp_set); 1462 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 1463 VG_(sigaddset)(&tmp_set, VKI_SIGFPE); 1464 1465 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 1466 vg_assert(r == 0); 1467 1468 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 1469 vg_assert(r == 0); 1470 tmp_sigill_act = saved_sigill_act; 1471 1472 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act); 1473 tmp_sigfpe_act = saved_sigfpe_act; 1474 1475 /* NODEFER: signal handler does not return (from the kernel's point of 1476 view), hence if it is to successfully catch a signal more than once, 1477 we need the NODEFER flag. 
*/ 1478 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 1479 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 1480 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 1481 tmp_sigill_act.ksa_handler = handler_unsup_insn; 1482 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1483 1484 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND; 1485 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO; 1486 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER; 1487 tmp_sigfpe_act.ksa_handler = handler_unsup_insn; 1488 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL); 1489 1490 /* VFP insns */ 1491 have_VFP = True; 1492 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1493 have_VFP = False; 1494 } else { 1495 __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */ 1496 } 1497 /* There are several generation of VFP extension but they differs very 1498 little so for now we will not distinguish them. */ 1499 have_VFP2 = have_VFP; 1500 have_VFP3 = have_VFP; 1501 1502 /* NEON insns */ 1503 have_NEON = True; 1504 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1505 have_NEON = False; 1506 } else { 1507 __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */ 1508 } 1509 1510 /* ARM architecture level */ 1511 archlevel = 5; /* v5 will be base level */ 1512 if (archlevel < 7) { 1513 archlevel = 7; 1514 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1515 archlevel = 5; 1516 } else { 1517 __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */ 1518 } 1519 } 1520 if (archlevel < 6) { 1521 archlevel = 6; 1522 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1523 archlevel = 5; 1524 } else { 1525 __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */ 1526 } 1527 } 1528 1529 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act); 1530 VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act); 1531 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1532 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL); 1533 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 1534 1535 VG_(debugLog)(1, "machine", 
"ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n", 1536 archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3, 1537 (Int)have_NEON); 1538 1539 VG_(machine_arm_archlevel) = archlevel; 1540 1541 va = VexArchARM; 1542 vai.endness = VexEndnessLE; 1543 1544 vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel); 1545 if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3; 1546 if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2; 1547 if (have_VFP) vai.hwcaps |= VEX_HWCAPS_ARM_VFP; 1548 if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON; 1549 1550 VG_(machine_get_cache_info)(&vai); 1551 1552 return True; 1553 } 1554 1555 #elif defined(VGA_arm64) 1556 { 1557 va = VexArchARM64; 1558 vai.endness = VexEndnessLE; 1559 1560 /* So far there are no variants. */ 1561 vai.hwcaps = 0; 1562 1563 VG_(machine_get_cache_info)(&vai); 1564 1565 /* 0 denotes 'not set'. The range of legitimate values here, 1566 after being set that is, is 2 though 17 inclusive. */ 1567 vg_assert(vai.arm64_dMinLine_lg2_szB == 0); 1568 vg_assert(vai.arm64_iMinLine_lg2_szB == 0); 1569 ULong ctr_el0; 1570 __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0)); 1571 vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2; 1572 vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >> 0) & 0xF) + 2; 1573 VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, " 1574 "ctr_el0.iMinLine_szB = %d\n", 1575 1 << vai.arm64_dMinLine_lg2_szB, 1576 1 << vai.arm64_iMinLine_lg2_szB); 1577 1578 return True; 1579 } 1580 1581 #elif defined(VGA_mips32) 1582 { 1583 /* Define the position of F64 bit in FIR register. */ 1584 # define FP64 22 1585 va = VexArchMIPS32; 1586 UInt model = VG_(get_machine_model)(); 1587 if (model == -1) 1588 return False; 1589 1590 vai.hwcaps = model; 1591 1592 # if defined(VKI_LITTLE_ENDIAN) 1593 vai.endness = VexEndnessLE; 1594 # elif defined(VKI_BIG_ENDIAN) 1595 vai.endness = VexEndnessBE; 1596 # else 1597 vai.endness = VexEndness_INVALID; 1598 # endif 1599 1600 /* Same instruction set detection algorithm as for ppc32/arm... 
*/ 1601 vki_sigset_t saved_set, tmp_set; 1602 vki_sigaction_fromK_t saved_sigill_act; 1603 vki_sigaction_toK_t tmp_sigill_act; 1604 1605 volatile Bool have_DSP, have_DSPr2; 1606 Int r; 1607 1608 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t)); 1609 1610 VG_(sigemptyset)(&tmp_set); 1611 VG_(sigaddset)(&tmp_set, VKI_SIGILL); 1612 1613 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set); 1614 vg_assert(r == 0); 1615 1616 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act); 1617 vg_assert(r == 0); 1618 tmp_sigill_act = saved_sigill_act; 1619 1620 /* NODEFER: signal handler does not return (from the kernel's point of 1621 view), hence if it is to successfully catch a signal more than once, 1622 we need the NODEFER flag. */ 1623 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND; 1624 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO; 1625 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER; 1626 tmp_sigill_act.ksa_handler = handler_unsup_insn; 1627 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1628 1629 if (model == VEX_PRID_COMP_MIPS) { 1630 /* DSPr2 instructions. */ 1631 have_DSPr2 = True; 1632 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1633 have_DSPr2 = False; 1634 } else { 1635 __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */ 1636 } 1637 if (have_DSPr2) { 1638 /* We assume it's 74K, since it can run DSPr2. */ 1639 vai.hwcaps |= VEX_PRID_IMP_74K; 1640 } else { 1641 /* DSP instructions. */ 1642 have_DSP = True; 1643 if (VG_MINIMAL_SETJMP(env_unsup_insn)) { 1644 have_DSP = False; 1645 } else { 1646 __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */ 1647 } 1648 if (have_DSP) { 1649 /* We assume it's 34K, since it has support for DSP. */ 1650 vai.hwcaps |= VEX_PRID_IMP_34K; 1651 } 1652 } 1653 } 1654 1655 /* Check if CPU has FPU and 32 dbl. prec. 
FP registers */ 1656 int FIR = 0; 1657 __asm__ __volatile__( 1658 "cfc1 %0, $0" "\n\t" 1659 : "=r" (FIR) 1660 ); 1661 if (FIR & (1 << FP64)) { 1662 vai.hwcaps |= VEX_PRID_CPU_32FPR; 1663 } 1664 1665 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act); 1666 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL); 1667 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); 1668 1669 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps); 1670 VG_(machine_get_cache_info)(&vai); 1671 1672 return True; 1673 } 1674 1675 #elif defined(VGA_mips64) 1676 { 1677 va = VexArchMIPS64; 1678 UInt model = VG_(get_machine_model)(); 1679 if (model == -1) 1680 return False; 1681 1682 vai.hwcaps = model; 1683 1684 # if defined(VKI_LITTLE_ENDIAN) 1685 vai.endness = VexEndnessLE; 1686 # elif defined(VKI_BIG_ENDIAN) 1687 vai.endness = VexEndnessBE; 1688 # else 1689 vai.endness = VexEndness_INVALID; 1690 # endif 1691 1692 VG_(machine_get_cache_info)(&vai); 1693 1694 return True; 1695 } 1696 1697 #elif defined(VGA_tilegx) 1698 { 1699 va = VexArchTILEGX; 1700 vai.hwcaps = VEX_HWCAPS_TILEGX_BASE; 1701 vai.endness = VexEndnessLE; 1702 1703 VG_(machine_get_cache_info)(&vai); 1704 1705 return True; 1706 } 1707 1708 #else 1709 # error "Unknown arch" 1710 #endif 1711 } 1712 1713 /* Notify host cpu instruction cache line size. */ 1714 #if defined(VGA_ppc32) 1715 void VG_(machine_ppc32_set_clszB)( Int szB ) 1716 { 1717 vg_assert(hwcaps_done); 1718 1719 /* Either the value must not have been set yet (zero) or we can 1720 tolerate it being set to the same value multiple times, as the 1721 stack scanning logic in m_main is a bit stupid. */ 1722 vg_assert(vai.ppc_icache_line_szB == 0 1723 || vai.ppc_icache_line_szB == szB); 1724 1725 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128); 1726 vai.ppc_icache_line_szB = szB; 1727 } 1728 #endif 1729 1730 1731 /* Notify host cpu instruction cache line size. 
*/ 1732 #if defined(VGA_ppc64be)|| defined(VGA_ppc64le) 1733 void VG_(machine_ppc64_set_clszB)( Int szB ) 1734 { 1735 vg_assert(hwcaps_done); 1736 1737 /* Either the value must not have been set yet (zero) or we can 1738 tolerate it being set to the same value multiple times, as the 1739 stack scanning logic in m_main is a bit stupid. */ 1740 vg_assert(vai.ppc_icache_line_szB == 0 1741 || vai.ppc_icache_line_szB == szB); 1742 1743 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128); 1744 vai.ppc_icache_line_szB = szB; 1745 } 1746 #endif 1747 1748 1749 /* Notify host's ability to handle NEON instructions. */ 1750 #if defined(VGA_arm) 1751 void VG_(machine_arm_set_has_NEON)( Bool has_neon ) 1752 { 1753 vg_assert(hwcaps_done); 1754 /* There's nothing else we can sanity check. */ 1755 1756 if (has_neon) { 1757 vai.hwcaps |= VEX_HWCAPS_ARM_NEON; 1758 } else { 1759 vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON; 1760 } 1761 } 1762 #endif 1763 1764 1765 /* Fetch host cpu info, once established. */ 1766 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa, 1767 /*OUT*/VexArchInfo* pVai ) 1768 { 1769 vg_assert(hwcaps_done); 1770 if (pVa) *pVa = va; 1771 if (pVai) *pVai = vai; 1772 } 1773 1774 1775 /* Returns the size of the largest guest register that we will 1776 simulate in this run. This depends on both the guest architecture 1777 and on the specific capabilities we are simulating for that guest 1778 (eg, AVX or non-AVX ?, for amd64). Should return either 4, 8, 16 1779 or 32. General rule: if in doubt, return a value larger than 1780 reality. 1781 1782 This information is needed by Cachegrind and Callgrind to decide 1783 what the minimum cache line size they are prepared to simulate is. 1784 Basically require that the minimum cache line size is at least as 1785 large as the largest register that might get transferred to/from 1786 memory, so as to guarantee that any such transaction can straddle 1787 at most 2 cache lines. 
1788 */ 1789 Int VG_(machine_get_size_of_largest_guest_register) ( void ) 1790 { 1791 vg_assert(hwcaps_done); 1792 /* Once hwcaps_done is True, we can fish around inside va/vai to 1793 find the information we need. */ 1794 1795 # if defined(VGA_x86) 1796 vg_assert(va == VexArchX86); 1797 /* We don't support AVX, so 32 is out. At the other end, even if 1798 we don't support any SSE, the X87 can generate 10 byte 1799 transfers, so let's say 16 to be on the safe side. Hence the 1800 answer is always 16. */ 1801 return 16; 1802 1803 # elif defined(VGA_amd64) 1804 /* if AVX then 32 else 16 */ 1805 return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16; 1806 1807 # elif defined(VGA_ppc32) 1808 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */ 1809 if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16; 1810 if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16; 1811 if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16; 1812 return 8; 1813 1814 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le) 1815 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */ 1816 if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16; 1817 if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16; 1818 if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16; 1819 return 8; 1820 1821 # elif defined(VGA_s390x) 1822 return 8; 1823 1824 # elif defined(VGA_arm) 1825 /* Really it depends whether or not we have NEON, but let's just 1826 assume we always do. */ 1827 return 16; 1828 1829 # elif defined(VGA_arm64) 1830 /* ARM64 always has Neon, AFAICS. */ 1831 return 16; 1832 1833 # elif defined(VGA_mips32) 1834 /* The guest state implies 4, but that can't really be true, can 1835 it? */ 1836 return 8; 1837 1838 # elif defined(VGA_mips64) 1839 return 8; 1840 1841 # elif defined(VGA_tilegx) 1842 return 8; 1843 1844 # else 1845 # error "Unknown arch" 1846 # endif 1847 } 1848 1849 1850 // Given a pointer to a function as obtained by "& functionname" in C, 1851 // produce a pointer to the actual entry point for the function. 
1852 void* VG_(fnptr_to_fnentry)( void* f ) 1853 { 1854 # if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \ 1855 || defined(VGP_arm_linux) || defined(VGO_darwin) \ 1856 || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \ 1857 || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \ 1858 || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \ 1859 || defined(VGP_tilegx_linux) || defined(VGP_x86_solaris) \ 1860 || defined(VGP_amd64_solaris) 1861 return f; 1862 # elif defined(VGP_ppc64be_linux) 1863 /* ppc64-linux uses the AIX scheme, in which f is a pointer to a 1864 3-word function descriptor, of which the first word is the entry 1865 address. */ 1866 UWord* descr = (UWord*)f; 1867 return (void*)(descr[0]); 1868 # else 1869 # error "Unknown platform" 1870 # endif 1871 } 1872 1873 /*--------------------------------------------------------------------*/ 1874 /*--- end ---*/ 1875 /*--------------------------------------------------------------------*/ 1876