
/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-ppc64-linux.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright (C) 2005-2012 Cerion Armour-Brown <cerion (at) open-works.co.uk>

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGP_ppc64_linux)

#include "pub_core_basics_asm.h"
#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"	/* for OFFSET_ppc64_CIA */


/* References to globals via the TOC */

/*
        .globl  vgPlain_tt_fast
        .lcomm  vgPlain_tt_fast,4,4
        .type   vgPlain_tt_fast, @object
*/
        .section ".toc","aw"
.tocent__vgPlain_tt_fast:
        .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
.tocent__vgPlain_stats__n_xindirs_32:
        .tc vgPlain_stats__n_xindirs_32[TC],vgPlain_stats__n_xindirs_32
.tocent__vgPlain_stats__n_xindir_misses_32:
        .tc vgPlain_stats__n_xindir_misses_32[TC],vgPlain_stats__n_xindir_misses_32
.tocent__vgPlain_machine_ppc64_has_VMX:
        .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX

/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
/*--- used to run all translations,                        ---*/
/*--- including no-redir ones.                             ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Entry and preamble (set everything up)       ---*/
/*----------------------------------------------------*/

/* signature:
void VG_(disp_run_translations)( UWord* two_words,
                                 void*  guest_state,
                                 Addr   host_addr );
*/

/* Summary of the 624-byte frame built below (offsets from the new sp):

     480..623 : f14..f31            (18 x 8 bytes)
     328..479 : r13..r31            (19 x 8 bytes)
     324      : VRSAVE              (32-bit word, only if VMX present)
     320      : alignment padding   (4 bytes)
     128..319 : v20..v31            (12 x 16 bytes, only if VMX present)
     104      : two_words           (saved r3)
      96      : reserved for checking FPSCR[RM]
      88      : scratch word used to load zero into FPSCR
      48..87  : free
       0..47  : linkage area

   The caller's LR and CR are stored in the caller's frame at
   16(old sp) and 8(old sp), i.e. 640(sp) and 632(sp) once the new
   frame exists.
*/

        .section ".text"
        .align   2
        .globl   VG_(disp_run_translations)
        .section ".opd","aw"
        .align   3
VG_(disp_run_translations):
        .quad    .VG_(disp_run_translations),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_run_translations),@function
        .globl   .VG_(disp_run_translations)
.VG_(disp_run_translations):
        /* r3 holds two_words */
        /* r4 holds guest_state */
        /* r5 holds host_addr */

        /* ----- entry point to VG_(disp_run_translations) ----- */
        /* PPC64 ABI saves LR->16(prt_sp), CR->8(prt_sp)) */

        /* Save lr, cr */
        mflr    6
        std     6,16(1)
        mfcr    6
        std     6,8(1)

        /* New stack frame */
        stdu    1,-624(1)  /* sp should maintain 16-byte alignment */

        /* General reg save area : 152 bytes */
        std     31,472(1)
        std     30,464(1)
        std     29,456(1)
        std     28,448(1)
        std     27,440(1)
        std     26,432(1)
        std     25,424(1)
        std     24,416(1)
        std     23,408(1)
        std     22,400(1)
        std     21,392(1)
        std     20,384(1)
        std     19,376(1)
        std     18,368(1)
        std     17,360(1)
        std     16,352(1)
        std     15,344(1)
        std     14,336(1)
        std     13,328(1)
        std     3,104(1)  /* save two_words for later */

        /* Save callee-saved registers... */
        /* Floating-point reg save area : 144 bytes */
        stfd    31,616(1)
        stfd    30,608(1)
        stfd    29,600(1)
        stfd    28,592(1)
        stfd    27,584(1)
        stfd    26,576(1)
        stfd    25,568(1)
        stfd    24,560(1)
        stfd    23,552(1)
        stfd    22,544(1)
        stfd    21,536(1)
        stfd    20,528(1)
        stfd    19,520(1)
        stfd    18,512(1)
        stfd    17,504(1)
        stfd    16,496(1)
        stfd    15,488(1)
        stfd    14,480(1)

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
        /* r3, r4, r5 are live here, so use r6 */
        ld      6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      6,0(6)
        cmpldi  6,0
        beq     .LafterVMX1

        /* VRSAVE save word : 32 bits */
        mfspr   6,256         /* vrsave reg is spr number 256 */
        stw     6,324(1)

        /* Alignment padding : 4 bytes */

        /* Vector reg save area (quadword aligned) : 192 bytes */
        li      6,304
        stvx    31,6,1
        li      6,288
        stvx    30,6,1
        li      6,272
        stvx    29,6,1
        li      6,256
        stvx    28,6,1
        li      6,240
        stvx    27,6,1
        li      6,224
        stvx    26,6,1
        li      6,208
        stvx    25,6,1
        li      6,192
        stvx    24,6,1
        li      6,176
        stvx    23,6,1
        li      6,160
        stvx    22,6,1
        li      6,144
        stvx    21,6,1
        li      6,128
        stvx    20,6,1
.LafterVMX1:

        /* Local variable space... */

        /* r3 holds two_words */
        /* r4 holds guest_state */
        /* r5 holds host_addr */

        /* 96(sp) used later to check FPSCR[RM] */
        /* 88(sp) used later to load fpscr with zero */
        /* 48:87(sp) free */

        /* Linkage Area (reserved)
           40(sp) : TOC
           32(sp) : link editor doubleword
           24(sp) : compiler doubleword
           16(sp) : LR
           8(sp)  : CR
           0(sp)  : back-chain
        */

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        /* => get zero into f3 (tedious)
           fsub 3,3,3 is not a reliable way to do this, since if
           f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      6,0
        stw     6,88(1)
        lfs     3,88(1)
        mtfsf   0xFF,3   /* fpscr = lo32 of f3 */

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
        ld      6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      6,0(6)
        cmpldi  6,0
        beq     .LafterVMX2

        vspltisw 3,0x0  /* generate zero */
        mtvscr  3
.LafterVMX2:

        /* make a stack frame for the code we are calling */
        stdu    1,-48(1)

        /* Set up the guest state ptr */
        mr      31,4      /* r31 (generated code gsp) = r4 */

        /* and jump into the code cache.  Chained translations in
           the code cache run, until for whatever reason, they can't
           continue.  When that happens, the translation in question
           will jump (or call) to one of the continuation points
           VG_(cp_...) below. */
        mtctr   5
        bctr
        /*NOTREACHED*/

/*----------------------------------------------------*/
/*--- Postamble and exit.                          ---*/
/*----------------------------------------------------*/

.postamble:
        /* At this point, r6 and r7 contain two
           words to be returned to the caller.  r6
           holds a TRC value, and r7 optionally may
           hold another word (for CHAIN_ME exits, the
           address of the place to patch.) */

        /* undo the "make a stack frame for the code we are calling" */
        addi    1,1,48

        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR in ways we don't expect. */
        /* Using r11 - value used again further on, so don't trash! */
        ld      11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
        ld      11,0(11)

        /* Set fpscr back to a known state, since vex-generated code
           may have messed with fpscr[rm]. */
        li      5,0
        addi    1,1,-16
        stw     5,0(1)
        lfs     3,0(1)
        addi    1,1,16
        mtfsf   0xFF,3   /* fpscr = f3 */

        cmpldi  11,0    /* Do we have altivec? */
        beq     .LafterVMX8

        /* Check that VSCR[NJ] has not been set.  The preamble wrote
           zero to VSCR, so finding NJ == 1 here is treated as an
           invariant violation. */
        /* first generate 4x 0x00010000 */
        vspltisw  4,0x1                   /* 4x 0x00000001 */
        vspltisw  5,0x0                   /* zero */
        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
        /* retrieve VSCR and mask wanted bits */
        mfvscr    7
        vand      7,7,6                   /* gives NJ flag */
        vspltw    7,7,0x3                 /* flags-word to all lanes */
        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
        bt        24,.invariant_violation /* branch if all_equal, i.e. NJ set */

.LafterVMX8:
        /* otherwise we're OK */
        b       .remove_frame

.invariant_violation:
        li      6,VG_TRC_INVARIANT_FAILED
        li      7,0
        /* fall through */

.remove_frame:
        /* r11 already holds VG_(machine_ppc64_has_VMX) value.
           It was loaded with a 64-bit ld, so compare all 64 bits,
           as the other two tests of this flag do. */
        cmpldi  11,0
        beq     .LafterVMX9

        /* Restore Altivec regs.
           Use r5 as scratch since r6/r7 are live. */
        /* VRSAVE: reload the word saved in the preamble and write it
           back to the SPR.  (This must be mtspr; an mfspr here would
           overwrite r5 and leave VRSAVE unrestored.) */
        lwz     5,324(1)
        mtspr   256,5         /* VRSAVE reg is spr number 256 */

        /* Vector regs */
        li      5,304
        lvx     31,5,1
        li      5,288
        lvx     30,5,1
        li      5,272
        lvx     29,5,1
        li      5,256
        lvx     28,5,1
        li      5,240
        lvx     27,5,1
        li      5,224
        lvx     26,5,1
        li      5,208
        lvx     25,5,1
        li      5,192
        lvx     24,5,1
        li      5,176
        lvx     23,5,1
        li      5,160
        lvx     22,5,1
        li      5,144
        lvx     21,5,1
        li      5,128
        lvx     20,5,1
.LafterVMX9:

        /* Restore FP regs */
        /* Floating-point regs */
        lfd     31,616(1)
        lfd     30,608(1)
        lfd     29,600(1)
        lfd     28,592(1)
        lfd     27,584(1)
        lfd     26,576(1)
        lfd     25,568(1)
        lfd     24,560(1)
        lfd     23,552(1)
        lfd     22,544(1)
        lfd     21,536(1)
        lfd     20,528(1)
        lfd     19,520(1)
        lfd     18,512(1)
        lfd     17,504(1)
        lfd     16,496(1)
        lfd     15,488(1)
        lfd     14,480(1)

        /* restore int regs, including importantly r3 (two_words) */
        ld      31,472(1)
        ld      30,464(1)
        ld      29,456(1)
        ld      28,448(1)
        ld      27,440(1)
        ld      26,432(1)
        ld      25,424(1)
        ld      24,416(1)
        ld      23,408(1)
        ld      22,400(1)
        ld      21,392(1)
        ld      20,384(1)
        ld      19,376(1)
        ld      18,368(1)
        ld      17,360(1)
        ld      16,352(1)
        ld      15,344(1)
        ld      14,336(1)
        ld      13,328(1)
        ld      3,104(1)
        /* Stash return values */
        std     6,0(3)
        std     7,8(3)

        /* restore lr & sp, and leave */
        ld      0,632(1)  /* stack_size + 8 */
        mtcr    0
        ld      0,640(1)  /* stack_size + 16 */
        mtlr    0
        addi    1,1,624   /* stack_size */
        blr


/*----------------------------------------------------*/
/*--- Continuation points                          ---*/
/*----------------------------------------------------*/

/* ------ Chain me to slow entry point ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_chain_me_to_slowEP)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_chain_me_to_slowEP):
        .quad    .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_chain_me_to_slowEP),@function
        .globl   .VG_(disp_cp_chain_me_to_slowEP)
.VG_(disp_cp_chain_me_to_slowEP):
        /* We got called.  The return address indicates
           where the patching needs to happen.  Collect
           the return address and, exit back to C land,
           handing the caller the pair (Chain_me_S, RA) */
        li      6, VG_TRC_CHAIN_ME_TO_SLOW_EP
        mflr    7
        /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP
           4  = mtctr r30
           4  = bctrl
        */
        subi    7,7,20+4+4
        b       .postamble

/* ------ Chain me to fast entry point ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_chain_me_to_fastEP)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_chain_me_to_fastEP):
        .quad    .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_chain_me_to_fastEP),@function
        .globl   .VG_(disp_cp_chain_me_to_fastEP)
.VG_(disp_cp_chain_me_to_fastEP):
        /* We got called.  The return address indicates
           where the patching needs to happen.  Collect
           the return address and, exit back to C land,
           handing the caller the pair (Chain_me_F, RA) */
        li      6, VG_TRC_CHAIN_ME_TO_FAST_EP
        mflr    7
        /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP
           4  = mtctr r30
           4  = bctrl
        */
        subi    7,7,20+4+4
        b       .postamble

/* ------ Indirect but boring jump ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_xindir)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_xindir):
        .quad    .VG_(disp_cp_xindir),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_xindir),@function
        .globl   .VG_(disp_cp_xindir)
.VG_(disp_cp_xindir):
        /* Where are we going? */
        ld      3,OFFSET_ppc64_CIA(31)

        /* stats only */
        ld      5, .tocent__vgPlain_stats__n_xindirs_32@toc(2)
        lwz     6,0(5)
        addi    6,6,1
        stw     6,0(5)

        /* r5 = &VG_(tt_fast) */
        ld      5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
        rldicl  4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
        sldi    4,4,4      /* entry# * sizeof(FastCacheEntry) */
        add     5,5,4      /* & VG_(tt_fast)[entry#] */
        ld      6,0(5)     /* .guest */
        ld      7,8(5)     /* .host */
        cmpd    3,6
        bne     .fast_lookup_failed

        /* Found a match.  Jump to .host. */
        mtctr   7
        bctr

.fast_lookup_failed:
        /* stats only */
        ld      5, .tocent__vgPlain_stats__n_xindir_misses_32@toc(2)
        lwz     6,0(5)
        addi    6,6,1
        stw     6,0(5)

        li      6,VG_TRC_INNER_FASTMISS
        li      7,0
        b       .postamble
        /*NOTREACHED*/

/* ------ Assisted jump ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_xassisted)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_xassisted):
        .quad    .VG_(disp_cp_xassisted),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_xassisted),@function
        .globl   .VG_(disp_cp_xassisted)
.VG_(disp_cp_xassisted):
        /* r31 contains the TRC */
        mr      6,31
        li      7,0
        b       .postamble

/* ------ Event check failed ------ */
        .section ".text"
        .align   2
        .globl   VG_(disp_cp_evcheck_fail)
        .section ".opd","aw"
        .align   3
VG_(disp_cp_evcheck_fail):
        .quad    .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
        .previous
        .type    .VG_(disp_cp_evcheck_fail),@function
        .globl   .VG_(disp_cp_evcheck_fail)
.VG_(disp_cp_evcheck_fail):
        li      6,VG_TRC_INNER_COUNTERZERO
        li      7,0
        b       .postamble


        .size .VG_(disp_run_translations), .-.VG_(disp_run_translations)

/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits

#endif // defined(VGP_ppc64_linux)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/