1 2 /* HOW TO USE 3 4 13 Dec '05 - Linker no longer used (apart from mymalloc) 5 Simply compile and link switchback.c with test_xxx.c, 6 e.g. for ppc64: 7 $ (cd .. && make EXTRA_CFLAGS="-m64" libvex_ppc64_linux.a) && gcc -m64 -mregnames -Wall -Wshadow -Wno-long-long -Winline -O -g -o switchback switchback.c linker.c ../libvex_ppc64_linux.a test_xxx.c 8 9 Test file test_xxx.c must have an entry point called "entry", 10 which expects to take a single argument which is a function pointer 11 (to "serviceFn"). 12 13 Test file may not reference any other symbols. 14 15 NOTE: POWERPC: it is critical, when using this on ppc, to set 16 CacheLineSize to the right value. Values we currently know of: 17 18 imac (G3): 32 19 G5 (ppc970): 128 20 */ 21 22 #include <stdio.h> 23 #include <assert.h> 24 #include <stdlib.h> 25 #include <sys/types.h> 26 #include <sys/stat.h> 27 #include <unistd.h> 28 29 #include "../pub/libvex_basictypes.h" 30 #include "../pub/libvex_guest_x86.h" 31 #include "../pub/libvex_guest_amd64.h" 32 #include "../pub/libvex_guest_ppc32.h" 33 #include "../pub/libvex_guest_ppc64.h" 34 #include "../pub/libvex.h" 35 #include "../pub/libvex_trc_values.h" 36 #include "linker.h" 37 38 static ULong n_bbs_done = 0; 39 static Int n_translations_made = 0; 40 41 42 #if defined(__i386__) 43 # define VexGuestState VexGuestX86State 44 # define LibVEX_Guest_initialise LibVEX_GuestX86_initialise 45 # define VexArch VexArchX86 46 # define VexSubArch VexSubArchX86_sse1 47 # define GuestPC guest_EIP 48 # define CacheLineSize 0/*irrelevant*/ 49 #elif defined(__x86_64__) 50 # define VexGuestState VexGuestAMD64State 51 # define LibVEX_Guest_initialise LibVEX_GuestAMD64_initialise 52 # define VexArch VexArchAMD64 53 # define VexSubArch VexSubArch_NONE 54 # define GuestPC guest_RIP 55 # define CacheLineSize 0/*irrelevant*/ 56 #elif defined(__powerpc__) 57 58 #if !defined(__powerpc64__) // ppc32 59 # define VexGuestState VexGuestPPC32State 60 # define LibVEX_Guest_initialise LibVEX_GuestPPC32_initialise 61 # define VexArch VexArchPPC32 62 # define VexSubArch VexSubArchPPC32_FI 63 # define GuestPC guest_CIA 64 # define CacheLineSize 128 65 #else 66 # define VexGuestState VexGuestPPC64State 67 # define LibVEX_Guest_initialise LibVEX_GuestPPC64_initialise 68 # define VexArch VexArchPPC64 69 # define VexSubArch VexSubArchPPC64_FI 70 # define GuestPC guest_CIA 71 # define CacheLineSize 128 72 #endif 73 74 #else 75 # error "Unknown arch" 76 #endif 77 78 /* 7: show conversion into IR */ 79 /* 6: show after initial opt */ 80 /* 5: show after instrumentation */ 81 /* 4: show after second opt */ 82 /* 3: show after tree building */ 83 /* 2: show selected insns */ 84 /* 1: show after reg-alloc */ 85 /* 0: show final assembly */ 86 #define TEST_FLAGS (1<<7)|(1<<3)|(1<<2)|(1<<1)|(1<<0) 87 #define DEBUG_TRACE_FLAGS 0//(1<<7)|(0<<6)|(0<<5)|(0<<4)|(1<<3)|(1<<2)|(1<<1)|(1<<0) 88 89 90 /* guest state */ 91 UInt gstack[50000]; 92 VexGuestState gst; 93 VexControl vcon; 94 95 /* only used for the switchback transition */ 96 /* i386: helper1 = &gst, helper2 = %EFLAGS */ 97 /* amd64: helper1 = &gst, helper2 = %EFLAGS */ 98 /* ppc32: helper1 = &gst, helper2 = %CR, helper3 = %XER */ 99 HWord sb_helper1 = 0; 100 HWord sb_helper2 = 0; 101 HWord sb_helper3 = 0; 102 103 /* translation cache */ 104 #define N_TRANS_CACHE 1000000 105 #define N_TRANS_TABLE 10000 106 107 ULong trans_cache[N_TRANS_CACHE]; 108 VexGuestExtents trans_table [N_TRANS_TABLE]; 109 ULong* trans_tableP[N_TRANS_TABLE]; 110 111 Int trans_cache_used = 0; 112 Int trans_table_used = 0; 113 114 static Bool chase_into_ok ( Addr64 dst ) { return False; } 115 116 #if 0 117 // local_sys_write_stderr(&c,1); 118 static void local_sys_write_stderr ( HChar* buf, Int n ) 119 { 120 UInt __res; 121 __asm__ volatile ( 122 "li %%r0,4\n\t" /* set %r0 = __NR_write */ 123 "li %%r3,1\n\t" /* set %r3 = stdout */ 124 "mr %%r4,%1\n\t" /* set %r4 = buf */ 125 "mr %%r5,%2\n\t" /* set %r5 = n */ 126 "sc\n\t" /* write(stderr, buf, n) */ 127 "mr %0,%%r3\n" /* set __res = r3 */ 128 : "=mr" (__res) 129 : "g" (buf), "g" (n) 130 : "r0", "r3", "r4", "r5" ); 131 } 132 #endif 133 134 /* For providing services. */ 135 static HWord serviceFn ( HWord arg1, HWord arg2 ) 136 { 137 switch (arg1) { 138 case 0: /* EXIT */ 139 printf("---STOP---\n"); 140 printf("serviceFn:EXIT\n"); 141 printf("%llu bbs simulated\n", n_bbs_done); 142 printf("%d translations made, %d tt bytes\n", 143 n_translations_made, 8*trans_cache_used); 144 exit(0); 145 case 1: /* PUTC */ 146 putchar(arg2); 147 return 0; 148 case 2: /* MALLOC */ 149 return (HWord)malloc(arg2); 150 case 3: /* FREE */ 151 free((void*)arg2); 152 return 0; 153 default: 154 assert(0); 155 } 156 } 157 158 159 /* -------------------- */ 160 /* continue execution on the real CPU (never returns) */ 161 extern void switchback_asm(void); 162 163 #if defined(__i386__) 164 165 asm( 166 "switchback_asm:\n" 167 " movl sb_helper1, %eax\n" // eax = guest state ptr 168 " movl 16(%eax), %esp\n" // switch stacks 169 " pushl 56(%eax)\n" // push continuation addr 170 " movl sb_helper2, %ebx\n" // get eflags 171 " pushl %ebx\n" // eflags:CA 172 " pushl 0(%eax)\n" // EAX:eflags:CA 173 " movl 4(%eax), %ecx\n" 174 " movl 8(%eax), %edx\n" 175 " movl 12(%eax), %ebx\n" 176 " movl 20(%eax), %ebp\n" 177 " movl 24(%eax), %esi\n" 178 " movl 28(%eax), %edi\n" 179 " popl %eax\n" 180 " popfl\n" 181 " ret\n" 182 ); 183 void switchback ( void ) 184 { 185 sb_helper1 = (HWord)&gst; 186 sb_helper2 = LibVEX_GuestX86_get_eflags(&gst); 187 switchback_asm(); // never returns 188 } 189 190 #elif defined(__x86_64__) 191 192 asm( 193 "switchback_asm:\n" 194 " movq sb_helper1, %rax\n" // rax = guest state ptr 195 " movq 32(%rax), %rsp\n" // switch stacks 196 " pushq 168(%rax)\n" // push continuation addr 197 " movq sb_helper2, %rbx\n" // get eflags 198 " pushq %rbx\n" // eflags:CA 199 " pushq 0(%rax)\n" // RAX:eflags:CA 200 " movq 8(%rax), %rcx\n" 201 " movq 16(%rax), %rdx\n" 202 " movq 24(%rax), %rbx\n" 203 " movq 40(%rax), %rbp\n" 204 " movq 48(%rax), %rsi\n" 205 " movq 56(%rax), %rdi\n" 206 207 " movq 64(%rax), %r8\n" 208 " movq 72(%rax), %r9\n" 209 " movq 80(%rax), %r10\n" 210 " movq 88(%rax), %r11\n" 211 " movq 96(%rax), %r12\n" 212 " movq 104(%rax), %r13\n" 213 " movq 112(%rax), %r14\n" 214 " movq 120(%rax), %r15\n" 215 216 " popq %rax\n" 217 " popfq\n" 218 " ret\n" 219 ); 220 void switchback ( void ) 221 { 222 sb_helper1 = (HWord)&gst; 223 sb_helper2 = LibVEX_GuestAMD64_get_rflags(&gst); 224 switchback_asm(); // never returns 225 } 226 227 #elif defined(__powerpc__) 228 229 static void invalidate_icache(void *ptr, int nbytes) 230 { 231 unsigned long startaddr = (unsigned long) ptr; 232 unsigned long endaddr = startaddr + nbytes; 233 unsigned long addr; 234 unsigned long cls = CacheLineSize; 235 236 startaddr &= ~(cls - 1); 237 for (addr = startaddr; addr < endaddr; addr += cls) 238 asm volatile("dcbst 0,%0" : : "r" (addr)); 239 asm volatile("sync"); 240 for (addr = startaddr; addr < endaddr; addr += cls) 241 asm volatile("icbi 0,%0" : : "r" (addr)); 242 asm volatile("sync; isync"); 243 } 244 245 246 #if !defined(__powerpc64__) // ppc32 247 asm( 248 "switchback_asm:\n" 249 // gst 250 " lis %r31,sb_helper1@ha\n" // get hi-wd of guest_state_ptr addr 251 " lwz %r31,sb_helper1@l(%r31)\n" // load word of guest_state_ptr to r31 252 253 // LR 254 " lwz %r3,900(%r31)\n" // guest_LR 255 " mtlr %r3\n" // move to LR 256 257 // CR 258 " lis %r3,sb_helper2@ha\n" // get hi-wd of flags addr 259 " lwz %r3,sb_helper2@l(%r3)\n" // load flags word to r3 260 " mtcr %r3\n" // move r3 to CR 261 262 // CTR 263 " lwz %r3,904(%r31)\n" // guest_CTR 264 " mtctr %r3\n" // move r3 to CTR 265 266 // XER 267 " lis %r3,sb_helper3@ha\n" // get hi-wd of xer addr 268 " lwz %r3,sb_helper3@l(%r3)\n" // load xer word to r3 269 " mtxer %r3\n" // move r3 to XER 270 271 272 // GPR's 273 " lwz %r0, 0(%r31)\n" 274 " lwz %r1, 4(%r31)\n" // switch stacks (r1 = SP) 275 " lwz %r2, 8(%r31)\n" 276 " lwz %r3, 12(%r31)\n" 277 " lwz %r4, 16(%r31)\n" 278 " lwz %r5, 20(%r31)\n" 279 " lwz %r6, 24(%r31)\n" 280 " lwz %r7, 28(%r31)\n" 281 " lwz %r8, 32(%r31)\n" 282 " lwz %r9, 36(%r31)\n" 283 " lwz %r10, 40(%r31)\n" 284 " lwz %r11, 44(%r31)\n" 285 " lwz %r12, 48(%r31)\n" 286 " lwz %r13, 52(%r31)\n" 287 " lwz %r14, 56(%r31)\n" 288 " lwz %r15, 60(%r31)\n" 289 " lwz %r16, 64(%r31)\n" 290 " lwz %r17, 68(%r31)\n" 291 " lwz %r18, 72(%r31)\n" 292 " lwz %r19, 76(%r31)\n" 293 " lwz %r20, 80(%r31)\n" 294 " lwz %r21, 84(%r31)\n" 295 " lwz %r22, 88(%r31)\n" 296 " lwz %r23, 92(%r31)\n" 297 " lwz %r24, 96(%r31)\n" 298 " lwz %r25, 100(%r31)\n" 299 " lwz %r26, 104(%r31)\n" 300 " lwz %r27, 108(%r31)\n" 301 " lwz %r28, 112(%r31)\n" 302 " lwz %r29, 116(%r31)\n" 303 " lwz %r30, 120(%r31)\n" 304 " lwz %r31, 124(%r31)\n" 305 "nop_start_point:\n" 306 " nop\n" 307 " nop\n" 308 " nop\n" 309 " nop\n" 310 " nop\n" 311 "nop_end_point:\n" 312 ); 313 314 #else // ppc64 315 316 asm( 317 ".text\n" 318 " .global switchback_asm\n" 319 " .section \".opd\",\"aw\"\n" 320 " .align 3\n" 321 "switchback_asm:\n" 322 " .quad .switchback_asm,.TOC.@tocbase,0\n" 323 " .previous\n" 324 " .type .switchback_asm,@function\n" 325 " .global .switchback_asm\n" 326 ".switchback_asm:\n" 327 "switchback_asm_undotted:\n" 328 329 // gst: load word of guest_state_ptr to r31 330 " lis %r31,sb_helper1@highest\n" 331 " ori %r31,%r31,sb_helper1@higher\n" 332 " rldicr %r31,%r31,32,31\n" 333 " oris %r31,%r31,sb_helper1@h\n" 334 " ori %r31,%r31,sb_helper1@l\n" 335 " ld %r31,0(%r31)\n" 336 337 338 // LR 339 " ld %r3,1032(%r31)\n" // guest_LR 340 " mtlr %r3\n" // move to LR 341 342 // CR 343 " lis %r3,sb_helper2@highest\n" 344 " ori %r3,%r3,sb_helper2@higher\n" 345 " rldicr %r3,%r3,32,31\n" 346 " oris %r3,%r3,sb_helper2@h\n" 347 " ori %r3,%r3,sb_helper2@l\n" 348 " ld %r3,0(%r3)\n" // load flags word to r3 349 " mtcr %r3\n" // move r3 to CR 350 351 // CTR 352 " ld %r3,1040(%r31)\n" // guest_CTR 353 " mtctr %r3\n" // move r3 to CTR 354 355 // XER 356 " lis %r3,sb_helper3@highest\n" 357 " ori %r3,%r3,sb_helper3@higher\n" 358 " rldicr %r3,%r3,32,31\n" 359 " oris %r3,%r3,sb_helper3@h\n" 360 " ori %r3,%r3,sb_helper3@l\n" 361 " ld %r3,0(%r3)\n" // load xer word to r3 362 " mtxer %r3\n" // move r3 to XER 363 364 // GPR's 365 " ld %r0, 0(%r31)\n" 366 " ld %r1, 8(%r31)\n" // switch stacks (r1 = SP) 367 " ld %r2, 16(%r31)\n" 368 " ld %r3, 24(%r31)\n" 369 " ld %r4, 32(%r31)\n" 370 " ld %r5, 40(%r31)\n" 371 " ld %r6, 48(%r31)\n" 372 " ld %r7, 56(%r31)\n" 373 " ld %r8, 64(%r31)\n" 374 " ld %r9, 72(%r31)\n" 375 " ld %r10, 80(%r31)\n" 376 " ld %r11, 88(%r31)\n" 377 " ld %r12, 96(%r31)\n" 378 " ld %r13, 104(%r31)\n" 379 " ld %r14, 112(%r31)\n" 380 " ld %r15, 120(%r31)\n" 381 " ld %r16, 128(%r31)\n" 382 " ld %r17, 136(%r31)\n" 383 " ld %r18, 144(%r31)\n" 384 " ld %r19, 152(%r31)\n" 385 " ld %r20, 160(%r31)\n" 386 " ld %r21, 168(%r31)\n" 387 " ld %r22, 176(%r31)\n" 388 " ld %r23, 184(%r31)\n" 389 " ld %r24, 192(%r31)\n" 390 " ld %r25, 200(%r31)\n" 391 " ld %r26, 208(%r31)\n" 392 " ld %r27, 216(%r31)\n" 393 " ld %r28, 224(%r31)\n" 394 " ld %r29, 232(%r31)\n" 395 " ld %r30, 240(%r31)\n" 396 " ld %r31, 248(%r31)\n" 397 "nop_start_point:\n" 398 " nop\n" 399 " nop\n" 400 " nop\n" 401 " nop\n" 402 " nop\n" 403 "nop_end_point:\n" 404 ); 405 #endif 406 407 extern void switchback_asm_undotted; 408 extern void nop_start_point; 409 extern void nop_end_point; 410 void switchback ( void ) 411 { 412 Int i; 413 /* blargh. Copy the entire switchback_asm procedure into new 414 memory on which can can set both write and execute permissions, 415 so we can poke around with it and then run the results. */ 416 417 #if defined(__powerpc64__) // ppc32 418 UChar* sa_start = (UChar*)&switchback_asm_undotted; 419 #else 420 UChar* sa_start = (UChar*)&switchback_asm; 421 #endif 422 UChar* sa_nop_start = (UChar*)&nop_start_point; 423 UChar* sa_end = (UChar*)&nop_end_point; 424 425 #if 0 426 printf("sa_start %p\n", sa_start ); 427 printf("sa_nop_start %p\n", sa_nop_start); 428 printf("sa_end %p\n", sa_end); 429 #endif 430 Int nbytes = sa_end - sa_start; 431 Int off_nopstart = sa_nop_start - sa_start; 432 if (0) 433 printf("nbytes = %d, nopstart = %d\n", nbytes, off_nopstart); 434 435 /* copy it into mallocville */ 436 UChar* copy = mymalloc(nbytes); 437 assert(copy); 438 for (i = 0; i < nbytes; i++) 439 copy[i] = sa_start[i]; 440 441 UInt* p = (UInt*)(©[off_nopstart]); 442 443 #if !defined(__powerpc64__) // ppc32 444 Addr32 addr_of_nop = (Addr32)p; 445 Addr32 where_to_go = gst.guest_CIA; 446 Int diff = ((Int)where_to_go) - ((Int)addr_of_nop); 447 448 #if 0 449 printf("addr of first nop = 0x%x\n", addr_of_nop); 450 printf("where to go = 0x%x\n", where_to_go); 451 printf("diff = 0x%x\n", diff); 452 #endif 453 454 #else // ppc64 455 Addr64 addr_of_nop = (Addr64)p; 456 Addr64 where_to_go = gst.guest_CIA; 457 Long diff = ((Long)where_to_go) - ((Long)addr_of_nop); 458 459 #if 0 460 printf("addr of first nop = 0x%llx\n", addr_of_nop); 461 printf("where to go = 0x%llx\n", where_to_go); 462 printf("diff = 0x%llx\n", diff); 463 #endif 464 #endif 465 466 if (diff < -0x2000000 || diff >= 0x2000000) { 467 // we're hosed. Give up 468 printf("hosed -- offset too large\n"); 469 assert(0); 470 } 471 472 sb_helper1 = (HWord)&gst; 473 #if !defined(__powerpc64__) // ppc32 474 sb_helper2 = LibVEX_GuestPPC32_get_CR(&gst); 475 sb_helper3 = LibVEX_GuestPPC32_get_XER(&gst); 476 #else // ppc64 477 sb_helper2 = LibVEX_GuestPPC64_get_CR(&gst); 478 sb_helper3 = LibVEX_GuestPPC64_get_XER(&gst); 479 #endif 480 481 /* stay sane ... */ 482 assert(p[0] == 24<<26); /* nop */ 483 484 /* branch to diff */ 485 p[0] = ((18<<26) | (((diff >> 2) & 0xFFFFFF) << 2) | (0<<1) | (0<<0)); 486 487 invalidate_icache( copy, nbytes ); 488 489 #if defined(__powerpc64__) 490 //printf("jumping to %p\n", copy); 491 { ULong faketoc[3]; 492 void* v; 493 faketoc[0] = (ULong)copy; 494 v = &faketoc[0]; 495 ( (void(*)(void)) v )(); 496 } 497 #else 498 ( (void(*)(void))copy )(); 499 #endif 500 } 501 502 #else 503 # error "Unknown arch (switchback)" 504 #endif 505 506 /* -------------------- */ 507 static HWord f, gp, res; 508 extern void run_translation_asm(void); 509 510 #if defined(__i386__) 511 asm( 512 "run_translation_asm:\n" 513 " pushal\n" 514 " movl gp, %ebp\n" 515 " movl f, %eax\n" 516 " call *%eax\n" 517 " movl %eax, res\n" 518 " popal\n" 519 " ret\n" 520 ); 521 522 #elif defined(__x86_64__) 523 asm( 524 "run_translation_asm:\n" 525 526 " pushq %rax\n" 527 " pushq %rbx\n" 528 " pushq %rcx\n" 529 " pushq %rdx\n" 530 " pushq %rbp\n" 531 " pushq %rsi\n" 532 " pushq %rdi\n" 533 " pushq %r8\n" 534 " pushq %r9\n" 535 " pushq %r10\n" 536 " pushq %r11\n" 537 " pushq %r12\n" 538 " pushq %r13\n" 539 " pushq %r14\n" 540 " pushq %r15\n" 541 542 " movq gp, %rbp\n" 543 " movq f, %rax\n" 544 " call *%rax\n" 545 " movq %rax, res\n" 546 547 " popq %r15\n" 548 " popq %r14\n" 549 " popq %r13\n" 550 " popq %r12\n" 551 " popq %r11\n" 552 " popq %r10\n" 553 " popq %r9\n" 554 " popq %r8\n" 555 " popq %rdi\n" 556 " popq %rsi\n" 557 " popq %rbp\n" 558 " popq %rdx\n" 559 " popq %rcx\n" 560 " popq %rbx\n" 561 " popq %rax\n" 562 563 " ret\n" 564 ); 565 566 #elif defined(__powerpc__) 567 568 #if !defined(__powerpc64__) // ppc32 569 asm( 570 "run_translation_asm:\n" 571 572 // create new stack: 573 // save old sp at first word & update sp 574 " stwu 1,-256(1)\n" 575 576 // save LR 577 " mflr %r0\n" 578 " stw %r0,260(%r1)\n" 579 580 // leave hole @ 4(%r1) for a callee to save it's LR 581 // no params 582 // no need to save non-volatile CR fields 583 584 // store registers to stack: just the callee-saved regs 585 " stw %r13, 8(%r1)\n" 586 " stw %r14, 12(%r1)\n" 587 " stw %r15, 16(%r1)\n" 588 " stw %r16, 20(%r1)\n" 589 " stw %r17, 24(%r1)\n" 590 " stw %r18, 28(%r1)\n" 591 " stw %r19, 32(%r1)\n" 592 " stw %r20, 36(%r1)\n" 593 " stw %r21, 40(%r1)\n" 594 " stw %r22, 44(%r1)\n" 595 " stw %r23, 48(%r1)\n" 596 " stw %r24, 52(%r1)\n" 597 " stw %r25, 56(%r1)\n" 598 " stw %r26, 60(%r1)\n" 599 " stw %r27, 64(%r1)\n" 600 " stw %r28, 68(%r1)\n" 601 " stw %r29, 72(%r1)\n" 602 " stw %r30, 76(%r1)\n" 603 " stw %r31, 80(%r1)\n" 604 605 // r31 (guest state ptr) := global var "gp" 606 " lis %r31,gp@ha\n" 607 " lwz %r31,gp@l(%r31)\n" 608 609 // call translation address in global var "f" 610 " lis %r4,f@ha\n" 611 " lwz %r4,f@l(%r4)\n" 612 " mtctr %r4\n" 613 " bctrl\n" 614 615 // save return value (in r3) into global var "res" 616 " lis %r5,res@ha\n" 617 " stw %r3,res@l(%r5)\n" 618 619 // save possibly modified guest state ptr (r31) in "gp" 620 " lis %r5,gp@ha\n" 621 " stw %r31,gp@l(%r5)\n" 622 623 // reload registers from stack 624 " lwz %r13, 8(%r1)\n" 625 " lwz %r14, 12(%r1)\n" 626 " lwz %r15, 16(%r1)\n" 627 " lwz %r16, 20(%r1)\n" 628 " lwz %r17, 24(%r1)\n" 629 " lwz %r18, 28(%r1)\n" 630 " lwz %r19, 32(%r1)\n" 631 " lwz %r20, 36(%r1)\n" 632 " lwz %r21, 40(%r1)\n" 633 " lwz %r22, 44(%r1)\n" 634 " lwz %r23, 48(%r1)\n" 635 " lwz %r24, 52(%r1)\n" 636 " lwz %r25, 56(%r1)\n" 637 " lwz %r26, 60(%r1)\n" 638 " lwz %r27, 64(%r1)\n" 639 " lwz %r28, 68(%r1)\n" 640 " lwz %r29, 72(%r1)\n" 641 " lwz %r30, 76(%r1)\n" 642 " lwz %r31, 80(%r1)\n" 643 644 // restore LR 645 " lwz %r0,260(%r1)\n" 646 " mtlr %r0\n" 647 648 // restore previous stack pointer 649 " addi %r1,%r1,256\n" 650 651 // return 652 " blr" 653 ); 654 655 #else // ppc64 656 657 asm( 658 ".text\n" 659 " .global run_translation_asm\n" 660 " .section \".opd\",\"aw\"\n" 661 " .align 3\n" 662 "run_translation_asm:\n" 663 " .quad .run_translation_asm,.TOC.@tocbase,0\n" 664 " .previous\n" 665 " .type .run_translation_asm,@function\n" 666 " .global .run_translation_asm\n" 667 ".run_translation_asm:\n" 668 669 // save LR,CTR 670 " mflr %r0\n" 671 " std %r0,16(%r1)\n" 672 " mfctr %r0\n" 673 " std %r0,8(%r1)\n" 674 675 // create new stack: 676 // save old sp at first word & update sp 677 " stdu 1,-256(1)\n" 678 679 // leave hole @ 4(%r1) for a callee to save it's LR 680 // no params 681 // no need to save non-volatile CR fields 682 683 // store registers to stack: just the callee-saved regs 684 " std %r13, 48(%r1)\n" 685 " std %r14, 56(%r1)\n" 686 " std %r15, 64(%r1)\n" 687 " std %r16, 72(%r1)\n" 688 " std %r17, 80(%r1)\n" 689 " std %r18, 88(%r1)\n" 690 " std %r19, 96(%r1)\n" 691 " std %r20, 104(%r1)\n" 692 " std %r21, 112(%r1)\n" 693 " std %r22, 120(%r1)\n" 694 " std %r23, 128(%r1)\n" 695 " std %r24, 136(%r1)\n" 696 " std %r25, 144(%r1)\n" 697 " std %r26, 152(%r1)\n" 698 " std %r27, 160(%r1)\n" 699 " std %r28, 168(%r1)\n" 700 " std %r29, 176(%r1)\n" 701 " std %r30, 184(%r1)\n" 702 " std %r31, 192(%r1)\n" 703 704 // r31 (guest state ptr) := global var "gp" 705 " lis %r31,gp@highest\n" 706 " ori %r31,%r31,gp@higher\n" 707 " rldicr %r31,%r31,32,31\n" 708 " oris %r31,%r31,gp@h\n" 709 " ori %r31,%r31,gp@l\n" 710 " ld %r31,0(%r31)\n" 711 712 // call translation address in global var "f" 713 " lis %r4,f@highest\n" 714 " ori %r4,%r4,f@higher\n" 715 " rldicr %r4,%r4,32,31\n" 716 " oris %r4,%r4,f@h\n" 717 " ori %r4,%r4,f@l\n" 718 " ld %r4,0(%r4)\n" 719 " mtctr %r4\n" 720 " bctrl\n" 721 722 // save return value (in r3) into global var "res" 723 " lis %r5,res@highest\n" 724 " ori %r5,%r5,res@higher\n" 725 " rldicr %r5,%r5,32,31\n" 726 " oris %r5,%r5,res@h\n" 727 " ori %r5,%r5,res@l\n" 728 " std %r3,0(%r5)\n" 729 730 // save possibly modified guest state ptr (r31) in "gp" 731 " lis %r5,gp@highest\n" 732 " ori %r5,%r5,gp@higher\n" 733 " rldicr %r5,%r5,32,31\n" 734 " oris %r5,%r5,gp@h\n" 735 " ori %r5,%r5,gp@l\n" 736 " std %r31,0(%r5)\n" 737 738 // reload registers from stack 739 " ld %r13, 48(%r1)\n" 740 " ld %r14, 56(%r1)\n" 741 " ld %r15, 64(%r1)\n" 742 " ld %r16, 72(%r1)\n" 743 " ld %r17, 80(%r1)\n" 744 " ld %r18, 88(%r1)\n" 745 " ld %r19, 96(%r1)\n" 746 " ld %r20, 104(%r1)\n" 747 " ld %r21, 112(%r1)\n" 748 " ld %r22, 120(%r1)\n" 749 " ld %r23, 128(%r1)\n" 750 " ld %r24, 136(%r1)\n" 751 " ld %r25, 144(%r1)\n" 752 " ld %r26, 152(%r1)\n" 753 " ld %r27, 160(%r1)\n" 754 " ld %r28, 168(%r1)\n" 755 " ld %r29, 176(%r1)\n" 756 " ld %r30, 184(%r1)\n" 757 " ld %r31, 192(%r1)\n" 758 759 // restore previous stack pointer 760 " addi %r1,%r1,256\n" 761 762 // restore LR,CTR 763 " ld %r0,16(%r1)\n" 764 " mtlr %r0\n" 765 " ld %r0,8(%r1)\n" 766 " mtctr %r0\n" 767 768 // return 769 " blr" 770 ); 771 #endif 772 773 #else 774 775 # error "Unknown arch" 776 #endif 777 778 /* Run a translation at host address 'translation'. Return 779 True if Vex asked for an translation cache flush as a result. 780 */ 781 Bool run_translation ( HWord translation ) 782 { 783 if (0 && DEBUG_TRACE_FLAGS) { 784 printf(" run translation %p\n", (void*)translation ); 785 printf(" simulated bb: %llu\n", n_bbs_done); 786 } 787 f = translation; 788 gp = (HWord)&gst; 789 run_translation_asm(); 790 gst.GuestPC = res; 791 n_bbs_done ++; 792 return gp==VEX_TRC_JMP_TINVAL; 793 } 794 795 HWord find_translation ( Addr64 guest_addr ) 796 { 797 Int i; 798 HWord __res; 799 if (0) 800 printf("find translation %p ... ", ULong_to_Ptr(guest_addr)); 801 for (i = 0; i < trans_table_used; i++) 802 if (trans_table[i].base[0] == guest_addr) 803 break; 804 if (i == trans_table_used) { 805 if (0) printf("none\n"); 806 return 0; /* not found */ 807 } 808 809 /* Move this translation one step towards the front, so finding it 810 next time round is just that little bit cheaper. */ 811 if (i > 2) { 812 VexGuestExtents tmpE = trans_table[i-1]; 813 ULong* tmpP = trans_tableP[i-1]; 814 trans_table[i-1] = trans_table[i]; 815 trans_tableP[i-1] = trans_tableP[i]; 816 trans_table[i] = tmpE; 817 trans_tableP[i] = tmpP; 818 i--; 819 } 820 821 __res = (HWord)trans_tableP[i]; 822 if (0) printf("%p\n", (void*)__res); 823 return __res; 824 } 825 826 #define N_TRANSBUF 5000 827 static UChar transbuf[N_TRANSBUF]; 828 void make_translation ( Addr64 guest_addr, Bool verbose ) 829 { 830 VexTranslateArgs vta; 831 VexTranslateResult tres; 832 VexArchInfo vex_archinfo; 833 Int trans_used, i, ws_needed; 834 835 if (trans_table_used >= N_TRANS_TABLE 836 || trans_cache_used >= N_TRANS_CACHE-1000) { 837 /* If things are looking to full, just dump 838 all the translations. */ 839 trans_cache_used = 0; 840 trans_table_used = 0; 841 } 842 843 assert(trans_table_used < N_TRANS_TABLE); 844 if (0) 845 printf("make translation %p\n", ULong_to_Ptr(guest_addr)); 846 847 LibVEX_default_VexArchInfo(&vex_archinfo); 848 vex_archinfo.subarch = VexSubArch; 849 vex_archinfo.ppc_cache_line_szB = CacheLineSize; 850 851 /* */ 852 vta.arch_guest = VexArch; 853 vta.archinfo_guest = vex_archinfo; 854 vta.arch_host = VexArch; 855 vta.archinfo_host = vex_archinfo; 856 vta.guest_bytes = (UChar*)ULong_to_Ptr(guest_addr); 857 vta.guest_bytes_addr = (Addr64)guest_addr; 858 vta.guest_bytes_addr_noredir = (Addr64)guest_addr; 859 vta.chase_into_ok = chase_into_ok; 860 // vta.guest_extents = &vge; 861 vta.guest_extents = &trans_table[trans_table_used]; 862 vta.host_bytes = transbuf; 863 vta.host_bytes_size = N_TRANSBUF; 864 vta.host_bytes_used = &trans_used; 865 vta.instrument1 = NULL; 866 vta.instrument2 = NULL; 867 vta.do_self_check = False; 868 vta.traceflags = verbose ? TEST_FLAGS : DEBUG_TRACE_FLAGS; 869 vta.dispatch = NULL; 870 871 tres = LibVEX_Translate ( &vta ); 872 873 assert(tres == VexTransOK); 874 ws_needed = (trans_used+7) / 8; 875 assert(ws_needed > 0); 876 assert(trans_cache_used + ws_needed < N_TRANS_CACHE); 877 n_translations_made++; 878 879 for (i = 0; i < trans_used; i++) { 880 HChar* dst = ((HChar*)(&trans_cache[trans_cache_used])) + i; 881 HChar* src = (HChar*)(&transbuf[i]); 882 *dst = *src; 883 } 884 885 #if defined(__powerpc__) 886 invalidate_icache( &trans_cache[trans_cache_used], trans_used ); 887 #endif 888 889 trans_tableP[trans_table_used] = &trans_cache[trans_cache_used]; 890 trans_table_used++; 891 trans_cache_used += ws_needed; 892 } 893 894 895 static Bool overlap ( Addr64 start, UInt len, VexGuestExtents* vge ) 896 { 897 Int i; 898 for (i = 0; i < vge->n_used; i++) { 899 if (vge->base[i]+vge->len[i] <= start 900 || vge->base[i] >= start+len) { 901 /* ok */ 902 } else { 903 return True; 904 } 905 } 906 return False; /* no overlap */ 907 } 908 909 static void dump_translations ( Addr64 start, UInt len ) 910 { 911 Int i, j; 912 j = 0; 913 for (i = 0; i < trans_table_used; i++) { 914 if (overlap(start, len, &trans_table[i])) { 915 /* do nothing */ 916 } else { 917 assert(j <= i); 918 trans_table[j] = trans_table[i]; 919 trans_tableP[j] = trans_tableP[i]; 920 j++; 921 } 922 } 923 assert(j >= 0 && j <= trans_table_used); 924 if (0) printf("dumped %d translations\n", trans_table_used - j); 925 trans_table_used = j; 926 } 927 928 929 static ULong stopAfter = 0; 930 static UChar* entryP = NULL; 931 932 933 __attribute__ ((noreturn)) 934 static 935 void failure_exit ( void ) 936 { 937 fprintf(stdout, "VEX did failure_exit. Bye.\n"); 938 fprintf(stdout, "bb counter = %llu\n\n", n_bbs_done); 939 exit(1); 940 } 941 942 static 943 void log_bytes ( HChar* bytes, Int nbytes ) 944 { 945 fwrite ( bytes, 1, nbytes, stdout ); 946 fflush ( stdout ); 947 } 948 949 950 /* run simulated code forever (it will exit by calling 951 serviceFn(0)). */ 952 static void run_simulator ( void ) 953 { 954 static Addr64 last_guest = 0; 955 Addr64 next_guest; 956 HWord next_host; 957 Bool need_inval; 958 while (1) { 959 next_guest = gst.GuestPC; 960 961 if (0) 962 printf("\nnext_guest: 0x%x\n", (UInt)next_guest); 963 964 #if defined(__powerpc64__) 965 if (next_guest == Ptr_to_ULong( (void*)(*(ULong*)(&serviceFn)) )) { 966 #else 967 if (next_guest == Ptr_to_ULong(&serviceFn)) { 968 #endif 969 /* "do" the function call to serviceFn */ 970 # if defined(__i386__) 971 { 972 HWord esp = gst.guest_ESP; 973 gst.guest_EIP = *(UInt*)(esp+0); 974 gst.guest_EAX = serviceFn( *(UInt*)(esp+4), *(UInt*)(esp+8) ); 975 gst.guest_ESP = esp+4; 976 next_guest = gst.guest_EIP; 977 } 978 # elif defined(__x86_64__) 979 { 980 HWord esp = gst.guest_RSP; 981 gst.guest_RIP = *(UInt*)(esp+0); 982 gst.guest_RAX = serviceFn( gst.guest_RDI, gst.guest_RSI ); 983 gst.guest_RSP = esp+8; 984 next_guest = gst.guest_RIP; 985 } 986 # elif defined(__powerpc__) 987 { 988 gst.guest_GPR3 = serviceFn( gst.guest_GPR3, gst.guest_GPR4 ); 989 gst.guest_CIA = gst.guest_LR; 990 next_guest = gst.guest_CIA; 991 } 992 # else 993 # error "Unknown arch" 994 # endif 995 } 996 997 next_host = find_translation(next_guest); 998 if (next_host == 0) { 999 make_translation(next_guest,False); 1000 next_host = find_translation(next_guest); 1001 assert(next_host != 0); 1002 } 1003 1004 // Switchback 1005 if (n_bbs_done == stopAfter) { 1006 printf("---begin SWITCHBACK at bb:%llu---\n", n_bbs_done); 1007 #if 1 1008 if (last_guest) { 1009 printf("\n*** Last run translation (bb:%llu):\n", n_bbs_done-1); 1010 make_translation(last_guest,True); 1011 } 1012 #endif 1013 #if 0 1014 if (next_guest) { 1015 printf("\n*** Current translation (bb:%llu):\n", n_bbs_done); 1016 make_translation(next_guest,True); 1017 } 1018 #endif 1019 printf("--- end SWITCHBACK at bb:%llu ---\n", n_bbs_done); 1020 switchback(); 1021 assert(0); /*NOTREACHED*/ 1022 } 1023 1024 last_guest = next_guest; 1025 need_inval = run_translation(next_host); 1026 if (need_inval) { 1027 #if defined(__powerpc__) 1028 dump_translations( (Addr64)gst.guest_TISTART, gst.guest_TILEN ); 1029 if (0) printf("dump translations done\n"); 1030 #endif 1031 } 1032 } 1033 } 1034 1035 1036 static void usage ( void ) 1037 { 1038 printf("usage: switchback #bbs\n"); 1039 printf(" - begins switchback for basic block #bbs\n"); 1040 printf(" - use -1 for largest possible run without switchback\n\n"); 1041 exit(1); 1042 } 1043 1044 #if defined(__powerpc__) 1045 1046 #if !defined(__powerpc64__) // ppc32 1047 UInt saved_R2; 1048 asm( 1049 "get_R2:\n" 1050 " lis %r10,saved_R2@ha\n" 1051 " stw %r2,saved_R2@l(%r10)\n" 1052 " blr\n" 1053 ); 1054 #else // ppc64 1055 ULong saved_R2; 1056 ULong saved_R13; 1057 asm( 1058 ".text\n" 1059 " .global get_R2\n" 1060 " .section \".opd\",\"aw\"\n" 1061 " .align 3\n" 1062 "get_R2:\n" 1063 " .quad .get_R2,.TOC.@tocbase,0\n" 1064 " .previous\n" 1065 " .type .get_R2,@function\n" 1066 " .global .get_R2\n" 1067 ".get_R2:\n" 1068 " lis %r10,saved_R2@highest\n" 1069 " ori %r10,%r10,saved_R2@higher\n" 1070 " rldicr %r10,%r10,32,31\n" 1071 " oris %r10,%r10,saved_R2@h\n" 1072 " ori %r10,%r10,saved_R2@l\n" 1073 " std %r2,0(%r10)\n" 1074 " blr\n" 1075 ); 1076 asm( 1077 ".text\n" 1078 " .global get_R13\n" 1079 " .section \".opd\",\"aw\"\n" 1080 " .align 3\n" 1081 "get_R13:\n" 1082 " .quad .get_R13,.TOC.@tocbase,0\n" 1083 " .previous\n" 1084 " .type .get_R13,@function\n" 1085 " .global .get_R13\n" 1086 ".get_R13:\n" 1087 " lis %r10,saved_R13@highest\n" 1088 " ori %r10,%r10,saved_R13@higher\n" 1089 " rldicr %r10,%r10,32,31\n" 1090 " oris %r10,%r10,saved_R13@h\n" 1091 " ori %r10,%r10,saved_R13@l\n" 1092 " std %r13,0(%r10)\n" 1093 " blr\n" 1094 ); 1095 #endif 1096 extern void get_R2 ( void ); 1097 extern void get_R13 ( void ); 1098 #endif 1099 1100 int main ( Int argc, HChar** argv ) 1101 { 1102 if (argc != 2) 1103 usage(); 1104 1105 stopAfter = (ULong)atoll(argv[1]); 1106 1107 extern void entry ( void*(*service)(int,int) ); 1108 entryP = (UChar*)&entry; 1109 1110 if (!entryP) { 1111 printf("switchback: can't find entry point\n"); 1112 exit(1); 1113 } 1114 1115 LibVEX_default_VexControl(&vcon); 1116 vcon.guest_max_insns=50; 1117 vcon.guest_chase_thresh=0; 1118 vcon.iropt_level=2; 1119 1120 LibVEX_Init( failure_exit, log_bytes, 1, False, &vcon ); 1121 LibVEX_Guest_initialise(&gst); 1122 1123 /* set up as if a call to the entry point passing serviceFn as 1124 the one and only parameter */ 1125 # if defined(__i386__) 1126 gst.guest_EIP = (UInt)entryP; 1127 gst.guest_ESP = (UInt)&gstack[25000]; 1128 *(UInt*)(gst.guest_ESP+4) = (UInt)serviceFn; 1129 *(UInt*)(gst.guest_ESP+0) = 0x12345678; 1130 # elif defined(__x86_64__) 1131 gst.guest_RIP = (ULong)entryP; 1132 gst.guest_RSP = (ULong)&gstack[25000]; 1133 gst.guest_RDI = (ULong)serviceFn; 1134 *(ULong*)(gst.guest_RSP+0) = 0x12345678AABBCCDDULL; 1135 # elif defined(__powerpc__) 1136 get_R2(); 1137 1138 #if !defined(__powerpc64__) // ppc32 1139 gst.guest_CIA = (UInt)entryP; 1140 gst.guest_GPR1 = (UInt)&gstack[25000]; /* stack pointer */ 1141 gst.guest_GPR3 = (UInt)serviceFn; /* param to entry */ 1142 gst.guest_GPR2 = saved_R2; 1143 gst.guest_LR = 0x12345678; /* bogus return address */ 1144 #else // ppc64 1145 get_R13(); 1146 gst.guest_CIA = * (ULong*)entryP; 1147 gst.guest_GPR1 = (ULong)&gstack[25000]; /* stack pointer */ 1148 gst.guest_GPR3 = (ULong)serviceFn; /* param to entry */ 1149 gst.guest_GPR2 = saved_R2; 1150 gst.guest_GPR13 = saved_R13; 1151 gst.guest_LR = 0x1234567812345678ULL; /* bogus return address */ 1152 // printf("setting CIA to %p\n", (void*)gst.guest_CIA); 1153 #endif 1154 1155 # else 1156 # error "Unknown arch" 1157 # endif 1158 1159 printf("\n---START---\n"); 1160 1161 #if 1 1162 run_simulator(); 1163 #else 1164 ( (void(*)(HWord(*)(HWord,HWord))) entryP ) (serviceFn); 1165 #endif 1166 1167 1168 return 0; 1169 } 1170