Home | History | Annotate | Download | only in switchback
      1 
      2 /* HOW TO USE
      3 
      4 13 Dec '05 - Linker no longer used (apart from mymalloc)
      5 Simply compile and link switchback.c with test_xxx.c,
      6 e.g. for ppc64:
      7 $ (cd .. && make EXTRA_CFLAGS="-m64" libvex_ppc64_linux.a) && gcc -m64 -mregnames -Wall -Wshadow -Wno-long-long -Winline -O -g -o switchback switchback.c linker.c ../libvex_ppc64_linux.a test_xxx.c
      8 
      9 Test file test_xxx.c must have an entry point called "entry",
     10 which expects to take a single argument which is a function pointer
     11 (to "serviceFn").
     12 
     13 Test file may not reference any other symbols.
     14 
     15 NOTE: POWERPC: it is critical, when using this on ppc, to set
     16 CacheLineSize to the right value.  Values we currently know of:
     17 
     18    imac (G3):   32
     19    G5 (ppc970): 128
     20 */
     21 
     22 #include <stdio.h>
     23 #include <assert.h>
     24 #include <stdlib.h>
     25 #include <sys/types.h>
     26 #include <sys/stat.h>
     27 #include <unistd.h>
     28 
     29 #include "../pub/libvex_basictypes.h"
     30 #include "../pub/libvex_guest_x86.h"
     31 #include "../pub/libvex_guest_amd64.h"
     32 #include "../pub/libvex_guest_ppc32.h"
     33 #include "../pub/libvex_guest_ppc64.h"
     34 #include "../pub/libvex.h"
     35 #include "../pub/libvex_trc_values.h"
     36 #include "linker.h"
     37 
     /* Run statistics, printed by serviceFn's EXIT request. */
     38 static ULong n_bbs_done = 0;            /* bumped once per run_translation() call */
     39 static Int   n_translations_made = 0;   /* bumped once per successful make_translation() */
     40 
     41 
     /* Host-architecture selection.  Each branch maps the generic names
        used by the rest of this file (VexGuestState,
        LibVEX_Guest_initialise, VexArch, VexSubArch, GuestPC,
        CacheLineSize) onto the definitions for the architecture being
        compiled for.  make_translation() passes VexArch as both the
        guest and the host architecture.  CacheLineSize is consumed by
        invalidate_icache() and by vex_archinfo.ppc_cache_line_szB; note
        that both ppc branches hard-code 128, whereas the usage notes at
        the top of the file list 32 for a G3. */
     42 #if defined(__i386__)
     43 #  define VexGuestState             VexGuestX86State
     44 #  define LibVEX_Guest_initialise   LibVEX_GuestX86_initialise
     45 #  define VexArch                   VexArchX86
     46 #  define VexSubArch                VexSubArchX86_sse1
     47 #  define GuestPC                   guest_EIP
     48 #  define CacheLineSize             0/*irrelevant*/
     49 #elif defined(__x86_64__)
     50 #  define VexGuestState             VexGuestAMD64State
     51 #  define LibVEX_Guest_initialise   LibVEX_GuestAMD64_initialise
     52 #  define VexArch                   VexArchAMD64
     53 #  define VexSubArch                VexSubArch_NONE
     54 #  define GuestPC                   guest_RIP
     55 #  define CacheLineSize             0/*irrelevant*/
     56 #elif defined(__powerpc__)
     57 
     58 #if !defined(__powerpc64__) // ppc32
     59 #  define VexGuestState             VexGuestPPC32State
     60 #  define LibVEX_Guest_initialise   LibVEX_GuestPPC32_initialise
     61 #  define VexArch                   VexArchPPC32
     62 #  define VexSubArch                VexSubArchPPC32_FI
     63 #  define GuestPC                   guest_CIA
     64 #  define CacheLineSize             128
     65 #else // ppc64
     66 #  define VexGuestState             VexGuestPPC64State
     67 #  define LibVEX_Guest_initialise   LibVEX_GuestPPC64_initialise
     68 #  define VexArch                   VexArchPPC64
     69 #  define VexSubArch                VexSubArchPPC64_FI
     70 #  define GuestPC                   guest_CIA
     71 #  define CacheLineSize             128
     72 #endif
     73 
     74 #else
     75 #   error "Unknown arch"
     76 #endif
     77 
     78 /* 7: show conversion into IR */
     79 /* 6: show after initial opt */
     80 /* 5: show after instrumentation */
     81 /* 4: show after second opt */
     82 /* 3: show after tree building */
     83 /* 2: show selected insns */
     84 /* 1: show after reg-alloc */
     85 /* 0: show final assembly */
     86 #define TEST_FLAGS (1<<7)|(1<<3)|(1<<2)|(1<<1)|(1<<0)
     87 #define DEBUG_TRACE_FLAGS 0//(1<<7)|(0<<6)|(0<<5)|(0<<4)|(1<<3)|(1<<2)|(1<<1)|(1<<0)
     88 
     89 
     90 /* guest state */
     91 UInt gstack[50000];   /* NOTE(review): not referenced in this part of the file; presumably the guest's stack -- confirm */
     92 VexGuestState gst;    /* the single simulated register state; its address is passed to the asm stubs */
     93 VexControl vcon;      /* NOTE(review): not referenced in this part of the file */
     94 
     95 /* only used for the switchback transition */
     96 /* i386:  helper1 = &gst, helper2 = %EFLAGS */
     97 /* amd64: helper1 = &gst, helper2 = %EFLAGS */
     98 /* ppc32 and ppc64: helper1 = &gst, helper2 = %CR, helper3 = %XER */
     99 HWord sb_helper1 = 0;
    100 HWord sb_helper2 = 0;
    101 HWord sb_helper3 = 0;
    102 
    103 /* translation cache */
        /* trans_cache stores generated host code and is handed out in
           whole ULong (8-byte) units.  Entry i of trans_table records the
           guest extents of translation i, and entry i of trans_tableP
           points at that translation's code inside trans_cache; the two
           tables are always updated in step. */
    104 #define N_TRANS_CACHE 1000000
    105 #define N_TRANS_TABLE 10000
    106 
    107 ULong trans_cache[N_TRANS_CACHE];
    108 VexGuestExtents trans_table [N_TRANS_TABLE];
    109 ULong*          trans_tableP[N_TRANS_TABLE];
    110 
    111 Int trans_cache_used = 0;   /* number of ULong slots of trans_cache in use */
    112 Int trans_table_used = 0;   /* number of valid entries in trans_table / trans_tableP */
    113 
    114 static Bool chase_into_ok ( Addr64 dst ) { return False; }
    115 
    116 #if 0
    117 // Example call: local_sys_write_stderr(&c,1);
    /* Disabled (inside #if 0) PowerPC-only helper that issues a raw
       "write" system call through inline asm, bypassing libc.
       buf: bytes to write;  n: number of bytes.
       The syscall result is captured in __res but never returned or
       examined.  NOTE(review): despite the function's name, the file
       descriptor loaded into r3 is 1 (stdout), not 2. */
    118 static void local_sys_write_stderr ( HChar* buf, Int n )
    119 {
    120    UInt __res;
    121    __asm__ volatile (
    122       "li %%r0,4\n\t"      /* set %r0 = __NR_write */
    123       "li %%r3,1\n\t"      /* set %r3 = 1 (stdout) */
    124       "mr %%r4,%1\n\t"     /* set %r4 = buf */
    125       "mr %%r5,%2\n\t"     /* set %r5 = n */
    126       "sc\n\t"             /* write(1, buf, n) */
    127       "mr %0,%%r3\n"       /* set __res = r3 */
    128       : "=mr" (__res)
    129       : "g" (buf), "g" (n)
    130       : "r0", "r3", "r4", "r5" );
    131 }
    132 #endif
    133 
    134 /* For providing services. */
    135 static HWord serviceFn ( HWord arg1, HWord arg2 )
    136 {
    137    switch (arg1) {
    138       case 0: /* EXIT */
    139          printf("---STOP---\n");
    140          printf("serviceFn:EXIT\n");
    141 	 printf("%llu bbs simulated\n", n_bbs_done);
    142 	 printf("%d translations made, %d tt bytes\n",
    143                 n_translations_made, 8*trans_cache_used);
    144          exit(0);
    145       case 1: /* PUTC */
    146          putchar(arg2);
    147          return 0;
    148       case 2: /* MALLOC */
    149          return (HWord)malloc(arg2);
    150       case 3: /* FREE */
    151          free((void*)arg2);
    152          return 0;
    153       default:
    154          assert(0);
    155    }
    156 }
    157 
    158 
    159 /* -------------------- */
    160 /* continue execution on the real CPU (never returns) */
    161 extern void switchback_asm(void);
    162 
    163 #if defined(__i386__)
    164 
    /* i386 switchback.  switchback_asm reloads the real CPU registers
       from the guest state whose address is in sb_helper1, restores the
       flags word held in sb_helper2, and finishes with "ret", which pops
       the continuation address pushed earlier and so resumes at that
       address on the guest's stack.  The numeric offsets (0..28, 56) are
       hard-coded offsets into the guest state structure.
       NOTE(review): they presumably mirror the VexGuestX86State layout
       -- confirm against libvex_guest_x86.h when that header changes.
       In the stack pictures below the top of stack is leftmost and
       "CA" is the continuation address. */
    165 asm(
    166 "switchback_asm:\n"
    167 "   movl sb_helper1, %eax\n"  // eax = guest state ptr
    168 "   movl  16(%eax), %esp\n"   // switch stacks
    169 "   pushl 56(%eax)\n"         // push continuation addr
    170 "   movl sb_helper2, %ebx\n"  // get eflags
    171 "   pushl %ebx\n"             // stack is now  eflags:CA
    172 "   pushl 0(%eax)\n"          // stack is now  EAX:eflags:CA
    173 "   movl 4(%eax), %ecx\n"     // reload the remaining integer registers
    174 "   movl 8(%eax), %edx\n"
    175 "   movl 12(%eax), %ebx\n"
    176 "   movl 20(%eax), %ebp\n"
    177 "   movl 24(%eax), %esi\n"
    178 "   movl 28(%eax), %edi\n"
    179 "   popl %eax\n"              // eax last: it was the base pointer until now
    180 "   popfl\n"                  // restore flags
    181 "   ret\n"                    // jump to the continuation address
    182 );
        /* Publish the guest state address and flags for switchback_asm,
           then transfer control to it. */
    183 void switchback ( void )
    184 {
    185    sb_helper1 = (HWord)&gst;
    186    sb_helper2 = LibVEX_GuestX86_get_eflags(&gst);
    187    switchback_asm(); // never returns
    188 }
    189 
    190 #elif defined(__x86_64__)
    191 
    /* amd64 switchback.  Same scheme as the i386 version: reload the
       real registers from the guest state addressed by sb_helper1,
       restore the flags word from sb_helper2, then "ret" to the
       continuation address that was pushed onto the guest's stack.
       The numeric offsets (0..120, 168) are hard-coded offsets into the
       guest state structure.
       NOTE(review): they presumably mirror the VexGuestAMD64State
       layout -- confirm against libvex_guest_amd64.h.
       In the stack pictures below the top of stack is leftmost and
       "CA" is the continuation address. */
    192 asm(
    193 "switchback_asm:\n"
    194 "   movq sb_helper1, %rax\n"  // rax = guest state ptr
    195 "   movq  32(%rax), %rsp\n"   // switch stacks
    196 "   pushq 168(%rax)\n"        // push continuation addr
    197 "   movq sb_helper2, %rbx\n"  // get eflags
    198 "   pushq %rbx\n"             // stack is now  eflags:CA
    199 "   pushq 0(%rax)\n"          // stack is now  RAX:eflags:CA
    200 "   movq 8(%rax), %rcx\n"     // reload the remaining integer registers
    201 "   movq 16(%rax), %rdx\n"
    202 "   movq 24(%rax), %rbx\n"
    203 "   movq 40(%rax), %rbp\n"
    204 "   movq 48(%rax), %rsi\n"
    205 "   movq 56(%rax), %rdi\n"
    206 
    207 "   movq 64(%rax), %r8\n"
    208 "   movq 72(%rax), %r9\n"
    209 "   movq 80(%rax), %r10\n"
    210 "   movq 88(%rax), %r11\n"
    211 "   movq 96(%rax), %r12\n"
    212 "   movq 104(%rax), %r13\n"
    213 "   movq 112(%rax), %r14\n"
    214 "   movq 120(%rax), %r15\n"
    215 
    216 "   popq %rax\n"              // rax last: it was the base pointer until now
    217 "   popfq\n"                  // restore flags
    218 "   ret\n"                    // jump to the continuation address
    219 );
        /* Publish the guest state address and flags for switchback_asm,
           then transfer control to it. */
    220 void switchback ( void )
    221 {
    222    sb_helper1 = (HWord)&gst;
    223    sb_helper2 = LibVEX_GuestAMD64_get_rflags(&gst);
    224    switchback_asm(); // never returns
    225 }
    226 
    227 #elif defined(__powerpc__)
    228 
    229 static void invalidate_icache(void *ptr, int nbytes)
    230 {
    231    unsigned long startaddr = (unsigned long) ptr;
    232    unsigned long endaddr = startaddr + nbytes;
    233    unsigned long addr;
    234    unsigned long cls = CacheLineSize;
    235 
    236    startaddr &= ~(cls - 1);
    237    for (addr = startaddr; addr < endaddr; addr += cls)
    238       asm volatile("dcbst 0,%0" : : "r" (addr));
    239    asm volatile("sync");
    240    for (addr = startaddr; addr < endaddr; addr += cls)
    241       asm volatile("icbi 0,%0" : : "r" (addr));
    242    asm volatile("sync; isync");
    243 }
    244 
    245 
    246 #if !defined(__powerpc64__) // ppc32
    /* ppc32 switchback_asm.  Loads LR, CR, CTR, XER and then all 32 GPRs
       from the guest state whose address is in sb_helper1 (CR and XER
       come from sb_helper2 / sb_helper3).  r31 is the base register, so
       it is reloaded last.  The routine deliberately has no final
       branch: it ends in five nops between nop_start_point and
       nop_end_point.  switchback() copies this code and overwrites the
       first nop of the copy with a branch to the guest's next
       instruction.  Offsets 900 and 904 are hard-coded guest-state
       offsets.  NOTE(review): presumably those of guest_LR / guest_CTR
       in VexGuestPPC32State -- confirm against libvex_guest_ppc32.h. */
    247 asm(
    248 "switchback_asm:\n"
    249 // gst
    250 "   lis  %r31,sb_helper1@ha\n"      // get hi-wd of guest_state_ptr addr
    251 "   lwz  %r31,sb_helper1@l(%r31)\n" // load word of guest_state_ptr to r31
    252 
    253 // LR
    254 "   lwz  %r3,900(%r31)\n"           // guest_LR
    255 "   mtlr %r3\n"                     // move to LR
    256 
    257 // CR
    258 "   lis  %r3,sb_helper2@ha\n"       // get hi-wd of flags addr
    259 "   lwz  %r3,sb_helper2@l(%r3)\n"   // load flags word to r3
    260 "   mtcr %r3\n"                     // move r3 to CR
    261 
    262 // CTR
    263 "   lwz %r3,904(%r31)\n"       // guest_CTR
    264 "   mtctr %r3\n"               // move r3 to CTR
    265 
    266 // XER
    267 "   lis  %r3,sb_helper3@ha\n"       // get hi-wd of xer addr
    268 "   lwz  %r3,sb_helper3@l(%r3)\n"   // load xer word to r3
    269 "   mtxer %r3\n"                     // move r3 to XER
    270 
    271 
    272 // GPRs: guest register N lives at offset 4*N
    273 "   lwz %r0,    0(%r31)\n"
    274 "   lwz %r1,    4(%r31)\n"     // switch stacks (r1 = SP)
    275 "   lwz %r2,    8(%r31)\n"
    276 "   lwz %r3,   12(%r31)\n"
    277 "   lwz %r4,   16(%r31)\n"
    278 "   lwz %r5,   20(%r31)\n"
    279 "   lwz %r6,   24(%r31)\n"
    280 "   lwz %r7,   28(%r31)\n"
    281 "   lwz %r8,   32(%r31)\n"
    282 "   lwz %r9,   36(%r31)\n"
    283 "   lwz %r10,  40(%r31)\n"
    284 "   lwz %r11,  44(%r31)\n"
    285 "   lwz %r12,  48(%r31)\n"
    286 "   lwz %r13,  52(%r31)\n"
    287 "   lwz %r14,  56(%r31)\n"
    288 "   lwz %r15,  60(%r31)\n"
    289 "   lwz %r16,  64(%r31)\n"
    290 "   lwz %r17,  68(%r31)\n"
    291 "   lwz %r18,  72(%r31)\n"
    292 "   lwz %r19,  76(%r31)\n"
    293 "   lwz %r20,  80(%r31)\n"
    294 "   lwz %r21,  84(%r31)\n"
    295 "   lwz %r22,  88(%r31)\n"
    296 "   lwz %r23,  92(%r31)\n"
    297 "   lwz %r24,  96(%r31)\n"
    298 "   lwz %r25, 100(%r31)\n"
    299 "   lwz %r26, 104(%r31)\n"
    300 "   lwz %r27, 108(%r31)\n"
    301 "   lwz %r28, 112(%r31)\n"
    302 "   lwz %r29, 116(%r31)\n"
    303 "   lwz %r30, 120(%r31)\n"
    304 "   lwz %r31, 124(%r31)\n"     // base register goes last
    305 "nop_start_point:\n"           // first nop is replaced by a branch in the copy
    306 "   nop\n"
    307 "   nop\n"
    308 "   nop\n"
    309 "   nop\n"
    310 "   nop\n"
    311 "nop_end_point:\n"
    312 );
    313 
    314 #else // ppc64
    315 
    /* ppc64 switchback_asm.  The symbol switchback_asm itself is a
       three-doubleword function descriptor placed in .opd (entry
       address, TOC base, 0); the code proper starts at .switchback_asm,
       which is also labelled switchback_asm_undotted so that C code can
       take its address.  The body mirrors the ppc32 version: load LR,
       CR, CTR, XER and all 32 GPRs from the guest state addressed by
       sb_helper1 (CR / XER from sb_helper2 / sb_helper3), r31 last, and
       end in five nops that switchback() patches in a copy.  Each
       64-bit global address is built with a five-instruction
       lis/ori/rldicr/oris/ori sequence.  Offsets 1032 and 1040 are
       hard-coded guest-state offsets.  NOTE(review): presumably those
       of guest_LR / guest_CTR in VexGuestPPC64State -- confirm against
       libvex_guest_ppc64.h. */
    316 asm(
    317 ".text\n"
    318 "   .global switchback_asm\n"
    319 "   .section \".opd\",\"aw\"\n"
    320 "   .align 3\n"
    321 "switchback_asm:\n"
    322 "   .quad .switchback_asm,.TOC.@tocbase,0\n"
    323 "   .previous\n"
    324 "   .type .switchback_asm,@function\n"
    325 "   .global  .switchback_asm\n"
    326 ".switchback_asm:\n"
    327 "switchback_asm_undotted:\n"
    328 
    329 // gst: load word of guest_state_ptr to r31
    330 "   lis    %r31,sb_helper1@highest\n"
    331 "   ori    %r31,%r31,sb_helper1@higher\n"
    332 "   rldicr %r31,%r31,32,31\n"
    333 "   oris   %r31,%r31,sb_helper1@h\n"
    334 "   ori    %r31,%r31,sb_helper1@l\n"
    335 "   ld     %r31,0(%r31)\n"
    336 
    337 
    338 // LR
    339 "   ld   %r3,1032(%r31)\n"          // guest_LR
    340 "   mtlr %r3\n"                     // move to LR
    341 
    342 // CR
    343 "   lis    %r3,sb_helper2@highest\n"
    344 "   ori    %r3,%r3,sb_helper2@higher\n"
    345 "   rldicr %r3,%r3,32,31\n"
    346 "   oris   %r3,%r3,sb_helper2@h\n"
    347 "   ori    %r3,%r3,sb_helper2@l\n"
    348 "   ld     %r3,0(%r3)\n"            // load flags word to r3
    349 "   mtcr   %r3\n"                   // move r3 to CR
    350 
    351 // CTR
    352 "   ld     %r3,1040(%r31)\n"        // guest_CTR
    353 "   mtctr  %r3\n"                   // move r3 to CTR
    354 
    355 // XER
    356 "   lis    %r3,sb_helper3@highest\n"
    357 "   ori    %r3,%r3,sb_helper3@higher\n"
    358 "   rldicr %r3,%r3,32,31\n"
    359 "   oris   %r3,%r3,sb_helper3@h\n"
    360 "   ori    %r3,%r3,sb_helper3@l\n"
    361 "   ld     %r3,0(%r3)\n"            // load xer word to r3
    362 "   mtxer  %r3\n"                   // move r3 to XER
    363 
    364 // GPRs: guest register N lives at offset 8*N
    365 "   ld %r0,    0(%r31)\n"
    366 "   ld %r1,    8(%r31)\n"     // switch stacks (r1 = SP)
    367 "   ld %r2,   16(%r31)\n"
    368 "   ld %r3,   24(%r31)\n"
    369 "   ld %r4,   32(%r31)\n"
    370 "   ld %r5,   40(%r31)\n"
    371 "   ld %r6,   48(%r31)\n"
    372 "   ld %r7,   56(%r31)\n"
    373 "   ld %r8,   64(%r31)\n"
    374 "   ld %r9,   72(%r31)\n"
    375 "   ld %r10,  80(%r31)\n"
    376 "   ld %r11,  88(%r31)\n"
    377 "   ld %r12,  96(%r31)\n"
    378 "   ld %r13, 104(%r31)\n"
    379 "   ld %r14, 112(%r31)\n"
    380 "   ld %r15, 120(%r31)\n"
    381 "   ld %r16, 128(%r31)\n"
    382 "   ld %r17, 136(%r31)\n"
    383 "   ld %r18, 144(%r31)\n"
    384 "   ld %r19, 152(%r31)\n"
    385 "   ld %r20, 160(%r31)\n"
    386 "   ld %r21, 168(%r31)\n"
    387 "   ld %r22, 176(%r31)\n"
    388 "   ld %r23, 184(%r31)\n"
    389 "   ld %r24, 192(%r31)\n"
    390 "   ld %r25, 200(%r31)\n"
    391 "   ld %r26, 208(%r31)\n"
    392 "   ld %r27, 216(%r31)\n"
    393 "   ld %r28, 224(%r31)\n"
    394 "   ld %r29, 232(%r31)\n"
    395 "   ld %r30, 240(%r31)\n"
    396 "   ld %r31, 248(%r31)\n"     // base register goes last
    397 "nop_start_point:\n"          // first nop is replaced by a branch in the copy
    398 "   nop\n"
    399 "   nop\n"
    400 "   nop\n"
    401 "   nop\n"
    402 "   nop\n"
    403 "nop_end_point:\n"
    404 );
    405 #endif
    406 
    /* ppc32 / ppc64 switchback().  The three extern declarations below
       name labels defined in the switchback_asm inline assembly; only
       their addresses are ever used.
       Steps performed:
         1. copy the bytes [start of switchback_asm, nop_end_point) into
            memory obtained from mymalloc();
         2. check that the guest's next instruction (gst.guest_CIA) is
            within branch range of the first nop in the copy;
         3. publish &gst, CR and XER through sb_helper1..3;
         4. overwrite that first nop with an unconditional relative
            branch to gst.guest_CIA;
         5. flush the caches for the copy and jump to it.
       The call in step 5 never returns. */
    407 extern void switchback_asm_undotted;
    408 extern void nop_start_point;
    409 extern void nop_end_point;
    410 void switchback ( void )
    411 {
    412    Int i;
    413    /* blargh.  Copy the entire switchback_asm procedure into new
    414       memory on which we can set both write and execute permissions,
    415       so we can poke around with it and then run the results. */
    416 
    417 #if defined(__powerpc64__) // ppc64: switchback_asm is a descriptor, use the code label
    418    UChar* sa_start     = (UChar*)&switchback_asm_undotted;
    419 #else
    420    UChar* sa_start     = (UChar*)&switchback_asm;
    421 #endif
    422    UChar* sa_nop_start = (UChar*)&nop_start_point;
    423    UChar* sa_end       = (UChar*)&nop_end_point;
    424 
    425 #if 0
    426    printf("sa_start     %p\n", sa_start );
    427    printf("sa_nop_start %p\n", sa_nop_start);
    428    printf("sa_end       %p\n", sa_end);
    429 #endif
    430    Int nbytes       = sa_end - sa_start;          /* size of the routine, nops included */
    431    Int off_nopstart = sa_nop_start - sa_start;    /* byte offset of the first nop */
    432    if (0)
    433       printf("nbytes = %d, nopstart = %d\n", nbytes, off_nopstart);
    434 
    435    /* copy it into mallocville */
    436    UChar* copy = mymalloc(nbytes);
    437    assert(copy);
    438    for (i = 0; i < nbytes; i++)
    439       copy[i] = sa_start[i];
    440 
    441    UInt* p = (UInt*)(&copy[off_nopstart]);         /* first nop inside the copy */
    442 
    443 #if !defined(__powerpc64__) // ppc32
    444    Addr32 addr_of_nop = (Addr32)p;
    445    Addr32 where_to_go = gst.guest_CIA;
    446    Int    diff = ((Int)where_to_go) - ((Int)addr_of_nop);
    447 
    448 #if 0
    449    printf("addr of first nop = 0x%x\n", addr_of_nop);
    450    printf("where to go       = 0x%x\n", where_to_go);
    451    printf("diff = 0x%x\n", diff);
    452 #endif
    453 
    454 #else // ppc64
    455    Addr64 addr_of_nop = (Addr64)p;
    456    Addr64 where_to_go = gst.guest_CIA;
    457    Long   diff = ((Long)where_to_go) - ((Long)addr_of_nop);
    458 
    459 #if 0
    460    printf("addr of first nop = 0x%llx\n", addr_of_nop);
    461    printf("where to go       = 0x%llx\n", where_to_go);
    462    printf("diff = 0x%llx\n", diff);
    463 #endif
    464 #endif
    465 
           /* The branch written below carries a 24-bit word displacement,
              i.e. a signed 26-bit byte offset: +/- 0x2000000. */
    466    if (diff < -0x2000000 || diff >= 0x2000000) {
    467      // we're hosed.  Give up
    468      printf("hosed -- offset too large\n");
    469      assert(0);
    470    }
    471 
    472    sb_helper1 = (HWord)&gst;
    473 #if !defined(__powerpc64__) // ppc32
    474    sb_helper2 = LibVEX_GuestPPC32_get_CR(&gst);
    475    sb_helper3 = LibVEX_GuestPPC32_get_XER(&gst);
    476 #else // ppc64
    477    sb_helper2 = LibVEX_GuestPPC64_get_CR(&gst);
    478    sb_helper3 = LibVEX_GuestPPC64_get_XER(&gst);
    479 #endif
    480 
    481    /* stay sane ... */
    482    assert(p[0] == 24<<26); /* nop */
    483 
    484    /* branch to diff: primary opcode 18 in the top six bits, the word
              displacement in the next 24 bits, and both low bits zero */
    485    p[0] = ((18<<26) | (((diff >> 2) & 0xFFFFFF) << 2) | (0<<1) | (0<<0));
    486 
    487    invalidate_icache( copy, nbytes );
    488 
    489 #if defined(__powerpc64__)
    490    //printf("jumping to %p\n", copy);
           /* A ppc64 function pointer designates a descriptor whose first
              doubleword is the entry address, so build one on the stack.
              Only that first doubleword is filled in here. */
    491    { ULong faketoc[3];
    492      void* v;
    493      faketoc[0] = (ULong)copy;
    494      v = &faketoc[0];
    495      ( (void(*)(void)) v )();
    496    }
    497 #else
    498    ( (void(*)(void))copy )();
    499 #endif
    500 }
    501 
    502 #else
    503 #   error "Unknown arch (switchback)"
    504 #endif
    505 
    506 /* -------------------- */
    /* Mailbox between run_translation() and the run_translation_asm
       stubs:  f   = host address of the translation to call,
               gp  = guest state pointer given to the translation,
               res = value the translation returned. */
    507 static HWord f, gp, res;
    508 extern void run_translation_asm(void);
    509 
    510 #if defined(__i386__)
    /* i386 run_translation_asm: save all integer registers, load the
       guest state pointer from "gp" into %ebp, call the translation
       whose address is in "f", store its %eax result in "res", restore
       the registers and return.
       NOTE(review): unlike the ppc versions, %ebp is not written back
       to "gp" after the call, so "gp" still holds the value that
       run_translation() stored before calling this stub. */
    511 asm(
    512 "run_translation_asm:\n"
    513 "   pushal\n"               // save all integer registers
    514 "   movl gp, %ebp\n"        // ebp = guest state ptr
    515 "   movl f, %eax\n"         // eax = translation address
    516 "   call *%eax\n"
    517 "   movl %eax, res\n"       // res = value returned by the translation
    518 "   popal\n"                // restore all integer registers
    519 "   ret\n"
    520 );
    521 
    522 #elif defined(__x86_64__)
    /* amd64 run_translation_asm: push fifteen integer registers (all
       except %rsp), load the guest state pointer from "gp" into %rbp,
       call the translation whose address is in "f", store its %rax
       result in "res", pop the registers in reverse order and return.
       NOTE(review): unlike the ppc versions, %rbp is not written back
       to "gp" after the call, so "gp" still holds the value that
       run_translation() stored before calling this stub. */
    523 asm(
    524 "run_translation_asm:\n"
    525 
    526 "   pushq %rax\n"
    527 "   pushq %rbx\n"
    528 "   pushq %rcx\n"
    529 "   pushq %rdx\n"
    530 "   pushq %rbp\n"
    531 "   pushq %rsi\n"
    532 "   pushq %rdi\n"
    533 "   pushq %r8\n"
    534 "   pushq %r9\n"
    535 "   pushq %r10\n"
    536 "   pushq %r11\n"
    537 "   pushq %r12\n"
    538 "   pushq %r13\n"
    539 "   pushq %r14\n"
    540 "   pushq %r15\n"
    541 
    542 "   movq gp, %rbp\n"        // rbp = guest state ptr
    543 "   movq f, %rax\n"         // rax = translation address
    544 "   call *%rax\n"
    545 "   movq %rax, res\n"       // res = value returned by the translation
    546 
    547 "   popq  %r15\n"
    548 "   popq  %r14\n"
    549 "   popq  %r13\n"
    550 "   popq  %r12\n"
    551 "   popq  %r11\n"
    552 "   popq  %r10\n"
    553 "   popq  %r9\n"
    554 "   popq  %r8\n"
    555 "   popq  %rdi\n"
    556 "   popq  %rsi\n"
    557 "   popq  %rbp\n"
    558 "   popq  %rdx\n"
    559 "   popq  %rcx\n"
    560 "   popq  %rbx\n"
    561 "   popq  %rax\n"
    562 
    563 "   ret\n"
    564 );
    565 
    566 #elif defined(__powerpc__)
    567 
    568 #if !defined(__powerpc64__) // ppc32
    /* ppc32 run_translation_asm: open a 256-byte stack frame, save LR
       and r13..r31, load the guest state pointer from "gp" into r31,
       call the translation whose address is in "f" (via CTR), store the
       r3 result in "res" and the possibly modified r31 back into "gp",
       then restore everything and return. */
    569 asm(
    570 "run_translation_asm:\n"
    571 
    572 // create new stack:
    573 // save old sp at first word & update sp
    574 "   stwu 1,-256(1)\n"
    575 
    576 // save LR (260 = 256 + 4, i.e. word 4 above the old stack pointer)
    577 "   mflr %r0\n"
    578 "   stw  %r0,260(%r1)\n"
    579 
    580 // leave hole @ 4(%r1) for a callee to save its LR
    581 // no params
    582 // no need to save non-volatile CR fields
    583 
    584 // store registers to stack: just the callee-saved regs
    585 "   stw %r13,  8(%r1)\n"
    586 "   stw %r14, 12(%r1)\n"
    587 "   stw %r15, 16(%r1)\n"
    588 "   stw %r16, 20(%r1)\n"
    589 "   stw %r17, 24(%r1)\n"
    590 "   stw %r18, 28(%r1)\n"
    591 "   stw %r19, 32(%r1)\n"
    592 "   stw %r20, 36(%r1)\n"
    593 "   stw %r21, 40(%r1)\n"
    594 "   stw %r22, 44(%r1)\n"
    595 "   stw %r23, 48(%r1)\n"
    596 "   stw %r24, 52(%r1)\n"
    597 "   stw %r25, 56(%r1)\n"
    598 "   stw %r26, 60(%r1)\n"
    599 "   stw %r27, 64(%r1)\n"
    600 "   stw %r28, 68(%r1)\n"
    601 "   stw %r29, 72(%r1)\n"
    602 "   stw %r30, 76(%r1)\n"
    603 "   stw %r31, 80(%r1)\n"
    604 
    605 // r31 (guest state ptr) := global var "gp"
    606 "   lis %r31,gp@ha\n"
    607 "   lwz %r31,gp@l(%r31)\n"
    608 
    609 // call translation address in global var "f"
    610 "   lis %r4,f@ha\n"
    611 "   lwz %r4,f@l(%r4)\n"
    612 "   mtctr %r4\n"
    613 "   bctrl\n"
    614 
    615 // save return value (in r3) into global var "res"
    616 "   lis %r5,res@ha\n"
    617 "   stw %r3,res@l(%r5)\n"
    618 
    619 // save possibly modified guest state ptr (r31) in "gp"
    620 "   lis %r5,gp@ha\n"
    621 "   stw %r31,gp@l(%r5)\n"
    622 
    623 // reload registers from stack
    624 "   lwz %r13,  8(%r1)\n"
    625 "   lwz %r14, 12(%r1)\n"
    626 "   lwz %r15, 16(%r1)\n"
    627 "   lwz %r16, 20(%r1)\n"
    628 "   lwz %r17, 24(%r1)\n"
    629 "   lwz %r18, 28(%r1)\n"
    630 "   lwz %r19, 32(%r1)\n"
    631 "   lwz %r20, 36(%r1)\n"
    632 "   lwz %r21, 40(%r1)\n"
    633 "   lwz %r22, 44(%r1)\n"
    634 "   lwz %r23, 48(%r1)\n"
    635 "   lwz %r24, 52(%r1)\n"
    636 "   lwz %r25, 56(%r1)\n"
    637 "   lwz %r26, 60(%r1)\n"
    638 "   lwz %r27, 64(%r1)\n"
    639 "   lwz %r28, 68(%r1)\n"
    640 "   lwz %r29, 72(%r1)\n"
    641 "   lwz %r30, 76(%r1)\n"
    642 "   lwz %r31, 80(%r1)\n"
    643 
    644 // restore LR
    645 "   lwz  %r0,260(%r1)\n"
    646 "   mtlr %r0\n"
    647 
    648 // restore previous stack pointer
    649 "   addi %r1,%r1,256\n"
    650 
    651 // return
    652 "   blr"
    653 );
    654 
    655 #else // ppc64
    656 
    /* ppc64 run_translation_asm.  The symbol run_translation_asm is a
       function descriptor in .opd; the code starts at
       .run_translation_asm.  LR and CTR are saved at 16(r1) and 8(r1)
       before the 256-byte frame is opened, r13..r31 are saved inside
       the new frame, r31 is loaded from "gp", the translation whose
       address is in "f" is called via CTR, r3 is stored in "res" and
       the possibly modified r31 back into "gp"; then everything is
       restored and the stub returns.  Each 64-bit global address is
       built with a lis/ori/rldicr/oris/ori sequence. */
    657 asm(
    658 ".text\n"
    659 "   .global run_translation_asm\n"
    660 "   .section \".opd\",\"aw\"\n"
    661 "   .align 3\n"
    662 "run_translation_asm:\n"
    663 "   .quad .run_translation_asm,.TOC.@tocbase,0\n"
    664 "   .previous\n"
    665 "   .type .run_translation_asm,@function\n"
    666 "   .global  .run_translation_asm\n"
    667 ".run_translation_asm:\n"
    668 
    669 // save LR,CTR
    670 "   mflr  %r0\n"
    671 "   std   %r0,16(%r1)\n"
    672 "   mfctr %r0\n"
    673 "   std   %r0,8(%r1)\n"
    674 
    675 // create new stack:
    676 // save old sp at first word & update sp
    677 "   stdu 1,-256(1)\n"
    678 
    679 // leave hole @ 4(%r1) for a callee to save its LR
    // NOTE(review): the "4(%r1)" above is worded as in the ppc32 stub;
    // this stub itself saves LR at 16(%r1) -- confirm the intended offset.
    680 // no params
    681 // no need to save non-volatile CR fields
    682 
    683 // store registers to stack: just the callee-saved regs
    684 "   std %r13,  48(%r1)\n"
    685 "   std %r14,  56(%r1)\n"
    686 "   std %r15,  64(%r1)\n"
    687 "   std %r16,  72(%r1)\n"
    688 "   std %r17,  80(%r1)\n"
    689 "   std %r18,  88(%r1)\n"
    690 "   std %r19,  96(%r1)\n"
    691 "   std %r20, 104(%r1)\n"
    692 "   std %r21, 112(%r1)\n"
    693 "   std %r22, 120(%r1)\n"
    694 "   std %r23, 128(%r1)\n"
    695 "   std %r24, 136(%r1)\n"
    696 "   std %r25, 144(%r1)\n"
    697 "   std %r26, 152(%r1)\n"
    698 "   std %r27, 160(%r1)\n"
    699 "   std %r28, 168(%r1)\n"
    700 "   std %r29, 176(%r1)\n"
    701 "   std %r30, 184(%r1)\n"
    702 "   std %r31, 192(%r1)\n"
    703 
    704 // r31 (guest state ptr) := global var "gp"
    705 "   lis    %r31,gp@highest\n"
    706 "   ori    %r31,%r31,gp@higher\n"
    707 "   rldicr %r31,%r31,32,31\n"
    708 "   oris   %r31,%r31,gp@h\n"
    709 "   ori    %r31,%r31,gp@l\n"
    710 "   ld     %r31,0(%r31)\n"
    711 
    712 // call translation address in global var "f"
    713 "   lis    %r4,f@highest\n"
    714 "   ori    %r4,%r4,f@higher\n"
    715 "   rldicr %r4,%r4,32,31\n"
    716 "   oris   %r4,%r4,f@h\n"
    717 "   ori    %r4,%r4,f@l\n"
    718 "   ld     %r4,0(%r4)\n"
    719 "   mtctr  %r4\n"
    720 "   bctrl\n"
    721 
    722 // save return value (in r3) into global var "res"
    723 "   lis    %r5,res@highest\n"
    724 "   ori    %r5,%r5,res@higher\n"
    725 "   rldicr %r5,%r5,32,31\n"
    726 "   oris   %r5,%r5,res@h\n"
    727 "   ori    %r5,%r5,res@l\n"
    728 "   std    %r3,0(%r5)\n"
    729 
    730 // save possibly modified guest state ptr (r31) in "gp"
    731 "   lis    %r5,gp@highest\n"
    732 "   ori    %r5,%r5,gp@higher\n"
    733 "   rldicr %r5,%r5,32,31\n"
    734 "   oris   %r5,%r5,gp@h\n"
    735 "   ori    %r5,%r5,gp@l\n"
    736 "   std    %r31,0(%r5)\n"
    737 
    738 // reload registers from stack
    739 "   ld %r13,  48(%r1)\n"
    740 "   ld %r14,  56(%r1)\n"
    741 "   ld %r15,  64(%r1)\n"
    742 "   ld %r16,  72(%r1)\n"
    743 "   ld %r17,  80(%r1)\n"
    744 "   ld %r18,  88(%r1)\n"
    745 "   ld %r19,  96(%r1)\n"
    746 "   ld %r20, 104(%r1)\n"
    747 "   ld %r21, 112(%r1)\n"
    748 "   ld %r22, 120(%r1)\n"
    749 "   ld %r23, 128(%r1)\n"
    750 "   ld %r24, 136(%r1)\n"
    751 "   ld %r25, 144(%r1)\n"
    752 "   ld %r26, 152(%r1)\n"
    753 "   ld %r27, 160(%r1)\n"
    754 "   ld %r28, 168(%r1)\n"
    755 "   ld %r29, 176(%r1)\n"
    756 "   ld %r30, 184(%r1)\n"
    757 "   ld %r31, 192(%r1)\n"
    758 
    759 // restore previous stack pointer
    760 "   addi %r1,%r1,256\n"
    761 
    762 // restore LR,CTR
    763 "   ld    %r0,16(%r1)\n"
    764 "   mtlr  %r0\n"
    765 "   ld    %r0,8(%r1)\n"
    766 "   mtctr %r0\n"
    767 
    768 // return
    769 "   blr"
    770 );
    771 #endif
    772 
    773 #else
    774 
    775 #   error "Unknown arch"
    776 #endif
    777 
    778 /* Run a translation at host address 'translation'.  Return
    779    True if Vex asked for an translation cache flush as a result.
    780 */
    781 Bool run_translation ( HWord translation )
    782 {
    783    if (0 && DEBUG_TRACE_FLAGS) {
    784       printf(" run translation %p\n", (void*)translation );
    785       printf(" simulated bb: %llu\n", n_bbs_done);
    786    }
    787    f = translation;
    788    gp = (HWord)&gst;
    789    run_translation_asm();
    790    gst.GuestPC = res;
    791    n_bbs_done ++;
    792    return gp==VEX_TRC_JMP_TINVAL;
    793 }
    794 
    795 HWord find_translation ( Addr64 guest_addr )
    796 {
    797    Int i;
    798    HWord __res;
    799    if (0)
    800       printf("find translation %p ... ", ULong_to_Ptr(guest_addr));
    801    for (i = 0; i < trans_table_used; i++)
    802      if (trans_table[i].base[0] == guest_addr)
    803         break;
    804    if (i == trans_table_used) {
    805       if (0) printf("none\n");
    806       return 0; /* not found */
    807    }
    808 
    809    /* Move this translation one step towards the front, so finding it
    810       next time round is just that little bit cheaper. */
    811    if (i > 2) {
    812       VexGuestExtents tmpE = trans_table[i-1];
    813       ULong*          tmpP = trans_tableP[i-1];
    814       trans_table[i-1]  = trans_table[i];
    815       trans_tableP[i-1] = trans_tableP[i];
    816       trans_table[i] = tmpE;
    817       trans_tableP[i] = tmpP;
    818       i--;
    819    }
    820 
    821    __res = (HWord)trans_tableP[i];
    822    if (0) printf("%p\n", (void*)__res);
    823    return __res;
    824 }
    825 
    826 #define N_TRANSBUF 5000
    827 static UChar transbuf[N_TRANSBUF];
    828 void make_translation ( Addr64 guest_addr, Bool verbose )
    829 {
    830    VexTranslateArgs   vta;
    831    VexTranslateResult tres;
    832    VexArchInfo vex_archinfo;
    833    Int trans_used, i, ws_needed;
    834 
    835    if (trans_table_used >= N_TRANS_TABLE
    836        || trans_cache_used >= N_TRANS_CACHE-1000) {
    837       /* If things are looking to full, just dump
    838          all the translations. */
    839       trans_cache_used = 0;
    840       trans_table_used = 0;
    841    }
    842 
    843    assert(trans_table_used < N_TRANS_TABLE);
    844    if (0)
    845       printf("make translation %p\n", ULong_to_Ptr(guest_addr));
    846 
    847    LibVEX_default_VexArchInfo(&vex_archinfo);
    848    vex_archinfo.subarch = VexSubArch;
    849    vex_archinfo.ppc_cache_line_szB = CacheLineSize;
    850 
    851    /* */
    852    vta.arch_guest       = VexArch;
    853    vta.archinfo_guest   = vex_archinfo;
    854    vta.arch_host        = VexArch;
    855    vta.archinfo_host    = vex_archinfo;
    856    vta.guest_bytes      = (UChar*)ULong_to_Ptr(guest_addr);
    857    vta.guest_bytes_addr = (Addr64)guest_addr;
    858    vta.guest_bytes_addr_noredir = (Addr64)guest_addr;
    859    vta.chase_into_ok    = chase_into_ok;
    860 //   vta.guest_extents    = &vge;
    861    vta.guest_extents    = &trans_table[trans_table_used];
    862    vta.host_bytes       = transbuf;
    863    vta.host_bytes_size  = N_TRANSBUF;
    864    vta.host_bytes_used  = &trans_used;
    865    vta.instrument1      = NULL;
    866    vta.instrument2      = NULL;
    867    vta.do_self_check    = False;
    868    vta.traceflags       = verbose ? TEST_FLAGS : DEBUG_TRACE_FLAGS;
    869    vta.dispatch         = NULL;
    870 
    871    tres = LibVEX_Translate ( &vta );
    872 
    873    assert(tres == VexTransOK);
    874    ws_needed = (trans_used+7) / 8;
    875    assert(ws_needed > 0);
    876    assert(trans_cache_used + ws_needed < N_TRANS_CACHE);
    877    n_translations_made++;
    878 
    879    for (i = 0; i < trans_used; i++) {
    880       HChar* dst = ((HChar*)(&trans_cache[trans_cache_used])) + i;
    881       HChar* src = (HChar*)(&transbuf[i]);
    882       *dst = *src;
    883    }
    884 
    885 #if defined(__powerpc__)
    886    invalidate_icache( &trans_cache[trans_cache_used], trans_used );
    887 #endif
    888 
    889    trans_tableP[trans_table_used] = &trans_cache[trans_cache_used];
    890    trans_table_used++;
    891    trans_cache_used += ws_needed;
    892 }
    893 
    894 
    895 static Bool overlap ( Addr64 start, UInt len, VexGuestExtents* vge )
    896 {
    897    Int i;
    898    for (i = 0; i < vge->n_used; i++) {
    899      if (vge->base[i]+vge->len[i] <= start
    900          || vge->base[i] >= start+len) {
    901        /* ok */
    902      } else {
    903         return True;
    904      }
    905    }
    906    return False; /* no overlap */
    907 }
    908 
    909 static void dump_translations ( Addr64 start, UInt len )
    910 {
    911    Int i, j;
    912    j = 0;
    913    for (i = 0; i < trans_table_used; i++) {
    914       if (overlap(start, len, &trans_table[i])) {
    915          /* do nothing */
    916       } else {
    917          assert(j <= i);
    918          trans_table[j] = trans_table[i];
    919          trans_tableP[j] = trans_tableP[i];
    920 	 j++;
    921       }
    922    }
    923    assert(j >= 0 && j <= trans_table_used);
    924    if (0) printf("dumped %d translations\n", trans_table_used - j);
    925    trans_table_used = j;
    926 }
    927 
    928 
    /* stopAfter: run_simulator() begins the switchback once n_bbs_done
       equals this value.
       entryP: NOTE(review): not referenced in this part of the file;
       presumably the entry point of the test program -- confirm. */
    929 static ULong  stopAfter = 0;
    930 static UChar* entryP    = NULL;
    931 
    932 
    933 __attribute__ ((noreturn))
    934 static
    935 void failure_exit ( void )
    936 {
    937    fprintf(stdout, "VEX did failure_exit.  Bye.\n");
    938    fprintf(stdout, "bb counter = %llu\n\n", n_bbs_done);
    939    exit(1);
    940 }
    941 
    942 static
    943 void log_bytes ( HChar* bytes, Int nbytes )
    944 {
    945    fwrite ( bytes, 1, nbytes, stdout );
    946    fflush ( stdout );
    947 }
    948 
    949 
    950 /* run simulated code forever (it will exit by calling
    951    serviceFn(0)). */
    952 static void run_simulator ( void )
    953 {
    954    static Addr64 last_guest = 0;
    955    Addr64 next_guest;
    956    HWord next_host;
    957    Bool need_inval;
    958    while (1) {
    959       next_guest = gst.GuestPC;
    960 
    961       if (0)
    962          printf("\nnext_guest: 0x%x\n", (UInt)next_guest);
    963 
    964 #if defined(__powerpc64__)
    965       if (next_guest == Ptr_to_ULong( (void*)(*(ULong*)(&serviceFn)) )) {
    966 #else
    967       if (next_guest == Ptr_to_ULong(&serviceFn)) {
    968 #endif
    969          /* "do" the function call to serviceFn */
    970 #        if defined(__i386__)
    971          {
    972             HWord esp = gst.guest_ESP;
    973             gst.guest_EIP = *(UInt*)(esp+0);
    974             gst.guest_EAX = serviceFn( *(UInt*)(esp+4), *(UInt*)(esp+8) );
    975             gst.guest_ESP = esp+4;
    976             next_guest = gst.guest_EIP;
    977          }
    978 #        elif defined(__x86_64__)
    979          {
    980             HWord esp = gst.guest_RSP;
    981             gst.guest_RIP = *(UInt*)(esp+0);
    982             gst.guest_RAX = serviceFn( gst.guest_RDI, gst.guest_RSI );
    983             gst.guest_RSP = esp+8;
    984             next_guest = gst.guest_RIP;
    985          }
    986 #        elif defined(__powerpc__)
    987          {
    988             gst.guest_GPR3 = serviceFn( gst.guest_GPR3, gst.guest_GPR4 );
    989             gst.guest_CIA  = gst.guest_LR;
    990             next_guest     = gst.guest_CIA;
    991          }
    992 #        else
    993 #        error "Unknown arch"
    994 #        endif
    995       }
    996 
    997       next_host = find_translation(next_guest);
    998       if (next_host == 0) {
    999          make_translation(next_guest,False);
   1000          next_host = find_translation(next_guest);
   1001          assert(next_host != 0);
   1002       }
   1003 
   1004       // Switchback
   1005       if (n_bbs_done == stopAfter) {
   1006          printf("---begin SWITCHBACK at bb:%llu---\n", n_bbs_done);
   1007 #if 1
   1008          if (last_guest) {
   1009             printf("\n*** Last run translation (bb:%llu):\n", n_bbs_done-1);
   1010             make_translation(last_guest,True);
   1011          }
   1012 #endif
   1013 #if 0
   1014          if (next_guest) {
   1015             printf("\n*** Current translation (bb:%llu):\n", n_bbs_done);
   1016             make_translation(next_guest,True);
   1017          }
   1018 #endif
   1019          printf("---  end SWITCHBACK at bb:%llu ---\n", n_bbs_done);
   1020          switchback();
   1021          assert(0); /*NOTREACHED*/
   1022       }
   1023 
   1024       last_guest = next_guest;
   1025       need_inval = run_translation(next_host);
   1026       if (need_inval) {
   1027 #if defined(__powerpc__)
   1028          dump_translations( (Addr64)gst.guest_TISTART, gst.guest_TILEN );
   1029 	 if (0) printf("dump translations done\n");
   1030 #endif
   1031       }
   1032    }
   1033 }
   1034 
   1035 
   1036 static void usage ( void )
   1037 {
   1038    printf("usage: switchback #bbs\n");
   1039    printf("   - begins switchback for basic block #bbs\n");
   1040    printf("   - use -1 for largest possible run without switchback\n\n");
   1041    exit(1);
   1042 }
   1043 
#if defined(__powerpc__)

/* Assembly helpers that copy host register values into C-visible
   globals: get_R2 stores r2 into saved_R2, and (ppc64 only) get_R13
   stores r13 into saved_R13.  Both use r10 as a scratch register to
   hold the address of the destination variable. */

#if !defined(__powerpc64__) // ppc32
UInt saved_R2;
/* ppc32: form the address of saved_R2 from its @ha/@l halves and
   store r2 there as a 32-bit word. */
asm(
"get_R2:\n"
"   lis  %r10,saved_R2@ha\n"
"   stw  %r2,saved_R2@l(%r10)\n"
"   blr\n"
);
#else // ppc64
ULong saved_R2;
ULong saved_R13;
/* ppc64: "get_R2" is an entry in the .opd section that points at the
   code label ".get_R2".  The code builds the 64-bit address of
   saved_R2 in r10 from its @highest/@higher/@h/@l 16-bit pieces and
   stores r2 there as a doubleword. */
asm(
".text\n"
"   .global get_R2\n"
"   .section \".opd\",\"aw\"\n"
"   .align 3\n"
"get_R2:\n"
"   .quad .get_R2,.TOC.@tocbase,0\n"
"   .previous\n"
"   .type .get_R2,@function\n"
"   .global  .get_R2\n"
".get_R2:\n"
"   lis    %r10,saved_R2@highest\n"
"   ori    %r10,%r10,saved_R2@higher\n"
"   rldicr %r10,%r10,32,31\n"
"   oris   %r10,%r10,saved_R2@h\n"
"   ori    %r10,%r10,saved_R2@l\n"
"   std    %r2,0(%r10)\n"
"   blr\n"
);
/* ppc64: same sequence as get_R2, but storing r13 into saved_R13. */
asm(
".text\n"
"   .global get_R13\n"
"   .section \".opd\",\"aw\"\n"
"   .align 3\n"
"get_R13:\n"
"   .quad .get_R13,.TOC.@tocbase,0\n"
"   .previous\n"
"   .type .get_R13,@function\n"
"   .global  .get_R13\n"
".get_R13:\n"
"   lis    %r10,saved_R13@highest\n"
"   ori    %r10,%r10,saved_R13@higher\n"
"   rldicr %r10,%r10,32,31\n"
"   oris   %r10,%r10,saved_R13@h\n"
"   ori    %r10,%r10,saved_R13@l\n"
"   std    %r13,0(%r10)\n"
"   blr\n"
);
#endif
/* C prototypes for the assembly routines above.  get_R13 is declared
   for both variants but only defined in the ppc64 branch. */
extern void get_R2 ( void );
extern void get_R13 ( void );
#endif
   1099 
   1100 int main ( Int argc, HChar** argv )
   1101 {
   1102    if (argc != 2)
   1103       usage();
   1104 
   1105    stopAfter = (ULong)atoll(argv[1]);
   1106 
   1107    extern void entry ( void*(*service)(int,int) );
   1108    entryP = (UChar*)&entry;
   1109 
   1110    if (!entryP) {
   1111       printf("switchback: can't find entry point\n");
   1112       exit(1);
   1113    }
   1114 
   1115    LibVEX_default_VexControl(&vcon);
   1116    vcon.guest_max_insns=50;
   1117    vcon.guest_chase_thresh=0;
   1118    vcon.iropt_level=2;
   1119 
   1120    LibVEX_Init( failure_exit, log_bytes, 1, False, &vcon );
   1121    LibVEX_Guest_initialise(&gst);
   1122 
   1123    /* set up as if a call to the entry point passing serviceFn as
   1124       the one and only parameter */
   1125 #  if defined(__i386__)
   1126    gst.guest_EIP = (UInt)entryP;
   1127    gst.guest_ESP = (UInt)&gstack[25000];
   1128    *(UInt*)(gst.guest_ESP+4) = (UInt)serviceFn;
   1129    *(UInt*)(gst.guest_ESP+0) = 0x12345678;
   1130 #  elif defined(__x86_64__)
   1131    gst.guest_RIP = (ULong)entryP;
   1132    gst.guest_RSP = (ULong)&gstack[25000];
   1133    gst.guest_RDI = (ULong)serviceFn;
   1134    *(ULong*)(gst.guest_RSP+0) = 0x12345678AABBCCDDULL;
   1135 #  elif defined(__powerpc__)
   1136    get_R2();
   1137 
   1138 #if !defined(__powerpc64__) // ppc32
   1139    gst.guest_CIA   = (UInt)entryP;
   1140    gst.guest_GPR1  = (UInt)&gstack[25000]; /* stack pointer */
   1141    gst.guest_GPR3  = (UInt)serviceFn; /* param to entry */
   1142    gst.guest_GPR2  = saved_R2;
   1143    gst.guest_LR    = 0x12345678; /* bogus return address */
   1144 #else // ppc64
   1145    get_R13();
   1146    gst.guest_CIA   = * (ULong*)entryP;
   1147    gst.guest_GPR1  = (ULong)&gstack[25000]; /* stack pointer */
   1148    gst.guest_GPR3  = (ULong)serviceFn;      /* param to entry */
   1149    gst.guest_GPR2  = saved_R2;
   1150    gst.guest_GPR13 = saved_R13;
   1151    gst.guest_LR    = 0x1234567812345678ULL; /* bogus return address */
   1152 //   printf("setting CIA to %p\n", (void*)gst.guest_CIA);
   1153 #endif
   1154 
   1155 #  else
   1156 #  error "Unknown arch"
   1157 #  endif
   1158 
   1159    printf("\n---START---\n");
   1160 
   1161 #if 1
   1162    run_simulator();
   1163 #else
   1164    ( (void(*)(HWord(*)(HWord,HWord))) entryP ) (serviceFn);
   1165 #endif
   1166 
   1167 
   1168    return 0;
   1169 }
   1170