Home | History | Annotate | Download | only in switchback
      1 
      2 /* HOW TO USE
      3 
      4 13 Dec '05 - Linker no longer used (apart from mymalloc)
      5 Simply compile and link switchback.c with test_xxx.c,
      6 e.g. for ppc64:
      7 $ (cd .. && make EXTRA_CFLAGS="-m64" libvex_ppc64_linux.a) && gcc -m64 -mregnames -Wall -Wshadow -Wno-long-long -Winline -O -g -o switchback switchback.c linker.c ../libvex_ppc64_linux.a test_xxx.c
      8 
      9 Test file test_xxx.c must have an entry point called "entry",
     10 which expects to take a single argument which is a function pointer
     11 (to "serviceFn").
     12 
     13 Test file may not reference any other symbols.
     14 
     15 NOTE: POWERPC: it is critical, when using this on ppc, to set
     16 CacheLineSize to the right value.  Values we currently know of:
     17 
     18    imac (G3):   32
     19    G5 (ppc970): 128
     20 
     21 ARM64:
     22   (cd .. && make -f Makefile-gcc libvex-arm64-linux.a) \
     23      && $CC -Wall -O -g -o switchback switchback.c linker.c \
     24      ../libvex-arm64-linux.a test_emfloat.c
     25 */
     26 
     27 #include <stdio.h>
     28 #include <assert.h>
     29 #include <stdlib.h>
     30 #include <string.h>
     31 #include <sys/types.h>
     32 #include <sys/stat.h>
     33 #include <unistd.h>
     34 
     35 #include "../pub/libvex_basictypes.h"
     36 #include "../pub/libvex_guest_x86.h"
     37 #include "../pub/libvex_guest_amd64.h"
     38 #include "../pub/libvex_guest_ppc32.h"
     39 #include "../pub/libvex_guest_ppc64.h"
     40 #include "../pub/libvex_guest_arm64.h"
     41 #include "../pub/libvex.h"
     42 #include "../pub/libvex_trc_values.h"
     43 #include "linker.h"
     44 
/* Number of basic blocks run so far: bumped once per call to
   run_translation() and compared against stopAfter in run_simulator(). */
static ULong n_bbs_done = 0;
/* Number of translations produced by make_translation(); reported by
   serviceFn() when the guest requests EXIT. */
static Int   n_translations_made = 0;
     47 
     48 
/* Per-host aliases.  make_translation() uses VexArch for both the guest
   and the host architecture, so the simulated machine is always the same
   kind of machine as the one this program is compiled for.  Only i386 and
   arm64 are handled; anything else is a compile-time error.
   VexSubArch and CacheLineSize are referenced only from commented-out
   lines in make_translation(). */
#if defined(__i386__)
#  define VexGuestState             VexGuestX86State
#  define LibVEX_Guest_initialise   LibVEX_GuestX86_initialise
#  define VexArch                   VexArchX86
#  define VexSubArch                VexSubArchX86_sse1
#  define GuestPC                   guest_EIP
#  define CacheLineSize             0/*irrelevant*/

#elif defined(__aarch64__) && !defined(__arm__)
#  define VexGuestState             VexGuestARM64State
#  define LibVEX_Guest_initialise   LibVEX_GuestARM64_initialise
#  define VexArch                   VexArchARM64
#  define VexSubArch                VexSubArch_NONE
#  define GuestPC                   guest_PC
#  define CacheLineSize             0/*irrelevant*/

#else
#   error "Unknown arch"
#endif
     68 
/* Bit numbers of the trace flags passed to LibVEX via vta.traceflags: */
/* 7: show conversion into IR */
/* 6: show after initial opt */
/* 5: show after instrumentation */
/* 4: show after second opt */
/* 3: show after tree building */
/* 2: show selected insns */
/* 1: show after reg-alloc */
/* 0: show final assembly */
/* TEST_FLAGS is used when make_translation() is called with verbose set
   (bits 7, 3, 2, 1 and 0); DEBUG_TRACE_FLAGS is used otherwise and is
   currently all zeroes, i.e. no tracing. */
#define TEST_FLAGS ((1<<7)|(1<<3)|(1<<2)|(1<<1)|(1<<0))
#define DEBUG_TRACE_FLAGS ((0<<7)|(0<<6)|(0<<5)|(0<<4)| \
                           (0<<3)|(0<<2)|(0<<1)|(0<<0))
     80 
/* Integer type used in this file to hold guest/host code addresses. */
typedef  unsigned long int  Addr;


/* guest state */
/* Stack for the simulated program; main() points the guest stack pointer
   at the middle of it (&gstack[32000]). */
ULong gstack[64000] __attribute__((aligned(16)));
/* The guest register file, initialised by main(). */
VexGuestState gst;
/* VEX control settings, filled in by main() and passed to LibVEX_Init. */
VexControl vcon;
     88 
/* only used for the switchback transition */
/* i386:  helper1 = &gst, helper2 = %EFLAGS */
/* amd64: helper1 = &gst, helper2 = %EFLAGS */
/* ppc32: helper1 = &gst, helper2 = %CR, helper3 = %XER */
/* arm64: helper1 = &gst, helper2 = 32x0:NZCV:28x0 */
/* NOTE(review): in the code visible here only the i386 switchback() writes
   sb_helper1/sb_helper2 (its asm reads them by symbol name); the arm64
   switchback() passes the same two values as function arguments instead,
   and sb_helper3 is not referenced at all. */
HWord sb_helper1 = 0;
HWord sb_helper2 = 0;
HWord sb_helper3 = 0;
     97 
/* translation cache */
/* N_TRANS_CACHE: capacity of trans_cache, in 8-byte (ULong) words.
   N_TRANS_TABLE: maximum number of translations tracked at once. */
#define N_TRANS_CACHE 1000000
#define N_TRANS_TABLE 10000

/* Storage for the generated host code of all live translations. */
ULong trans_cache[N_TRANS_CACHE];
/* Parallel arrays, one entry per translation: the guest extents reported
   by LibVEX, and a pointer to the start of its host code in trans_cache. */
VexGuestExtents trans_table [N_TRANS_TABLE];
ULong*          trans_tableP[N_TRANS_TABLE];

/* Number of ULong words of trans_cache in use, and number of entries of
   trans_table/trans_tableP in use.  make_translation() resets both to
   zero when either structure is nearly full. */
Int trans_cache_used = 0;
Int trans_table_used = 0;
    108 
    109 static Bool chase_into_ok ( void* opaque, Addr64 dst ) {
    110    return False;
    111 }
    112 
    113 static UInt needs_self_check ( void* opaque, const VexGuestExtents* vge ) {
    114    return 0;
    115 }
    116 
    117 
    118 /* For providing services. */
    119 static HWord serviceFn ( HWord arg1, HWord arg2 )
    120 {
    121    switch (arg1) {
    122       case 0: /* EXIT */
    123          printf("---STOP---\n");
    124          printf("serviceFn:EXIT\n");
    125 	 printf("%llu bbs simulated\n", n_bbs_done);
    126 	 printf("%d translations made, %d tt bytes\n",
    127                 n_translations_made, 8*trans_cache_used);
    128          exit(0);
    129       case 1: /* PUTC */
    130          putchar(arg2);
    131          return 0;
    132       case 2: /* MALLOC */
    133          return (HWord)malloc(arg2);
    134       case 3: /* FREE */
    135          free((void*)arg2);
    136          return 0;
    137       default:
    138          assert(0);
    139    }
    140 }
    141 
    142 
    143 // needed for arm64 ?
    144 static void invalidate_icache(void *ptr, unsigned long nbytes)
    145 {
    146    // This function, invalidate_icache, for arm64_linux,
    147    // is copied from
    148    // https://github.com/armvixl/vixl/blob/master/src/a64/cpu-a64.cc
    149    // which has the following copyright notice:
    150    /*
    151    Copyright 2013, ARM Limited
    152    All rights reserved.
    153 
    154    Redistribution and use in source and binary forms, with or without
    155    modification, are permitted provided that the following conditions are met:
    156 
    157    * Redistributions of source code must retain the above copyright notice,
    158      this list of conditions and the following disclaimer.
    159    * Redistributions in binary form must reproduce the above copyright notice,
    160      this list of conditions and the following disclaimer in the documentation
    161      and/or other materials provided with the distribution.
    162    * Neither the name of ARM Limited nor the names of its contributors may be
    163      used to endorse or promote products derived from this software without
    164      specific prior written permission.
    165 
    166    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
    167    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    168    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    169    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
    170    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    171    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    172    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    173    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    174    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    175    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    176    */
    177 
    178    // Ask what the I and D line sizes are
    179    UInt cache_type_register;
    180    // Copy the content of the cache type register to a core register.
    181    __asm__ __volatile__ ("mrs %[ctr], ctr_el0" // NOLINT
    182                          : [ctr] "=r" (cache_type_register));
    183 
    184    const Int kDCacheLineSizeShift = 16;
    185    const Int kICacheLineSizeShift = 0;
    186    const UInt kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
    187    const UInt kICacheLineSizeMask = 0xf << kICacheLineSizeShift;
    188 
    189    // The cache type register holds the size of the I and D caches as a power of
    190    // two.
    191    const UInt dcache_line_size_power_of_two =
    192        (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
    193    const UInt icache_line_size_power_of_two =
    194        (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;
    195 
    196    const UInt dcache_line_size_ = 1 << dcache_line_size_power_of_two;
    197    const UInt icache_line_size_ = 1 << icache_line_size_power_of_two;
    198 
    199    Addr start = (Addr)ptr;
    200    // Sizes will be used to generate a mask big enough to cover a pointer.
    201    Addr dsize = (Addr)dcache_line_size_;
    202    Addr isize = (Addr)icache_line_size_;
    203 
    204    // Cache line sizes are always a power of 2.
    205    Addr dstart = start & ~(dsize - 1);
    206    Addr istart = start & ~(isize - 1);
    207    Addr end    = start + nbytes;
    208 
    209    __asm__ __volatile__ (
    210      // Clean every line of the D cache containing the target data.
    211      "0: \n\t"
    212      // dc : Data Cache maintenance
    213      // c : Clean
    214      // va : by (Virtual) Address
    215      // u : to the point of Unification
    216      // The point of unification for a processor is the point by which the
    217      // instruction and data caches are guaranteed to see the same copy of a
    218      // memory location. See ARM DDI 0406B page B2-12 for more information.
    219      "dc cvau, %[dline] \n\t"
    220      "add %[dline], %[dline], %[dsize] \n\t"
    221      "cmp %[dline], %[end] \n\t"
    222      "b.lt 0b \n\t"
    223      // Barrier to make sure the effect of the code above is visible to the rest
    224      // of the world.
    225      // dsb : Data Synchronisation Barrier
    226      // ish : Inner SHareable domain
    227      // The point of unification for an Inner Shareable shareability domain is
    228      // the point by which the instruction and data caches of all the processors
    229      // in that Inner Shareable shareability domain are guaranteed to see the
    230      // same copy of a memory location. See ARM DDI 0406B page B2-12 for more
    231      // information.
    232      "dsb ish \n\t"
    233      // Invalidate every line of the I cache containing the target data.
    234      "1: \n\t"
    235      // ic : instruction cache maintenance
    236      // i : invalidate
    237      // va : by address
    238      // u : to the point of unification
    239      "ic ivau, %[iline] \n\t"
    240      "add %[iline], %[iline], %[isize] \n\t"
    241      "cmp %[iline], %[end] \n\t"
    242      "b.lt 1b \n\t"
    243      // Barrier to make sure the effect of the code above is visible to the rest
    244      // of the world.
    245      "dsb ish \n\t"
    246      // Barrier to ensure any prefetching which happened before this code is
    247      // discarded.
    248      // isb : Instruction Synchronisation Barrier
    249      "isb \n\t"
    250      : [dline] "+r" (dstart),
    251        [iline] "+r" (istart)
    252      : [dsize] "r" (dsize),
    253        [isize] "r" (isize),
    254        [end] "r" (end)
    255      // This code does not write to memory but without the dependency gcc might
    256      // move this code before the code is generated.
    257      : "cc", "memory"
    258    );
    259 
    260 }
    261 
    262 
    263 /* -------------------- */
    264 /* continue execution on the real CPU (never returns) */
    265 
    266 #if defined(__i386__)
    267 
/* switchback_asm (i386): leave the simulator and continue on the real CPU.
   Reads the guest state pointer from sb_helper1 and the flags value from
   sb_helper2 (both stored by switchback() just below), switches to the
   stack pointer kept in the guest state, reloads the integer registers,
   restores the flags with popfl, and finally "returns" to the
   continuation address that was pushed first.
   NOTE(review): the numeric displacements are hard-coded and presumably
   correspond to the register fields of VexGuestX86State; nothing in this
   file checks that -- confirm against the header. */
extern void switchback_asm(void);
asm(
"switchback_asm:\n"
"   movl sb_helper1, %eax\n"  // eax = guest state ptr
"   movl  16(%eax), %esp\n"   // switch stacks
"   pushl 56(%eax)\n"         // push continuation addr
"   movl sb_helper2, %ebx\n"  // get eflags
"   pushl %ebx\n"             // eflags:CA
"   pushl 0(%eax)\n"          //  EAX:eflags:CA
"   movl 4(%eax), %ecx\n"
"   movl 8(%eax), %edx\n"
"   movl 12(%eax), %ebx\n"
"   movl 20(%eax), %ebp\n"
"   movl 24(%eax), %esi\n"
"   movl 28(%eax), %edi\n"
"   popl %eax\n"              // eax was the base pointer, so it is loaded last
"   popfl\n"                  // install the saved flags
"   ret\n"                    // pops the continuation address and jumps to it
);
    287 void switchback ( void )
    288 {
    289    sb_helper1 = (HWord)&gst;
    290    sb_helper2 = LibVEX_GuestX86_get_eflags(&gst);
    291    switchback_asm(); // never returns
    292 }
    293 
    294 #elif defined(__aarch64__)
    295 
/* switchback_asm (arm64): leave the simulator and continue on the real CPU.
     x0 = address of the guest state
     x1 = value whose bits 31:28 are installed as the NZCV flags
   x30 is used as the scratch register throughout, and x0 is the base
   pointer for every load, so both are reloaded with their guest values
   only at the very end (x30 before the other integer registers, x0 last).
   The hard-coded offsets 16+8*n (X registers, SP at n=31, TPIDR_EL0 at
   n=37) and 16+8*38 (start of the Q registers) are checked against
   VexGuestARM64State by the asserts at the top of switchback().

   This code is not run in place: switchback() copies the bytes from
   switchback_asm up to nop_end_point into fresh memory and overwrites the
   single nop at nop_start_point with a branch to the guest PC. */
extern void switchback_asm(HWord x0_gst, HWord x1_pstate);
asm(
"switchback_asm:"
/* Merge bits 31:28 of x1 into the current NZCV value. */
"   mrs x30, nzcv"  "\n"
"   and x30, x30, #0xFFFFFFFF0FFFFFFF"  "\n"
"   and x1,  x1,  #0x00000000F0000000"  "\n"
"   orr x30, x30, x1"  "\n"
"   msr nzcv, x30"  "\n"

/* Thread pointer register. */
"   ldr x30, [x0, #16 + 8*37]"  "\n"
"   msr tpidr_el0, x30"  "\n"

/* Stack pointer. */
"   ldr x30, [x0, #16 + 8*31]"  "\n"
"   mov sp,  x30"  "\n"

/* Vector registers: x30 walks the Q array with post-increment loads. */
"   add x30, x0, #(16 + 8*38 + 16*0)"  "\n"
"   ldr q0,  [x30], #16"   "\n"
"   ldr q1,  [x30], #16"   "\n"
"   ldr q2,  [x30], #16"   "\n"
"   ldr q3,  [x30], #16"   "\n"
"   ldr q4,  [x30], #16"   "\n"
"   ldr q5,  [x30], #16"   "\n"
"   ldr q6,  [x30], #16"   "\n"
"   ldr q7,  [x30], #16"   "\n"
"   ldr q8,  [x30], #16"   "\n"
"   ldr q9,  [x30], #16"   "\n"
"   ldr q10, [x30], #16"   "\n"
"   ldr q11, [x30], #16"   "\n"
"   ldr q12, [x30], #16"   "\n"
"   ldr q13, [x30], #16"   "\n"
"   ldr q14, [x30], #16"   "\n"
"   ldr q15, [x30], #16"   "\n"
"   ldr q16, [x30], #16"   "\n"
"   ldr q17, [x30], #16"   "\n"
"   ldr q18, [x30], #16"   "\n"
"   ldr q19, [x30], #16"   "\n"
"   ldr q20, [x30], #16"   "\n"
"   ldr q21, [x30], #16"   "\n"
"   ldr q22, [x30], #16"   "\n"
"   ldr q23, [x30], #16"   "\n"
"   ldr q24, [x30], #16"   "\n"
"   ldr q25, [x30], #16"   "\n"
"   ldr q26, [x30], #16"   "\n"
"   ldr q27, [x30], #16"   "\n"
"   ldr q28, [x30], #16"   "\n"
"   ldr q29, [x30], #16"   "\n"
"   ldr q30, [x30], #16"   "\n"
"   ldr q31, [x30], #16"   "\n"

/* Integer registers, highest first; x0 (the base pointer) goes last. */
"   ldr x30, [x0, #16+8*30]"  "\n"
"   ldr x29, [x0, #16+8*29]"  "\n"
"   ldr x28, [x0, #16+8*28]"  "\n"
"   ldr x27, [x0, #16+8*27]"  "\n"
"   ldr x26, [x0, #16+8*26]"  "\n"
"   ldr x25, [x0, #16+8*25]"  "\n"
"   ldr x24, [x0, #16+8*24]"  "\n"
"   ldr x23, [x0, #16+8*23]"  "\n"
"   ldr x22, [x0, #16+8*22]"  "\n"
"   ldr x21, [x0, #16+8*21]"  "\n"
"   ldr x20, [x0, #16+8*20]"  "\n"
"   ldr x19, [x0, #16+8*19]"  "\n"
"   ldr x18, [x0, #16+8*18]"  "\n"
"   ldr x17, [x0, #16+8*17]"  "\n"
"   ldr x16, [x0, #16+8*16]"  "\n"
"   ldr x15, [x0, #16+8*15]"  "\n"
"   ldr x14, [x0, #16+8*14]"  "\n"
"   ldr x13, [x0, #16+8*13]"  "\n"
"   ldr x12, [x0, #16+8*12]"  "\n"
"   ldr x11, [x0, #16+8*11]"  "\n"
"   ldr x10, [x0, #16+8*10]"  "\n"
"   ldr x9,  [x0, #16+8*9]"   "\n"
"   ldr x8,  [x0, #16+8*8]"   "\n"
"   ldr x7,  [x0, #16+8*7]"   "\n"
"   ldr x6,  [x0, #16+8*6]"   "\n"
"   ldr x5,  [x0, #16+8*5]"   "\n"
"   ldr x4,  [x0, #16+8*4]"   "\n"
"   ldr x3,  [x0, #16+8*3]"   "\n"
"   ldr x2,  [x0, #16+8*2]"   "\n"
"   ldr x1,  [x0, #16+8*1]"   "\n"
"   ldr x0,  [x0, #16+8*0]"   "\n"

"nop_start_point:"            "\n"
"   nop"  "\n" // this will be converted into a relative jump
"nop_end_point:"              "\n"
);

/* Labels bracketing the patchable nop above; declared as functions only so
   that their addresses can be taken from C. */
extern void nop_start_point(void);
extern void nop_end_point(void);
    384 
    385 void switchback ( void )
    386 {
    387   assert(offsetof(VexGuestARM64State, guest_X0)  == 16 + 8*0);
    388   assert(offsetof(VexGuestARM64State, guest_X30) == 16 + 8*30);
    389   assert(offsetof(VexGuestARM64State, guest_SP)  == 16 + 8*31);
    390   assert(offsetof(VexGuestARM64State, guest_TPIDR_EL0) == 16 + 8*37);
    391   assert(offsetof(VexGuestARM64State, guest_Q0)  == 16 + 8*38 + 16*0);
    392 
    393   HWord arg0 = (HWord)&gst;
    394   HWord arg1 = LibVEX_GuestARM64_get_nzcv(&gst);
    395 
    396   /* Copy the entire switchback_asm procedure into writable and
    397      executable memory. */
    398 
    399   UChar* sa_start     = (UChar*)&switchback_asm;
    400   UChar* sa_nop_start = (UChar*)&nop_start_point;
    401   UChar* sa_end       = (UChar*)&nop_end_point;
    402 
    403   Int i;
    404   Int nbytes       = sa_end - sa_start;
    405   Int off_nopstart = sa_nop_start - sa_start;
    406   if (0)
    407      printf("nbytes = %d, nopstart = %d\n", nbytes, off_nopstart);
    408 
    409    /* copy it into mallocville */
    410    UChar* copy = mymalloc(nbytes);
    411    assert(copy);
    412    for (i = 0; i < nbytes; i++)
    413       copy[i] = sa_start[i];
    414 
    415    UInt* p = (UInt*)(&copy[off_nopstart]);
    416 
    417    Addr addr_of_nop = (Addr)p;
    418    Addr where_to_go = gst.guest_PC;
    419    Long   diff = ((Long)where_to_go) - ((Long)addr_of_nop);
    420 
    421    if (0) {
    422      printf("addr of first nop = 0x%llx\n", addr_of_nop);
    423      printf("where to go       = 0x%llx\n", where_to_go);
    424      printf("diff = 0x%llx\n", diff);
    425    }
    426 
    427    if (diff < -0x8000000LL || diff >= 0x8000000LL) {
    428      // we're hosed.  Give up
    429      printf("hosed -- offset too large\n");
    430      assert(0);
    431    }
    432 
    433    /* stay sane ... */
    434    assert(p[0] == 0xd503201f); /* nop */
    435 
    436    /* branch to diff */
    437    p[0] = 0x14000000 | ((diff >> 2) & 0x3FFFFFF);
    438 
    439    invalidate_icache( copy, nbytes );
    440 
    441    ( (void(*)(HWord,HWord))copy )(arg0, arg1);
    442 }
    443 
    444 #else
    445 # error "Unknown plat"
    446 #endif
    447 
    448 
    449 
/* -------------------- */
// Argument block read by run_translation_asm; filled in by
// run_translation() immediately before each call:
//   block[0] ("f")  holds the host code address to run
//   block[1] ("gp") holds the guest state pointer to use
// (An earlier form of this comment also listed "res", to hold the result.
// NOTE(review): no f, gp or res variables are declared in the part of the
// file visible here, although the i386 asm below still names them.)
static HWord block[2]; // f, gp;
/* Runs the translation described by block[]; on arm64 the value it returns
   is what run_translation() hands back as the TRC. */
extern HWord run_translation_asm(void);

/* Label inside the arm64 run_translation_asm; its address is handed to
   LibVEX as vta.disp_cp_xassisted in make_translation(). */
extern void disp_chain_assisted(void);
    458 
    459 #if defined(__aarch64__)
/* run_translation_asm (arm64).
   Entry: pushes x19..x30, plus x0 paired with xzr to keep the stack
   16-byte aligned, then loads the guest state pointer (block[1]) into x21
   and jumps to the host code address (block[0]).
   Exit: translations come back through disp_chain_assisted, the address
   given to LibVEX as the "assisted" dispatcher.  At that point x21 holds
   the TRC; it is parked in x1 while the registers saved on entry are
   popped in reverse order, and then returned in x0. */
asm(
"run_translation_asm:"            "\n"
"   stp  x29, x30, [sp, #-16]!"   "\n"
"   stp  x27, x28, [sp, #-16]!"   "\n"
"   stp  x25, x26, [sp, #-16]!"   "\n"
"   stp  x23, x24, [sp, #-16]!"   "\n"
"   stp  x21, x22, [sp, #-16]!"   "\n"
"   stp  x19, x20, [sp, #-16]!"   "\n"
"   stp  x0,  xzr, [sp, #-16]!"   "\n"
"   adrp x0, block"               "\n"
"   add  x0, x0, :lo12:block"     "\n"
"   ldr  x21, [x0, #8]"           "\n"  // load GSP
"   ldr  x1,  [x0, #0]"           "\n"  // Host address
"   br   x1"                 "\n"  // go (we wind up at disp_chain_assisted)

"disp_chain_assisted:"            "\n" // x21 holds the trc.  Return it.
"   mov  x1, x21" "\n"
    /* Restore int regs, but not x1. */
"   ldp  x0,  xzr, [sp], #16"    "\n"
"   ldp  x19, x20, [sp], #16"    "\n"
"   ldp  x21, x22, [sp], #16"    "\n"
"   ldp  x23, x24, [sp], #16"    "\n"
"   ldp  x25, x26, [sp], #16"    "\n"
"   ldp  x27, x28, [sp], #16"    "\n"
"   ldp  x29, x30, [sp], #16"    "\n"
"   mov  x0, x1"                 "\n"
"   ret"                         "\n"
);
    488 
    489 #elif defined(__i386__)
    490 
/* run_translation_asm (i386).
   Saves all general registers, loads the guest state pointer into %ebp,
   calls the host code, stores whatever comes back in %eax to 'res', then
   restores the registers and returns.
   NOTE(review): this refers to the symbols gp, f and res, none of which is
   defined in the part of the file visible here (the C code only provides
   block[0]/block[1]), and this branch defines no disp_chain_assisted
   label although make_translation() takes its address.  The popal also
   overwrites %eax, so the value run_translation() reads back is not the
   one stored in 'res'.  This path looks stale -- confirm before relying
   on it. */
asm(
"run_translation_asm:\n"
"   pushal\n"
"   movl gp, %ebp\n"
"   movl f, %eax\n"
"   call *%eax\n"
"   movl %eax, res\n"
"   popal\n"
"   ret\n"
);
    501 
    502 #else
    503 # error "Unknown arch"
    504 #endif
    505 
    506 
    507 /* Run a translation at host address 'translation' and return the TRC.
    508 */
    509 HWord run_translation ( HWord translation )
    510 {
    511    if (0 && DEBUG_TRACE_FLAGS) {
    512       printf(" run translation %p\n", (void*)translation );
    513       printf(" simulated bb: %llu\n", n_bbs_done);
    514    }
    515    block[0] = translation;
    516    block[1] = (HWord)&gst;
    517    HWord trc = run_translation_asm();
    518    n_bbs_done ++;
    519    return trc;
    520 }
    521 
    522 HWord find_translation ( Addr guest_addr )
    523 {
    524    Int i;
    525    HWord __res;
    526    if (0)
    527      printf("find translation %p ... ", (void *)(guest_addr));
    528    for (i = 0; i < trans_table_used; i++)
    529      if (trans_table[i].base[0] == guest_addr)
    530         break;
    531    if (i == trans_table_used) {
    532       if (0) printf("none\n");
    533       return 0; /* not found */
    534    }
    535 
    536    /* Move this translation one step towards the front, so finding it
    537       next time round is just that little bit cheaper. */
    538    if (i > 2) {
    539       VexGuestExtents tmpE = trans_table[i-1];
    540       ULong*          tmpP = trans_tableP[i-1];
    541       trans_table[i-1]  = trans_table[i];
    542       trans_tableP[i-1] = trans_tableP[i];
    543       trans_table[i] = tmpE;
    544       trans_tableP[i] = tmpP;
    545       i--;
    546    }
    547 
    548    __res = (HWord)trans_tableP[i];
    549    if (0) printf("%p\n", (void*)__res);
    550    return __res;
    551 }
    552 
    553 #define N_TRANSBUF 5000
    554 static UChar transbuf[N_TRANSBUF];
    555 void make_translation ( Addr guest_addr, Bool verbose )
    556 {
    557    VexTranslateArgs   vta;
    558    VexTranslateResult tres;
    559    VexArchInfo vex_archinfo;
    560    Int trans_used, i, ws_needed;
    561 
    562    memset(&vta, 0, sizeof(vta));
    563    memset(&tres, 0, sizeof(tres));
    564    memset(&vex_archinfo, 0, sizeof(vex_archinfo));
    565 
    566    if (trans_table_used >= N_TRANS_TABLE
    567        || trans_cache_used >= N_TRANS_CACHE-1000) {
    568       /* If things are looking to full, just dump
    569          all the translations. */
    570       trans_cache_used = 0;
    571       trans_table_used = 0;
    572    }
    573 
    574    assert(trans_table_used < N_TRANS_TABLE);
    575    if (0)
    576      printf("make translation %p\n", (void *)guest_addr);
    577 
    578    LibVEX_default_VexArchInfo(&vex_archinfo);
    579    //vex_archinfo.subarch = VexSubArch;
    580    //vex_archinfo.ppc_icache_line_szB = CacheLineSize;
    581 
    582    /* */
    583    vta.arch_guest       = VexArch;
    584    vta.archinfo_guest   = vex_archinfo;
    585    vta.arch_host        = VexArch;
    586    vta.archinfo_host    = vex_archinfo;
    587    vta.guest_bytes      = (UChar*)guest_addr;
    588    vta.guest_bytes_addr = guest_addr;
    589    vta.chase_into_ok    = chase_into_ok;
    590 //   vta.guest_extents    = &vge;
    591    vta.guest_extents    = &trans_table[trans_table_used];
    592    vta.host_bytes       = transbuf;
    593    vta.host_bytes_size  = N_TRANSBUF;
    594    vta.host_bytes_used  = &trans_used;
    595    vta.instrument1      = NULL;
    596    vta.instrument2      = NULL;
    597    vta.needs_self_check = needs_self_check;
    598    vta.traceflags       = verbose ? TEST_FLAGS : DEBUG_TRACE_FLAGS;
    599 
    600    vta.disp_cp_chain_me_to_slowEP = NULL; //disp_chain_fast;
    601    vta.disp_cp_chain_me_to_fastEP = NULL; //disp_chain_slow;
    602    vta.disp_cp_xindir             = NULL; //disp_chain_indir;
    603    vta.disp_cp_xassisted          = disp_chain_assisted;
    604 
    605    vta.addProfInc       = False;
    606 
    607    tres = LibVEX_Translate ( &vta );
    608 
    609    assert(tres.status == VexTransOK);
    610    assert(tres.offs_profInc == -1);
    611 
    612    ws_needed = (trans_used+7) / 8;
    613    assert(ws_needed > 0);
    614    assert(trans_cache_used + ws_needed < N_TRANS_CACHE);
    615    n_translations_made++;
    616 
    617    for (i = 0; i < trans_used; i++) {
    618       HChar* dst = ((HChar*)(&trans_cache[trans_cache_used])) + i;
    619       HChar* src = (HChar*)(&transbuf[i]);
    620       *dst = *src;
    621    }
    622 
    623 #if defined(__aarch64__)
    624    invalidate_icache( &trans_cache[trans_cache_used], trans_used );
    625 #endif
    626 
    627    trans_tableP[trans_table_used] = &trans_cache[trans_cache_used];
    628    trans_table_used++;
    629    trans_cache_used += ws_needed;
    630 }
    631 
    632 
    633 __attribute__((unused))
    634 static Bool overlap ( Addr start, UInt len, VexGuestExtents* vge )
    635 {
    636    Int i;
    637    for (i = 0; i < vge->n_used; i++) {
    638      if (vge->base[i]+vge->len[i] <= start
    639          || vge->base[i] >= start+len) {
    640        /* ok */
    641      } else {
    642         return True;
    643      }
    644    }
    645    return False; /* no overlap */
    646 }
    647 
/* Basic block count at which run_simulator() performs the switchback;
   set from the command-line argument in main(). */
static ULong  stopAfter = 0;
/* Address of the test program's entry point ("entry"); set in main(). */
static UChar* entryP    = NULL;
    650 
    651 
    652 __attribute__ ((noreturn))
    653 static
    654 void failure_exit ( void )
    655 {
    656    fprintf(stdout, "VEX did failure_exit.  Bye.\n");
    657    fprintf(stdout, "bb counter = %llu\n\n", n_bbs_done);
    658    exit(1);
    659 }
    660 
    661 static
    662 void log_bytes ( HChar* bytes, Int nbytes )
    663 {
    664    fwrite ( bytes, 1, nbytes, stdout );
    665    fflush ( stdout );
    666 }
    667 
    668 
    669 /* run simulated code forever (it will exit by calling
    670    serviceFn(0)). */
    671 static void run_simulator ( void )
    672 {
    673    static Addr last_guest = 0;
    674    Addr  next_guest;
    675    HWord next_host;
    676    while (1) {
    677       next_guest = gst.GuestPC;
    678 
    679       if (0)
    680          printf("\nnext_guest: 0x%x\n", (UInt)next_guest);
    681 
    682       if (next_guest == (Addr)&serviceFn) {
    683 
    684          /* "do" the function call to serviceFn */
    685 #        if defined(__i386__)
    686          {
    687             HWord esp = gst.guest_ESP;
    688             gst.guest_EIP = *(UInt*)(esp+0);
    689             gst.guest_EAX = serviceFn( *(UInt*)(esp+4), *(UInt*)(esp+8) );
    690             gst.guest_ESP = esp+4;
    691             next_guest = gst.guest_EIP;
    692          }
    693 #        elif defined(__aarch64__)
    694          {
    695             gst.guest_X0 = serviceFn( gst.guest_X0, gst.guest_X1 );
    696             gst.guest_PC = gst.guest_X30;
    697             next_guest   = gst.guest_PC;
    698          }
    699 #        else
    700 #        error "Unknown arch"
    701 #        endif
    702       }
    703 
    704       next_host = find_translation(next_guest);
    705       if (next_host == 0) {
    706          make_translation(next_guest,False);
    707          next_host = find_translation(next_guest);
    708          assert(next_host != 0);
    709       }
    710 
    711       // Switchback
    712       if (n_bbs_done == stopAfter) {
    713          printf("---begin SWITCHBACK at bb:%llu---\n", n_bbs_done);
    714 #if 1
    715          if (last_guest) {
    716             printf("\n*** Last run translation (bb:%llu):\n", n_bbs_done-1);
    717             make_translation(last_guest,True);
    718          }
    719 #endif
    720 #if 0
    721          if (next_guest) {
    722             printf("\n*** Current translation (bb:%llu):\n", n_bbs_done);
    723             make_translation(next_guest,True);
    724          }
    725 #endif
    726          printf("---  end SWITCHBACK at bb:%llu ---\n", n_bbs_done);
    727          switchback();
    728          assert(0); /*NOTREACHED*/
    729       }
    730 
    731       last_guest = next_guest;
    732       HWord trc = run_translation(next_host);
    733       if (0) printf("------- trc = %lu\n", trc);
    734       if (trc != VEX_TRC_JMP_BORING) {
    735         if (1) printf("------- trc = %lu\n", trc);
    736       }
    737       assert(trc == VEX_TRC_JMP_BORING);
    738    }
    739 }
    740 
    741 
    742 static void usage ( void )
    743 {
    744    printf("usage: switchback #bbs\n");
    745    printf("   - begins switchback for basic block #bbs\n");
    746    printf("   - use -1 for largest possible run without switchback\n\n");
    747    exit(1);
    748 }
    749 
    750 
    751 int main ( Int argc, HChar** argv )
    752 {
    753    if (argc != 2)
    754       usage();
    755 
    756    stopAfter = (ULong)atoll(argv[1]);
    757 
    758    extern void entry ( void*(*service)(int,int) );
    759    entryP = (UChar*)&entry;
    760 
    761    if (!entryP) {
    762       printf("switchback: can't find entry point\n");
    763       exit(1);
    764    }
    765 
    766    LibVEX_default_VexControl(&vcon);
    767    vcon.guest_max_insns=50 - 49;
    768    vcon.guest_chase_thresh=0;
    769    vcon.iropt_level=2;
    770 
    771    LibVEX_Init( failure_exit, log_bytes, 1, &vcon );
    772    LibVEX_Guest_initialise(&gst);
    773    gst.host_EvC_COUNTER  = 999999999; // so we should never get an exit
    774    gst.host_EvC_FAILADDR = 0x5a5a5a5a5a5a5a5a;
    775 
    776    /* set up as if a call to the entry point passing serviceFn as
    777       the one and only parameter */
    778 #  if defined(__i386__)
    779    gst.guest_EIP = (UInt)entryP;
    780    gst.guest_ESP = (UInt)&gstack[32000];
    781    *(UInt*)(gst.guest_ESP+4) = (UInt)serviceFn;
    782    *(UInt*)(gst.guest_ESP+0) = 0x12345678;
    783 
    784 #  elif defined(__aarch64__)
    785    gst.guest_PC = (ULong)entryP;
    786    gst.guest_SP = (ULong)&gstack[32000];
    787    gst.guest_X0 = (ULong)serviceFn;
    788    HWord tpidr_el0 = 0;
    789    __asm__ __volatile__("mrs %0, tpidr_el0" : "=r"(tpidr_el0));
    790    gst.guest_TPIDR_EL0 = tpidr_el0;
    791 
    792 #  else
    793 #  error "Unknown arch"
    794 #  endif
    795 
    796    printf("\n---START---\n");
    797 
    798 #if 1
    799    run_simulator();
    800 #else
    801    ( (void(*)(HWord(*)(HWord,HWord))) entryP ) (serviceFn);
    802 #endif
    803 
    804 
    805    return 0;
    806 }
    807