Home | History | Annotate | Download | only in m_dispatch
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- The core dispatch loop, for jumping to a code address.       ---*/
      4 /*---                                       dispatch-ppc64-linux.S ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8   This file is part of Valgrind, a dynamic binary instrumentation
      9   framework.
     10 
     11   Copyright (C) 2005-2012 Cerion Armour-Brown <cerion (at) open-works.co.uk>
     12 
     13   This program is free software; you can redistribute it and/or
     14   modify it under the terms of the GNU General Public License as
     15   published by the Free Software Foundation; either version 2 of the
     16   License, or (at your option) any later version.
     17 
     18   This program is distributed in the hope that it will be useful, but
     19   WITHOUT ANY WARRANTY; without even the implied warranty of
     20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21   General Public License for more details.
     22 
     23   You should have received a copy of the GNU General Public License
     24   along with this program; if not, write to the Free Software
     25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26   02111-1307, USA.
     27 
     28   The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #if defined(VGP_ppc64_linux)
     32 
     33 #include "pub_core_basics_asm.h"
     34 #include "pub_core_dispatch_asm.h"
     35 #include "pub_core_transtab_asm.h"
     36 #include "libvex_guest_offsets.h"	/* for OFFSET_ppc64_CIA */
     37 
     38 
     39 /* References to globals via the TOC */
     40 
     41 /*
     42         .globl  vgPlain_tt_fast
     43         .lcomm  vgPlain_tt_fast,4,4
     44         .type   vgPlain_tt_fast, @object
     45 */
     46 .section ".toc","aw"
        /* One TOC slot per global that the code in this file touches.
           Each slot is read with "ld rN,.tocent__<sym>@toc(2)", which
           yields the address of <sym>; a second load/store through rN
           then accesses the variable itself. */
     47 .tocent__vgPlain_tt_fast:               /* table probed by VG_(disp_cp_xindir) */
     48         .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
     49 .tocent__vgPlain_stats__n_xindirs_32:   /* counter bumped on every xindir */
     50         .tc vgPlain_stats__n_xindirs_32[TC],vgPlain_stats__n_xindirs_32
     51 .tocent__vgPlain_stats__n_xindir_misses_32:   /* counter bumped on a failed fast lookup */
     52         .tc vgPlain_stats__n_xindir_misses_32[TC],vgPlain_stats__n_xindir_misses_32
     53 .tocent__vgPlain_machine_ppc64_has_VMX: /* nonzero => AltiVec state is saved/checked/restored */
     54         .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX
     55 
     56 /*------------------------------------------------------------*/
     57 /*---                                                      ---*/
     58 /*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
     59 /*--- used to run all translations,                        ---*/
     60 /*--- including no-redir ones.                             ---*/
     61 /*---                                                      ---*/
     62 /*------------------------------------------------------------*/
     63 
     64 /*----------------------------------------------------*/
     65 /*--- Entry and preamble (set everything up)       ---*/
     66 /*----------------------------------------------------*/
     67 
     68 /* signature:
     69 void VG_(disp_run_translations)( UWord* two_words,
     70                                  void*  guest_state,
     71                                  Addr   host_addr );
     72 */
     73 
     74 .section ".text"
     75 .align   2
     76 .globl   VG_(disp_run_translations)
        /* The global symbol names a three-doubleword record in .opd:
           code entry address (the dot-prefixed label), TOC base, and 0.
           The executable code itself lives at the dot-prefixed label. */
     77 .section ".opd","aw"
     78 .align   3
     79 VG_(disp_run_translations):
     80 .quad    .VG_(disp_run_translations),.TOC.@tocbase,0
     81 .previous
     82 .type    .VG_(disp_run_translations),@function
     83 .globl   .VG_(disp_run_translations)
     84 .VG_(disp_run_translations):
     85 	/* r3 holds two_words */
     86 	/* r4 holds guest_state */
     87         /* r5 holds host_addr */

        /* Frame map, as offsets from r1 once the 624-byte frame exists:
                0.. 47  linkage area (see below)
               88       scratch word used to build a zero for FPSCR
              104       saved two_words pointer (r3)
              128..319  v20..v31   (written only if has_VMX != 0)
              320..323  padding
              324       VRSAVE     (written only if has_VMX != 0)
              328..479  r13..r31
              480..623  f14..f31
              632, 640  CR, LR (stored in the caller's frame before
                        the stdu, i.e. at old sp + 8 and old sp + 16)
           The exit path reloads from exactly these offsets. */
     88 
     89         /* ----- entry point to VG_(disp_run_translations) ----- */
     90         /* PPC64 ABI saves LR->16(parent sp), CR->8(parent sp) */
     91 
     92         /* Save lr, cr (into the caller's frame; r1 is not yet moved) */
     93         mflr    6
     94         std     6,16(1)
     95         mfcr    6
     96         std     6,8(1)
     97 
     98         /* New stack frame */
     99         stdu    1,-624(1)  /* sp should maintain 16-byte alignment */
    100 
    101         /* General reg save area : 152 bytes (r13..r31) */
    102         std     31,472(1)
    103         std     30,464(1)
    104         std     29,456(1)
    105         std     28,448(1)
    106         std     27,440(1)
    107         std     26,432(1)
    108         std     25,424(1)
    109         std     24,416(1)
    110         std     23,408(1)
    111         std     22,400(1)
    112         std     21,392(1)
    113         std     20,384(1)
    114         std     19,376(1)
    115         std     18,368(1)
    116         std     17,360(1)
    117         std     16,352(1)
    118         std     15,344(1)
    119         std     14,336(1)
    120         std     13,328(1)
    121         std     3,104(1)  /* save two_words for later */
    122 
    123         /* Save callee-saved registers... */
    124         /* Floating-point reg save area : 144 bytes (f14..f31) */
    125         stfd    31,616(1)
    126         stfd    30,608(1)
    127         stfd    29,600(1)
    128         stfd    28,592(1)
    129         stfd    27,584(1)
    130         stfd    26,576(1)
    131         stfd    25,568(1)
    132         stfd    24,560(1)
    133         stfd    23,552(1)
    134         stfd    22,544(1)
    135         stfd    21,536(1)
    136         stfd    20,528(1)
    137         stfd    19,520(1)
    138         stfd    18,512(1)
    139         stfd    17,504(1)
    140         stfd    16,496(1)
    141         stfd    15,488(1)
    142         stfd    14,480(1)
    143 
    144         /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
    145            The Linux kernel might not actually use VRSAVE for its intended
    146            purpose, but it should be harmless to preserve anyway. */
    147 	/* r3, r4, r5 are live here, so use r6 */
    148 	ld	6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
    149 	ld	6,0(6)
    150         cmpldi  6,0
    151         beq     .LafterVMX1   /* no VMX: skip all vector state */
    152 
    153         /* VRSAVE save word : 4 bytes (32 bits) */
    154         mfspr   6,256         /* vrsave reg is spr number 256 */
    155         stw     6,324(1)
    156 
    157         /* Alignment padding : 4 bytes */
    158 
    159         /* Vector reg save area (quadword aligned) : 192 bytes (v20..v31).
                   stvx has no displacement form, so each offset goes via r6. */
    160         li      6,304
    161         stvx    31,6,1
    162         li      6,288
    163         stvx    30,6,1
    164         li      6,272
    165         stvx    29,6,1
    166         li      6,256
    167         stvx    28,6,1
    168         li      6,240
    169         stvx    27,6,1
    170         li      6,224
    171         stvx    26,6,1
    172         li      6,208
    173         stvx    25,6,1
    174         li      6,192
    175         stvx    24,6,1
    176         li      6,176
    177         stvx    23,6,1
    178         li      6,160
    179         stvx    22,6,1
    180         li      6,144
    181         stvx    21,6,1
    182         li      6,128
    183         stvx    20,6,1
    184 .LafterVMX1:
    185 
    186         /* Local variable space... */
    187 
    188 	/* r3 holds two_words */
    189 	/* r4 holds guest_state */
    190         /* r5 holds host_addr */
    191 
    192         /* 96(sp) used later to check FPSCR[RM] */
    193         /* 88(sp) used later to load fpscr with zero */
    194 	/* 48:87(sp) free */
    195 
    196         /* Linkage Area (reserved)
    197            40(sp) : TOC
    198            32(sp) : link editor doubleword
    199            24(sp) : compiler doubleword
    200            16(sp) : LR
    201            8(sp)  : CR
    202            0(sp)  : back-chain
    203         */
    204 
    205         /* set host FPU control word to the default mode expected
    206            by VEX-generated code.  See comments in libvex.h for
    207            more info. */
    208         /* => get zero into f3 (tedious)
    209            fsub 3,3,3 is not a reliable way to do this, since if
    210            f3 holds a NaN or similar then we don't necessarily
    211            wind up with zero. */
    212         li      6,0
    213         stw     6,88(1)
    214         lfs     3,88(1)
    215         mtfsf   0xFF,3   /* fpscr = lo32 of f3 */
    216 
    217         /* set host AltiVec control word to the default mode expected
    218            by VEX-generated code. */
    219 	ld	6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
    220 	ld	6,0(6)
    221         cmpldi  6,0
    222         beq     .LafterVMX2
    223 
    224         vspltisw 3,0x0  /* generate zero */
    225         mtvscr  3       /* VSCR = 0; the exit path checks NJ is still clear */
    226 .LafterVMX2:
    227 
    228         /* make a stack frame for the code we are calling
                   (popped again by "addi 1,1,48" at .postamble) */
    229         stdu    1,-48(1)
    230 
    231         /* Set up the guest state ptr */
    232         mr      31,4      /* r31 (generated code gsp) = r4 */
    233 
    234         /* and jump into the code cache.  Chained translations in
    235            the code cache run, until for whatever reason, they can't
    236            continue.  When that happens, the translation in question
    237            will jump (or call) to one of the continuation points
    238            VG_(disp_cp_...) below. */
    239         mtctr   5
    240         bctr
    241 	/*NOTREACHED*/
    242 
    243 /*----------------------------------------------------*/
    244 /*--- Postamble and exit.                          ---*/
    245 /*----------------------------------------------------*/
    246 
    247 .postamble:
    248         /* At this point, r6 and r7 contain two
    249            words to be returned to the caller.  r6
    250            holds a TRC value, and r7 optionally may
    251            hold another word (for CHAIN_ME exits, the
    252            address of the place to patch.) */
    253 
    254         /* undo the "make a stack frame for the code we are calling" */
    255         addi    1,1,48
    256 
    257         /* We're leaving.  Check that nobody messed with
    258            VSCR or FPSCR in ways we don't expect.
                   (FPSCR is simply forced back to zero below; only
                   VSCR[NJ] is actually tested.) */
    259 	/* Using r11 - value used again further on, so don't trash! */
    260 	ld	11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
    261 	ld	11,0(11)
    262 
    263 	/* Set fpscr back to a known state, since vex-generated code
    264 	   may have messed with fpscr[rm]. */
                /* r5 is free scratch here.  The zero word is bounced
                   through 16 bytes temporarily taken below sp so that
                   it can be loaded into f3. */
    265         li      5,0
    266         addi    1,1,-16
    267         stw     5,0(1)
    268         lfs     3,0(1)
    269         addi    1,1,16
    270         mtfsf   0xFF,3   /* fpscr = f3 */
    271 
    272         cmpldi  11,0    /* Do we have altivec? */
    273         beq     .LafterVMX8
    274 
    275         /* Test whether VSCR[NJ] has become 1.  The preamble wrote
                   zero to VSCR, so NJ == 1 here is the invariant failure. */
    276         /* first generate 4x 0x00010000 */
    277         vspltisw  4,0x1                   /* 4x 0x00000001 */
    278         vspltisw  5,0x0                   /* zero */
    279         vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
    280         /* retrieve VSCR and mask wanted bits */
    281         mfvscr    7
    282         vand      7,7,6                   /* gives NJ flag */
    283         vspltw    7,7,0x3                 /* flags-word to all lanes */
    284         vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
    285         bt        24,.invariant_violation /* branch if all_equal, i.e. NJ set */
    286 
    287 .LafterVMX8:
    288 	/* otherwise we're OK */
    289         b       .remove_frame
    290 
    291 .invariant_violation:
                /* Replace whatever (r6,r7) the exit carried with
                   (VG_TRC_INVARIANT_FAILED, 0). */
    292         li      6,VG_TRC_INVARIANT_FAILED
    293         li      7,0
    294         /* fall through */
    295 
    296 .remove_frame:
        /* Tear down the 624-byte frame built at entry and return to the
           C caller of VG_(disp_run_translations).
             In:  r1  = sp of the 624-byte frame
                  r6  = first word to return (TRC)
                  r7  = second word to return
                  r11 = value of VG_(machine_ppc64_has_VMX)
             Out: two_words[0] = r6, two_words[1] = r7 (two_words is
                  reloaded from 104(sp)); v20..v31 and VRSAVE (if VMX),
                  f14..f31, r13..r31, CR and LR are reloaded from the
                  slots the preamble filled; sp is popped; blr. */
    297         /* r11 already holds VG_(machine_ppc64_has_VMX) value.
                   Use the same 64-bit compare that gated the save in the
                   preamble, so that save and restore always agree. */
    298         cmpldi  11,0
    299         beq     .LafterVMX9
    300 
    301         /* Restore Altivec regs.
    302            Use r5 as scratch since r6/r7 are live. */
    303         /* VRSAVE */
    304         lwz     5,324(1)
    305         mtspr   256,5         /* VRSAVE reg is spr number 256; write it
                                         back (mfspr here would only overwrite
                                         r5 and leave VRSAVE unrestored) */
    306 
    307         /* Vector regs */
    308         li      5,304
    309         lvx     31,5,1
    310         li      5,288
    311         lvx     30,5,1
    312         li      5,272
    313         lvx     29,5,1
    314         li      5,256
    315         lvx     28,5,1
    316         li      5,240
    317         lvx     27,5,1
    318         li      5,224
    319         lvx     26,5,1
    320         li      5,208
    321         lvx     25,5,1
    322         li      5,192
    323         lvx     24,5,1
    324         li      5,176
    325         lvx     23,5,1
    326         li      5,160
    327         lvx     22,5,1
    328         li      5,144
    329         lvx     21,5,1
    330         li      5,128
    331         lvx     20,5,1
    332 .LafterVMX9:
    333 
    334         /* Restore FP regs */
    335         /* Floating-point regs */
    336         lfd     31,616(1)
    337         lfd     30,608(1)
    338         lfd     29,600(1)
    339         lfd     28,592(1)
    340         lfd     27,584(1)
    341         lfd     26,576(1)
    342         lfd     25,568(1)
    343         lfd     24,560(1)
    344         lfd     23,552(1)
    345         lfd     22,544(1)
    346         lfd     21,536(1)
    347         lfd     20,528(1)
    348         lfd     19,520(1)
    349         lfd     18,512(1)
    350         lfd     17,504(1)
    351         lfd     16,496(1)
    352         lfd     15,488(1)
    353         lfd     14,480(1)
    354 
    355         /* restore int regs, including importantly r3 (two_words) */
    356         ld      31,472(1)
    357         ld      30,464(1)
    358         ld      29,456(1)
    359         ld      28,448(1)
    360         ld      27,440(1)
    361         ld      26,432(1)
    362         ld      25,424(1)
    363         ld      24,416(1)
    364         ld      23,408(1)
    365         ld      22,400(1)
    366         ld      21,392(1)
    367         ld      20,384(1)
    368         ld      19,376(1)
    369         ld      18,368(1)
    370         ld      17,360(1)
    371         ld      16,352(1)
    372         ld      15,344(1)
    373         ld      14,336(1)
    374         ld      13,328(1)
    375         ld      3,104(1)
    376         /* Stash return values: two_words[0] = r6, two_words[1] = r7 */
    377         std     6,0(3)
    378         std     7,8(3)
    379 
    380         /* restore lr & sp, and leave.  CR and LR were stored in the
                   caller's frame before the stdu, hence the +624 offsets. */
    381         ld      0,632(1)  /* stack_size + 8 */
    382         mtcr    0
    383         ld      0,640(1)  /* stack_size + 16 */
    384         mtlr    0
    385         addi    1,1,624   /* stack_size */
    386         blr
    387 
    388 
    389 /*----------------------------------------------------*/
    390 /*--- Continuation points                          ---*/
    391 /*----------------------------------------------------*/
    392 
    393 /* ------ Chain me to slow entry point ------ */
    394         .section ".text"
    395         .p2align 2
    396         .globl   VG_(disp_cp_chain_me_to_slowEP)
    397         .section ".opd","aw"
    398         .p2align 3
    399 VG_(disp_cp_chain_me_to_slowEP):
    400         .quad    .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
    401         .previous
    402         .globl   .VG_(disp_cp_chain_me_to_slowEP)
    403         .type    .VG_(disp_cp_chain_me_to_slowEP),@function
    404 .VG_(disp_cp_chain_me_to_slowEP):
    405         /* Entered by a call, so LR is the return address and
    406            thereby identifies the call site that needs patching.
    407            Pass (VG_TRC_CHAIN_ME_TO_SLOW_EP, start of that call
    408            site) to the postamble in r6/r7. */
    409         mflr    7
    410         /* Step r7 back over the calling sequence:
    411              20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP
    412              4  = mtctr r30
    413              4  = the branch via CTR that called us */
    414         addi    7,7,-(20+4+4)
    415         li      6,VG_TRC_CHAIN_ME_TO_SLOW_EP
    416         b       .postamble
    417 
    418 /* ------ Chain me to fast entry point ------ */
    419         .section ".text"
    420         .p2align 2
    421         .globl   VG_(disp_cp_chain_me_to_fastEP)
    422         .section ".opd","aw"
    423         .p2align 3
    424 VG_(disp_cp_chain_me_to_fastEP):
    425         .quad    .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
    426         .previous
    427         .globl   .VG_(disp_cp_chain_me_to_fastEP)
    428         .type    .VG_(disp_cp_chain_me_to_fastEP),@function
    429 .VG_(disp_cp_chain_me_to_fastEP):
    430         /* Entered by a call, so LR is the return address and
    431            thereby identifies the call site that needs patching.
    432            Pass (VG_TRC_CHAIN_ME_TO_FAST_EP, start of that call
    433            site) to the postamble in r6/r7. */
    434         mflr    7
    435         /* Step r7 back over the calling sequence:
    436              20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP
    437              4  = mtctr r30
    438              4  = the branch via CTR that called us */
    439         addi    7,7,-(20+4+4)
    440         li      6,VG_TRC_CHAIN_ME_TO_FAST_EP
    441         b       .postamble
    442 
    443 /* ------ Indirect but boring jump ------ */
    444         .section ".text"
    445         .align   2
    446         .globl   VG_(disp_cp_xindir)
    447         .section ".opd","aw"
    448         .align   3
    449 VG_(disp_cp_xindir):
    450         .quad    .VG_(disp_cp_xindir),.TOC.@tocbase,0
    451         .previous
    452         .type    .VG_(disp_cp_xindir),@function
    453         .globl   .VG_(disp_cp_xindir)
    454 .VG_(disp_cp_xindir):
                /* Look up the guest address stored at OFFSET_ppc64_CIA in
                   the guest state (r31) in VG_(tt_fast).  On a hit, jump
                   straight to the cached host address.  On a miss, leave
                   through .postamble with (VG_TRC_INNER_FASTMISS, 0).
                   Uses r3..r7 and CTR as scratch. */
    455         /* Where are we going? */
    456         ld      3,OFFSET_ppc64_CIA(31)
    457 
    458         /* stats only: plain 32-bit load/increment/store */
    459 	ld	5, .tocent__vgPlain_stats__n_xindirs_32@toc(2)
    460         lwz     6,0(5)
    461         addi    6,6,1
    462         stw     6,0(5)
    463 
    464 	/* r5 = &VG_(tt_fast) */
    465 	ld	5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
    466 
    467         /* try a fast lookup in the translation cache */
    468         /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
    469               = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
    470 	rldicl	4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
    471 	sldi	4,4,4      /* entry# * sizeof(FastCacheEntry) */
    472 	add	5,5,4      /* & VG_(tt_fast)[entry#] */
    473 	ld	6,0(5)     /* .guest */
    474 	ld	7,8(5)     /* .host */
    475         cmpd    3,6        /* does the entry describe this guest address? */
    476         bne     .fast_lookup_failed
    477 
    478         /* Found a match.  Jump to .host. */
    479         mtctr   7
    480         bctr
    481 
    482 .fast_lookup_failed:
    483         /* stats only */
    484 	ld	5, .tocent__vgPlain_stats__n_xindir_misses_32@toc(2)
    485         lwz     6,0(5)
    486         addi    6,6,1
    487         stw     6,0(5)
    488 
                /* (r6,r7) = (VG_TRC_INNER_FASTMISS, 0) for the caller */
    489         li      6,VG_TRC_INNER_FASTMISS
    490         li      7,0
    491         b       .postamble
    492 	/*NOTREACHED*/
    493 
    494 /* ------ Assisted jump ------ */
    495 .section ".text"
    496         .p2align 2
    497         .globl   VG_(disp_cp_xassisted)
    498         .section ".opd","aw"
    499         .p2align 3
    500 VG_(disp_cp_xassisted):
    501         .quad    .VG_(disp_cp_xassisted),.TOC.@tocbase,0
    502         .previous
    503         .globl   .VG_(disp_cp_xassisted)
    504         .type    .VG_(disp_cp_xassisted),@function
    505 .VG_(disp_cp_xassisted):
    506         /* The TRC arrives in r31; return the pair (r31, 0). */
    507         li      7,0
    508         mr      6,31
    509         b       .postamble
    510 
    511 /* ------ Event check failed ------ */
    512         .section ".text"
    513         .p2align 2
    514         .globl   VG_(disp_cp_evcheck_fail)
    515         .section ".opd","aw"
    516         .p2align 3
    517 VG_(disp_cp_evcheck_fail):
    518         .quad    .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
    519         .previous
    520         .globl   .VG_(disp_cp_evcheck_fail)
    521         .type    .VG_(disp_cp_evcheck_fail),@function
    522 .VG_(disp_cp_evcheck_fail):
    523         li      7,0                          /* no second word */
    524         li      6,VG_TRC_INNER_COUNTERZERO   /* first word: the TRC */
    525         b       .postamble
    526 
    527 
    528 .size .VG_(disp_run_translations), .-.VG_(disp_run_translations)
        /* The size above is measured from the .VG_(disp_run_translations)
           entry label to this point, so it also spans the postamble and
           every continuation point defined in between. */
    529 
    530 /* Let the linker know we don't need an executable stack */
    531 .section .note.GNU-stack,"",@progbits
    532 
    533 #endif // defined(VGP_ppc64_linux)
    534 
    535 /*--------------------------------------------------------------------*/
    536 /*--- end                                                          ---*/
    537 /*--------------------------------------------------------------------*/
    538