/* (web-viewer navigation residue, kept inert:
   Home | History | Annotate | Download | only in m_dispatch) */
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- The core dispatch loop, for jumping to a code address.       ---*/
      4 /*---                                       dispatch-ppc64-linux.S ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8   This file is part of Valgrind, a dynamic binary instrumentation
      9   framework.
     10 
     11   Copyright (C) 2005-2017 Cerion Armour-Brown <cerion (at) open-works.co.uk>
     12 
     13   This program is free software; you can redistribute it and/or
     14   modify it under the terms of the GNU General Public License as
     15   published by the Free Software Foundation; either version 2 of the
     16   License, or (at your option) any later version.
     17 
     18   This program is distributed in the hope that it will be useful, but
     19   WITHOUT ANY WARRANTY; without even the implied warranty of
     20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21   General Public License for more details.
     22 
     23   You should have received a copy of the GNU General Public License
     24   along with this program; if not, write to the Free Software
     25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26   02111-1307, USA.
     27 
     28   The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "pub_core_basics_asm.h"
     32 
     33 #if defined(VGP_ppc64be_linux)
     34 
     35 #include "pub_core_dispatch_asm.h"
     36 #include "pub_core_transtab_asm.h"
     37 #include "libvex_guest_offsets.h"	/* for OFFSET_ppc64_CIA */
     38 
     39 
     40 /* References to globals via the TOC */
     41 
     42 /*
     43         .globl  vgPlain_tt_fast
     44         .lcomm  vgPlain_tt_fast,4,4
     45         .type   vgPlain_tt_fast, @object
     46 */
     47 .section ".toc","aw"
     48 .tocent__vgPlain_tt_fast:
     49         .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
     50 .tocent__vgPlain_stats__n_xindirs_32:
     51         .tc vgPlain_stats__n_xindirs_32[TC],vgPlain_stats__n_xindirs_32
     52 .tocent__vgPlain_stats__n_xindir_misses_32:
     53         .tc vgPlain_stats__n_xindir_misses_32[TC],vgPlain_stats__n_xindir_misses_32
     54 .tocent__vgPlain_machine_ppc64_has_VMX:
     55         .tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX
     56 
     57 /*------------------------------------------------------------*/
     58 /*---                                                      ---*/
     59 /*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
     60 /*--- used to run all translations,                        ---*/
     61 /*--- including no-redir ones.                             ---*/
     62 /*---                                                      ---*/
     63 /*------------------------------------------------------------*/
     64 
     65 /*----------------------------------------------------*/
     66 /*--- Entry and preamble (set everything up)       ---*/
     67 /*----------------------------------------------------*/
     68 
     69 /* signature:
     70 void VG_(disp_run_translations)( UWord* two_words,
     71                                  void*  guest_state,
     72                                  Addr   host_addr );
     73 */
     74 
     75 .section ".text"
     76 .align   2
     77 .globl   VG_(disp_run_translations)
     78 .section ".opd","aw"
     79 .align   3
     80 VG_(disp_run_translations):
     81 .quad    .VG_(disp_run_translations),.TOC.@tocbase,0
     82 .previous
     83 .type    .VG_(disp_run_translations),@function
     84 .globl   .VG_(disp_run_translations)
     85 .VG_(disp_run_translations):
     86 	/* r3 holds two_words */
     87 	/* r4 holds guest_state */
     88         /* r5 holds host_addr */
     89 
     90         /* ----- entry point to VG_(disp_run_translations) ----- */
     91         /* PPC64 ABI saves LR->16(prt_sp), CR->8(prt_sp)) */
     92 
     93         /* Save lr, cr */
     94         mflr    6
     95         std     6,16(1)
     96         mfcr    6
     97         std     6,8(1)
     98 
     99         /* New stack frame */
    100         stdu    1,-624(1)  /* sp should maintain 16-byte alignment */
    101 
    102         /* General reg save area : 152 bytes */
    103         std     31,472(1)
    104         std     30,464(1)
    105         std     29,456(1)
    106         std     28,448(1)
    107         std     27,440(1)
    108         std     26,432(1)
    109         std     25,424(1)
    110         std     24,416(1)
    111         std     23,408(1)
    112         std     22,400(1)
    113         std     21,392(1)
    114         std     20,384(1)
    115         std     19,376(1)
    116         std     18,368(1)
    117         std     17,360(1)
    118         std     16,352(1)
    119         std     15,344(1)
    120         std     14,336(1)
    121         std     13,328(1)
    122         std     3,104(1)  /* save two_words for later */
    123 
    124         /* Save callee-saved registers... */
    125         /* Floating-point reg save area : 144 bytes */
    126         stfd    31,616(1)
    127         stfd    30,608(1)
    128         stfd    29,600(1)
    129         stfd    28,592(1)
    130         stfd    27,584(1)
    131         stfd    26,576(1)
    132         stfd    25,568(1)
    133         stfd    24,560(1)
    134         stfd    23,552(1)
    135         stfd    22,544(1)
    136         stfd    21,536(1)
    137         stfd    20,528(1)
    138         stfd    19,520(1)
    139         stfd    18,512(1)
    140         stfd    17,504(1)
    141         stfd    16,496(1)
    142         stfd    15,488(1)
    143         stfd    14,480(1)
    144 
    145         /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
    146            The Linux kernel might not actually use VRSAVE for its intended
    147            purpose, but it should be harmless to preserve anyway. */
    148 	/* r3, r4, r5 are live here, so use r6 */
    149 	ld	6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
    150 	ld	6,0(6)
    151         cmpldi  6,0
    152         beq     .LafterVMX1
    153 
    154         /* VRSAVE save word : 32 bytes */
    155         mfspr   6,256         /* vrsave reg is spr number 256 */
    156         stw     6,324(1)
    157 
    158         /* Alignment padding : 4 bytes */
    159 
    160         /* Vector reg save area (quadword aligned) : 192 bytes */
    161         li      6,304
    162         stvx    31,6,1
    163         li      6,288
    164         stvx    30,6,1
    165         li      6,272
    166         stvx    29,6,1
    167         li      6,256
    168         stvx    28,6,1
    169         li      6,240
    170         stvx    27,6,1
    171         li      6,224
    172         stvx    26,6,1
    173         li      6,208
    174         stvx    25,6,1
    175         li      6,192
    176         stvx    24,6,1
    177         li      6,176
    178         stvx    23,6,1
    179         li      6,160
    180         stvx    22,6,1
    181         li      6,144
    182         stvx    21,6,1
    183         li      6,128
    184         stvx    20,6,1
    185 .LafterVMX1:
    186 
    187         /* Local variable space... */
    188 
    189 	/* r3 holds two_words */
    190 	/* r4 holds guest_state */
    191         /* r5 holds host_addr */
    192 
    193         /* 96(sp) used later to check FPSCR[RM] */
    194         /* 88(sp) used later to load fpscr with zero */
    195 	/* 48:87(sp) free */
    196 
    197         /* Linkage Area (reserved)  BE ABI
    198            40(sp) : TOC
    199            32(sp) : link editor doubleword
    200            24(sp) : compiler doubleword
    201            16(sp) : LR
    202            8(sp)  : CR
    203            0(sp)  : back-chain
    204         */
    205 
    206         /* set host FPU control word to the default mode expected
    207            by VEX-generated code.  See comments in libvex.h for
    208            more info. */
    209         /* => get zero into f3 (tedious)
    210            fsub 3,3,3 is not a reliable way to do this, since if
    211            f3 holds a NaN or similar then we don't necessarily
    212            wind up with zero. */
    213         li      6,0
    214         stw     6,88(1)
    215         lfs     3,88(1)
    216         mtfsf   0xFF,3   /* fpscr = lo32 of f3 */
    217 
    218         /* set host AltiVec control word to the default mode expected
    219            by VEX-generated code. */
    220 	ld	6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
    221 	ld	6,0(6)
    222         cmpldi  6,0
    223         beq     .LafterVMX2
    224 
    225         vspltisw 3,0x0  /* generate zero */
    226         mtvscr  3
    227 .LafterVMX2:
    228 
    229         /* make a stack frame for the code we are calling */
    230         stdu    1,-48(1)
    231 
    232         /* Set up the guest state ptr */
    233         mr      31,4      /* r31 (generated code gsp) = r4 */
    234 
    235         /* and jump into the code cache.  Chained translations in
    236            the code cache run, until for whatever reason, they can't
    237            continue.  When that happens, the translation in question
    238            will jump (or call) to one of the continuation points
    239            VG_(cp_...) below. */
    240         mtctr   5
    241         bctr
    242 	/*NOTREACHED*/
    243 
    244 /*----------------------------------------------------*/
    245 /*--- Postamble and exit.                          ---*/
    246 /*----------------------------------------------------*/
    247 
    248 .postamble:
    249         /* At this point, r6 and r7 contain two
    250            words to be returned to the caller.  r6
    251            holds a TRC value, and r7 optionally may
    252            hold another word (for CHAIN_ME exits, the
    253            address of the place to patch.) */
    254 
    255         /* undo the "make a stack frame for the code we are calling" */
    256         addi    1,1,48
    257 
    258         /* We're leaving.  Check that nobody messed with
    259            VSCR or FPSCR in ways we don't expect. */
    260 	/* Using r11 - value used again further on, so don't trash! */
    261 	ld	11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
    262 	ld	11,0(11)
    263 
    264 	/* Set fpscr back to a known state, since vex-generated code
    265 	   may have messed with fpscr[rm]. */
    266         li      5,0
    267         addi    1,1,-16
    268         stw     5,0(1)
    269         lfs     3,0(1)
    270         addi    1,1,16
    271         mtfsf   0xFF,3   /* fpscr = f3 */
    272 
    273         cmpldi  11,0    /* Do we have altivec? */
    274         beq     .LafterVMX8
    275 
    276         /* Check VSCR[NJ] == 1 */
    277         /* first generate 4x 0x00010000 */
    278         vspltisw  4,0x1                   /* 4x 0x00000001 */
    279         vspltisw  5,0x0                   /* zero */
    280         vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
    281         /* retrieve VSCR and mask wanted bits */
    282         mfvscr    7
    283         vand      7,7,6                   /* gives NJ flag */
    284         vspltw    7,7,0x3                 /* flags-word to all lanes */
    285         vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
    286         bt        24,.invariant_violation /* branch if all_equal */
    287 
    288 .LafterVMX8:
    289 	/* otherwise we're OK */
    290         b       .remove_frame
    291 
    292 .invariant_violation:
    293         li      6,VG_TRC_INVARIANT_FAILED
    294         li      7,0
    295         /* fall through */
    296 
    297 .remove_frame:
    298         /* r11 already holds VG_(machine_ppc32_has_VMX) value */
    299         cmplwi  11,0
    300         beq     .LafterVMX9
    301 
    302         /* Restore Altivec regs.
    303            Use r5 as scratch since r6/r7 are live. */
    304         /* VRSAVE */
    305         lwz     5,324(1)
    306         mfspr   5,256         /* VRSAVE reg is spr number 256 */
    307 
    308         /* Vector regs */
    309         li      5,304
    310         lvx     31,5,1
    311         li      5,288
    312         lvx     30,5,1
    313         li      5,272
    314         lvx     29,5,1
    315         li      5,256
    316         lvx     28,5,1
    317         li      5,240
    318         lvx     27,5,1
    319         li      5,224
    320         lvx     26,5,1
    321         li      5,208
    322         lvx     25,5,1
    323         li      5,192
    324         lvx     24,5,1
    325         li      5,176
    326         lvx     23,5,1
    327         li      5,160
    328         lvx     22,5,1
    329         li      5,144
    330         lvx     21,5,1
    331         li      5,128
    332         lvx     20,5,1
    333 .LafterVMX9:
    334 
    335         /* Restore FP regs */
    336         /* Floating-point regs */
    337         lfd     31,616(1)
    338         lfd     30,608(1)
    339         lfd     29,600(1)
    340         lfd     28,592(1)
    341         lfd     27,584(1)
    342         lfd     26,576(1)
    343         lfd     25,568(1)
    344         lfd     24,560(1)
    345         lfd     23,552(1)
    346         lfd     22,544(1)
    347         lfd     21,536(1)
    348         lfd     20,528(1)
    349         lfd     19,520(1)
    350         lfd     18,512(1)
    351         lfd     17,504(1)
    352         lfd     16,496(1)
    353         lfd     15,488(1)
    354         lfd     14,480(1)
    355 
    356         /* restore int regs, including importantly r3 (two_words) */
    357         ld      31,472(1)
    358         ld      30,464(1)
    359         ld      29,456(1)
    360         ld      28,448(1)
    361         ld      27,440(1)
    362         ld      26,432(1)
    363         ld      25,424(1)
    364         ld      24,416(1)
    365         ld      23,408(1)
    366         ld      22,400(1)
    367         ld      21,392(1)
    368         ld      20,384(1)
    369         ld      19,376(1)
    370         ld      18,368(1)
    371         ld      17,360(1)
    372         ld      16,352(1)
    373         ld      15,344(1)
    374         ld      14,336(1)
    375         ld      13,328(1)
    376         ld      3,104(1)
    377         /* Stash return values */
    378         std     6,0(3)
    379         std     7,8(3)
    380 
    381         /* restore lr & sp, and leave */
    382         ld      0,632(1)  /* stack_size + 8 */
    383         mtcr    0
    384         ld      0,640(1)  /* stack_size + 16 */
    385         mtlr    0
    386         addi    1,1,624   /* stack_size */
    387         blr
    388 
    389 
    390 /*----------------------------------------------------*/
    391 /*--- Continuation points                          ---*/
    392 /*----------------------------------------------------*/
    393 
    394 /* ------ Chain me to slow entry point ------ */
    395         .section ".text"
    396         .align   2
    397         .globl   VG_(disp_cp_chain_me_to_slowEP)
    398         .section ".opd","aw"
    399         .align   3
    400 VG_(disp_cp_chain_me_to_slowEP):
    401         .quad    .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
    402         .previous
    403         .type    .VG_(disp_cp_chain_me_to_slowEP),@function
    404         .globl   .VG_(disp_cp_chain_me_to_slowEP)
    405 .VG_(disp_cp_chain_me_to_slowEP):
    406         /* We got called.  The return address indicates
    407            where the patching needs to happen.  Collect
    408            the return address and, exit back to C land,
    409            handing the caller the pair (Chain_me_S, RA) */
    410         li   6, VG_TRC_CHAIN_ME_TO_SLOW_EP
    411         mflr 7
    412         /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP
    413            4  = mtctr r30
    414            4  = btctr
    415         */
    416         subi 7,7,20+4+4
    417         b    .postamble
    418 
    419 /* ------ Chain me to fast entry point ------ */
    420         .section ".text"
    421         .align   2
    422         .globl   VG_(disp_cp_chain_me_to_fastEP)
    423         .section ".opd","aw"
    424         .align   3
    425 VG_(disp_cp_chain_me_to_fastEP):
    426         .quad    .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
    427         .previous
    428         .type    .VG_(disp_cp_chain_me_to_fastEP),@function
    429         .globl   .VG_(disp_cp_chain_me_to_fastEP)
    430 .VG_(disp_cp_chain_me_to_fastEP):
    431         /* We got called.  The return address indicates
    432            where the patching needs to happen.  Collect
    433            the return address and, exit back to C land,
    434            handing the caller the pair (Chain_me_S, RA) */
    435         li   6, VG_TRC_CHAIN_ME_TO_FAST_EP
    436         mflr 7
    437         /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP
    438            4  = mtctr r30
    439            4  = btctr
    440         */
    441         subi 7,7,20+4+4
    442         b    .postamble
    443 
    444 /* ------ Indirect but boring jump ------ */
    445         .section ".text"
    446         .align   2
    447         .globl   VG_(disp_cp_xindir)
    448         .section ".opd","aw"
    449         .align   3
    450 VG_(disp_cp_xindir):
    451         .quad    .VG_(disp_cp_xindir),.TOC.@tocbase,0
    452         .previous
    453         .type    .VG_(disp_cp_xindir),@function
    454         .globl   .VG_(disp_cp_xindir)
    455 .VG_(disp_cp_xindir):
    456         /* Where are we going? */
    457         ld      3,OFFSET_ppc64_CIA(31)
    458 
    459         /* stats only */
    460 	ld	5, .tocent__vgPlain_stats__n_xindirs_32@toc(2)
    461         lwz     6,0(5)
    462         addi    6,6,1
    463         stw     6,0(5)
    464 
    465 	/* r5 = &VG_(tt_fast) */
    466 	ld	5, .tocent__vgPlain_tt_fast@toc(2) /* &VG_(tt_fast) */
    467 
    468         /* try a fast lookup in the translation cache */
    469         /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
    470               = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 4 */
    471 	rldicl	4,3, 62, 64-VG_TT_FAST_BITS   /* entry# */
    472 	sldi	4,4,4      /* entry# * sizeof(FastCacheEntry) */
    473 	add	5,5,4      /* & VG_(tt_fast)[entry#] */
    474 	ld	6,0(5)     /* .guest */
    475 	ld	7,8(5)     /* .host */
    476         cmpd    3,6
    477         bne     .fast_lookup_failed
    478 
    479         /* Found a match.  Jump to .host. */
    480         mtctr   7
    481         bctr
    482 
    483 .fast_lookup_failed:
    484         /* stats only */
    485 	ld	5, .tocent__vgPlain_stats__n_xindir_misses_32@toc(2)
    486         lwz     6,0(5)
    487         addi    6,6,1
    488         stw     6,0(5)
    489 
    490         li      6,VG_TRC_INNER_FASTMISS
    491         li      7,0
    492         b       .postamble
    493 	/*NOTREACHED*/
    494 
    495 /* ------ Assisted jump ------ */
    496 .section ".text"
    497         .align   2
    498         .globl   VG_(disp_cp_xassisted)
    499         .section ".opd","aw"
    500         .align   3
    501 VG_(disp_cp_xassisted):
    502         .quad    .VG_(disp_cp_xassisted),.TOC.@tocbase,0
    503         .previous
    504         .type    .VG_(disp_cp_xassisted),@function
    505         .globl   .VG_(disp_cp_xassisted)
    506 .VG_(disp_cp_xassisted):
    507         /* r31 contains the TRC */
    508         mr      6,31
    509         li      7,0
    510         b       .postamble
    511 
    512 /* ------ Event check failed ------ */
    513         .section ".text"
    514         .align   2
    515         .globl   VG_(disp_cp_evcheck_fail)
    516         .section ".opd","aw"
    517         .align   3
    518 VG_(disp_cp_evcheck_fail):
    519         .quad    .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
    520         .previous
    521         .type    .VG_(disp_cp_evcheck_fail),@function
    522         .globl   .VG_(disp_cp_evcheck_fail)
    523 .VG_(disp_cp_evcheck_fail):
    524         li      6,VG_TRC_INNER_COUNTERZERO
    525         li      7,0
    526         b       .postamble
    527 
    528 
    529 .size .VG_(disp_run_translations), .-.VG_(disp_run_translations)
    530 
    531 #endif // defined(VGP_ppc64be_linux)
    532 
    533 /* Let the linker know we don't need an executable stack */
    534 MARK_STACK_NO_EXEC
    535 
    536 /*--------------------------------------------------------------------*/
    537 /*--- end                                                          ---*/
    538 /*--------------------------------------------------------------------*/
    539