Home | History | Annotate | Download | only in m_dispatch
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- The core dispatch loop, for jumping to a code address.       ---*/
      4 /*---                                       dispatch-ppc32-linux.S ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8   This file is part of Valgrind, a dynamic binary instrumentation
      9   framework.
     10 
     11   Copyright (C) 2005-2012 Cerion Armour-Brown <cerion (at) open-works.co.uk>
     12 
     13   This program is free software; you can redistribute it and/or
     14   modify it under the terms of the GNU General Public License as
     15   published by the Free Software Foundation; either version 2 of the
     16   License, or (at your option) any later version.
     17 
     18   This program is distributed in the hope that it will be useful, but
     19   WITHOUT ANY WARRANTY; without even the implied warranty of
     20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21   General Public License for more details.
     22 
     23   You should have received a copy of the GNU General Public License
     24   along with this program; if not, write to the Free Software
     25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26   02111-1307, USA.
     27 
     28   The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #if defined(VGP_ppc32_linux)
     32 
     33 #include "config.h"
     34 #include "pub_core_basics_asm.h"
     35 #include "pub_core_dispatch_asm.h"
     36 #include "pub_core_transtab_asm.h"
     37 #include "libvex_guest_offsets.h"	/* for OFFSET_ppc32_CIA */
     38 
     39 
     40 /*------------------------------------------------------------*/
     41 /*---                                                      ---*/
     42 /*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
     43 /*--- used to run all translations,                        ---*/
     44 /*--- including no-redir ones.                             ---*/
     45 /*---                                                      ---*/
     46 /*------------------------------------------------------------*/
     47 
     48 /*----------------------------------------------------*/
     49 /*--- Entry and preamble (set everything up)       ---*/
     50 /*----------------------------------------------------*/
     51 
     52 /* signature:
     53 void VG_(disp_run_translations)( UWord* two_words,
     54                                  void*  guest_state,
     55                                  Addr   host_addr );
     56 */
     57 .text
     58 .globl  VG_(disp_run_translations)
     59 .type  VG_(disp_run_translations), @function
     60 VG_(disp_run_translations):
     61 	/* r3 holds two_words */
     62 	/* r4 holds guest_state */
     63         /* r5 holds host_addr */
     64 
     65         /* ----- entry point to VG_(disp_run_translations) ----- */
     66         /* For Linux/ppc32 we need the SysV ABI, which uses
     67            LR->4(parent_sp), CR->anywhere.
     68            (The AIX ABI, used on Darwin,
     69            uses LR->8(prt_sp), CR->4(prt_sp))
     70         */
     71 
     72         /* Save lr */
     73         mflr    6
     74         stw     6,4(1)
     75 
     76         /* New stack frame */
     77         stwu    1,-496(1)  /* sp should maintain 16-byte alignment */
     78 
     79         /* Save callee-saved registers... */
     80 	/* r3, r4, r5 are live here, so use r6 */
     81         lis     6,VG_(machine_ppc32_has_FP)@ha
     82         lwz     6,VG_(machine_ppc32_has_FP)@l(6)
     83         cmplwi  6,0
     84         beq     LafterFP1
     85 
     86         /* Floating-point reg save area : 144 bytes */
     87         stfd    31,488(1)
     88         stfd    30,480(1)
     89         stfd    29,472(1)
     90         stfd    28,464(1)
     91         stfd    27,456(1)
     92         stfd    26,448(1)
     93         stfd    25,440(1)
     94         stfd    24,432(1)
     95         stfd    23,424(1)
     96         stfd    22,416(1)
     97         stfd    21,408(1)
     98         stfd    20,400(1)
     99         stfd    19,392(1)
    100         stfd    18,384(1)
    101         stfd    17,376(1)
    102         stfd    16,368(1)
    103         stfd    15,360(1)
    104         stfd    14,352(1)
    105 LafterFP1:
    106 
    107         /* General reg save area : 76 bytes */
    108         stw     31,348(1)
    109         stw     30,344(1)
    110         stw     29,340(1)
    111         stw     28,336(1)
    112         stw     27,332(1)
    113         stw     26,328(1)
    114         stw     25,324(1)
    115         stw     24,320(1)
    116         stw     23,316(1)
    117         stw     22,312(1)
    118         stw     21,308(1)
    119         stw     20,304(1)
    120         stw     19,300(1)
    121         stw     18,296(1)
    122         stw     17,292(1)
    123         stw     16,288(1)
    124         stw     15,284(1)
    125         stw     14,280(1)
    126         stw     13,276(1)
    127         stw     3,272(1)  /* save two_words for later */
    128 
    129         /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
    130            The Linux kernel might not actually use VRSAVE for its intended
    131            purpose, but it should be harmless to preserve anyway. */
    132 	/* r3, r4, r5 are live here, so use r6 */
    133         lis     6,VG_(machine_ppc32_has_VMX)@ha
    134         lwz     6,VG_(machine_ppc32_has_VMX)@l(6)
    135         cmplwi  6,0
    136         beq     LafterVMX1
    137 
    138 #ifdef HAS_ALTIVEC
    139         /* VRSAVE save word : 32 bytes */
    140         mfspr   6,256         /* vrsave reg is spr number 256 */
    141         stw     6,244(1)
    142 
    143         /* Alignment padding : 4 bytes */
    144 
    145         /* Vector reg save area (quadword aligned) : 192 bytes */
    146         li      6,224
    147         stvx    31,6,1
    148         li      6,208
    149         stvx    30,6,1
    150         li      6,192
    151         stvx    29,6,1
    152         li      6,176
    153         stvx    28,6,1
    154         li      6,160
    155         stvx    27,6,1
    156         li      6,144
    157         stvx    26,6,1
    158         li      6,128
    159         stvx    25,6,1
    160         li      6,112
    161         stvx    25,6,1
    162         li      6,96
    163         stvx    23,6,1
    164         li      6,80
    165         stvx    22,6,1
    166         li      6,64
    167         stvx    21,6,1
    168         li      6,48
    169         stvx    20,6,1
    170 #endif
    171 
    172 LafterVMX1:
    173 
    174         /* Save cr */
    175         mfcr    6
    176         stw     6,44(1)
    177 
    178         /* Local variable space... */
    179 
    180         /* 32(sp) used later to check FPSCR[RM] */
    181 
    182 	/* r3 holds two_words */
    183 	/* r4 holds guest_state */
    184         /* r5 holds host_addr */
    185 
    186         /* 24(sp) used later to stop ctr reg being clobbered */
    187         /* 20(sp) used later to load fpscr with zero */
    188         /* 8:16(sp) free */
    189 
    190         /* Linkage Area (reserved)
    191            4(sp)  : LR
    192            0(sp)  : back-chain
    193         */
    194 
    195         /* set host FPU control word to the default mode expected
    196            by VEX-generated code.  See comments in libvex.h for
    197            more info. */
    198         lis     6,VG_(machine_ppc32_has_FP)@ha
    199         lwz     6,VG_(machine_ppc32_has_FP)@l(6)
    200         cmplwi  6,0
    201         beq     LafterFP2
    202 
    203         /* get zero into f3 (tedious) */
    204         /* note: fsub 3,3,3 is not a reliable way to do this,
    205            since if f3 holds a NaN or similar then we don't necessarily
    206            wind up with zero. */
    207         li      6,0
    208         stw     6,20(1)
    209         lfs     3,20(1)
    210         mtfsf   0xFF,3   /* fpscr = f3 */
    211 LafterFP2:
    212 
    213         /* set host AltiVec control word to the default mode expected
    214            by VEX-generated code. */
    215         lis     6,VG_(machine_ppc32_has_VMX)@ha
    216         lwz     6,VG_(machine_ppc32_has_VMX)@l(6)
    217         cmplwi  6,0
    218         beq     LafterVMX2
    219 
    220 #ifdef HAS_ALTIVEC
    221         vspltisw 3,0x0  /* generate zero */
    222         mtvscr  3
    223 #endif
    224 
    225 LafterVMX2:
    226 
    227         /* make a stack frame for the code we are calling */
    228         stwu    1,-16(1)
    229 
    230         /* Set up the guest state ptr */
    231         mr      31,4      /* r31 (generated code gsp) = r4 */
    232 
    233         /* and jump into the code cache.  Chained translations in
    234            the code cache run, until for whatever reason, they can't
    235            continue.  When that happens, the translation in question
    236            will jump (or call) to one of the continuation points
    237            VG_(cp_...) below. */
    238         mtctr   5
    239         bctr
    240 	/*NOTREACHED*/
    241 
    242 /*----------------------------------------------------*/
    243 /*--- Postamble and exit.                          ---*/
    244 /*----------------------------------------------------*/
    245 
    246 postamble:
    247         /* At this point, r6 and r7 contain two
    248            words to be returned to the caller.  r6
    249            holds a TRC value, and r7 optionally may
    250            hold another word (for CHAIN_ME exits, the
    251            address of the place to patch.) */
    252 
    253         /* We're leaving.  Check that nobody messed with
    254            VSCR or FPSCR in ways we don't expect. */
    255         /* Using r10 - value used again further on, so don't trash! */
    256         lis     10,VG_(machine_ppc32_has_FP)@ha
    257         lwz     10,VG_(machine_ppc32_has_FP)@l(10)
    258 
    259 	/* Using r11 - value used again further on, so don't trash! */
    260         lis     11,VG_(machine_ppc32_has_VMX)@ha
    261         lwz     11,VG_(machine_ppc32_has_VMX)@l(11)
    262 
    263         cmplwi  10,0    /* Do we have FP ? */
    264         beq     LafterFP8
    265 
    266 	/* Set fpscr back to a known state, since vex-generated code
    267 	   may have messed with fpscr[rm]. */
    268         li      5,0
    269         addi    1,1,-16
    270         stw     5,0(1)
    271         lfs     3,0(1)
    272         addi    1,1,16
    273         mtfsf   0xFF,3   /* fpscr = f3 */
    274 LafterFP8:
    275 
    276         cmplwi  11,0    /* Do we have altivec? */
    277         beq     LafterVMX8
    278 
    279 #ifdef HAS_ALTIVEC
    280         /* Check VSCR[NJ] == 1 */
    281         /* first generate 4x 0x00010000 */
    282         vspltisw  4,0x1                   /* 4x 0x00000001 */
    283         vspltisw  5,0x0                   /* zero */
    284         vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
    285         /* retrieve VSCR and mask wanted bits */
    286         mfvscr    7
    287         vand      7,7,6                   /* gives NJ flag */
    288         vspltw    7,7,0x3                 /* flags-word to all lanes */
    289         vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
    290         bt        24,invariant_violation  /* branch if all_equal */
    291 #endif
    292 
    293 LafterVMX8:
    294 	/* otherwise we're OK */
    295         b       remove_frame
    296 
    297 invariant_violation:
    298         li      6,VG_TRC_INVARIANT_FAILED
    299         li      7,0
    300         /* fall through */
    301 
    302 remove_frame:
    303         /* Restore FP regs */
    304         /* r10 already holds VG_(machine_ppc32_has_FP) value */
    305         cmplwi  10,0
    306         beq     LafterFP9
    307 
    308         /* Floating-point regs */
    309         lfd     31,488(1)
    310         lfd     30,480(1)
    311         lfd     29,472(1)
    312         lfd     28,464(1)
    313         lfd     27,456(1)
    314         lfd     26,448(1)
    315         lfd     25,440(1)
    316         lfd     24,432(1)
    317         lfd     23,424(1)
    318         lfd     22,416(1)
    319         lfd     21,408(1)
    320         lfd     20,400(1)
    321         lfd     19,392(1)
    322         lfd     18,384(1)
    323         lfd     17,376(1)
    324         lfd     16,368(1)
    325         lfd     15,360(1)
    326         lfd     14,352(1)
    327 LafterFP9:
    328 
    329         /* r11 already holds VG_(machine_ppc32_has_VMX) value */
    330         cmplwi  11,0
    331         beq     LafterVMX9
    332 
    333         /* Restore Altivec regs */
    334 #ifdef HAS_ALTIVEC
    335         /* VRSAVE */
    336         lwz     4,244(1)
    337         mfspr   4,256         /* VRSAVE reg is spr number 256 */
    338 
    339         /* Vector regs */
    340         li      4,224
    341         lvx     31,4,1
    342         li      4,208
    343         lvx     30,4,1
    344         li      4,192
    345         lvx     29,4,1
    346         li      4,176
    347         lvx     28,4,1
    348         li      4,160
    349         lvx     27,4,1
    350         li      4,144
    351         lvx     26,4,1
    352         li      4,128
    353         lvx     25,4,1
    354         li      4,112
    355         lvx     24,4,1
    356         li      4,96
    357         lvx     23,4,1
    358         li      4,80
    359         lvx     22,4,1
    360         li      4,64
    361         lvx     21,4,1
    362         li      4,48
    363         lvx     20,4,1
    364 #endif
    365 LafterVMX9:
    366 
    367         /* restore int regs, including importantly r3 (two_words) */
    368         addi    1,1,16
    369         lwz     31,348(1)
    370         lwz     30,344(1)
    371         lwz     29,340(1)
    372         lwz     28,336(1)
    373         lwz     27,332(1)
    374         lwz     26,328(1)
    375         lwz     25,324(1)
    376         lwz     24,320(1)
    377         lwz     23,316(1)
    378         lwz     22,312(1)
    379         lwz     21,308(1)
    380         lwz     20,304(1)
    381         lwz     19,300(1)
    382         lwz     18,296(1)
    383         lwz     17,292(1)
    384         lwz     16,288(1)
    385         lwz     15,284(1)
    386         lwz     14,280(1)
    387         lwz     13,276(1)
    388         lwz     3,272(1)
    389         /* Stash return values */
    390         stw     6,0(3)
    391         stw     7,4(3)
    392 
    393         /* restore lr & sp, and leave */
    394         lwz     0,500(1)  /* stack_size + 4 */
    395         mtlr    0
    396         addi    1,1,496   /* stack_size */
    397         blr
    398 
    399 
    400 /*----------------------------------------------------*/
    401 /*--- Continuation points                          ---*/
    402 /*----------------------------------------------------*/
    403 
    404 /* ------ Chain me to slow entry point ------ */
    405 .global VG_(disp_cp_chain_me_to_slowEP)
    406 VG_(disp_cp_chain_me_to_slowEP):
    407         /* We got called.  The return address indicates
    408            where the patching needs to happen.  Collect
    409            the return address and, exit back to C land,
    410            handing the caller the pair (Chain_me_S, RA) */
    411         li   6, VG_TRC_CHAIN_ME_TO_SLOW_EP
    412         mflr 7
    413         /* 8 = imm32-fixed2 r30, disp_cp_chain_me_to_slowEP
    414            4 = mtctr r30
    415            4 = btctr
    416         */
    417         subi 7,7,8+4+4
    418         b    postamble
    419 
    420 /* ------ Chain me to fast entry point ------ */
    421 .global VG_(disp_cp_chain_me_to_fastEP)
    422 VG_(disp_cp_chain_me_to_fastEP):
    423         /* We got called.  The return address indicates
    424            where the patching needs to happen.  Collect
    425            the return address and, exit back to C land,
    426            handing the caller the pair (Chain_me_S, RA) */
    427         li   6, VG_TRC_CHAIN_ME_TO_FAST_EP
    428         mflr 7
    429         /* 8 = imm32-fixed2 r30, disp_cp_chain_me_to_fastEP
    430            4 = mtctr r30
    431            4 = btctr
    432         */
    433         subi 7,7,8+4+4
    434         b    postamble
    435 
    436 /* ------ Indirect but boring jump ------ */
    437 .global VG_(disp_cp_xindir)
    438 VG_(disp_cp_xindir):
    439         /* Where are we going? */
    440         lwz     3,OFFSET_ppc32_CIA(31)
    441 
    442         /* stats only */
    443         lis     5,VG_(stats__n_xindirs_32)@ha
    444         addi    5,5,VG_(stats__n_xindirs_32)@l
    445         lwz     6,0(5)
    446         addi    6,6,1
    447         stw     6,0(5)
    448 
    449         /* r5 = &VG_(tt_fast) */
    450         lis	5,VG_(tt_fast)@ha
    451         addi    5,5,VG_(tt_fast)@l   /* & VG_(tt_fast) */
    452 
    453         /* try a fast lookup in the translation cache */
    454         /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
    455               = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 3 */
    456 	rlwinm	4,3,1, 29-VG_TT_FAST_BITS, 28	/* entry# * 8 */
    457 	add	5,5,4	/* & VG_(tt_fast)[entry#] */
    458 	lwz	6,0(5)   /* .guest */
    459 	lwz	7,4(5)   /* .host */
    460         cmpw    3,6
    461         bne     fast_lookup_failed
    462 
    463         /* Found a match.  Jump to .host. */
    464         mtctr   7
    465         bctr
    466 
    467 fast_lookup_failed:
    468         /* stats only */
    469         lis     5,VG_(stats__n_xindir_misses_32)@ha
    470         addi    5,5,VG_(stats__n_xindir_misses_32)@l
    471         lwz     6,0(5)
    472         addi    6,6,1
    473         stw     6,0(5)
    474 
    475         li      6,VG_TRC_INNER_FASTMISS
    476         li      7,0
    477         b       postamble
    478 	/*NOTREACHED*/
    479 
    480 /* ------ Assisted jump ------ */
    481 .global VG_(disp_cp_xassisted)
    482 VG_(disp_cp_xassisted):
    483         /* r31 contains the TRC */
    484         mr      6,31
    485         li      7,0
    486         b       postamble
    487 
    488 /* ------ Event check failed ------ */
    489 .global VG_(disp_cp_evcheck_fail)
    490 VG_(disp_cp_evcheck_fail):
    491         li      6,VG_TRC_INNER_COUNTERZERO
    492         li      7,0
    493         b       postamble
    494 
    495 
    496 .size VG_(disp_run_translations), .-VG_(disp_run_translations)
    497 
    498 /* Let the linker know we don't need an executable stack */
    499 .section .note.GNU-stack,"",@progbits
    500 
    501 #endif // defined(VGP_ppc32_linux)
    502 
    503 /*--------------------------------------------------------------------*/
    504 /*--- end                                                          ---*/
    505 /*--------------------------------------------------------------------*/
    506