
/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-ppc32-linux.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright (C) 2005-2010 Cerion Armour-Brown <cerion@open-works.co.uk>

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGP_ppc32_linux)

#include "config.h"
#include "pub_core_basics_asm.h"
#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"	/* for OFFSET_ppc32_CIA */


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
/*--- run all translations except no-redir ones.           ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Preamble (set everything up)                 ---*/
/*----------------------------------------------------*/

/* signature:
UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
*/
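
/* For orientation: the scheduler side calls this roughly as

      UWord trc = VG_(run_innerloop)( (void*)&tst->arch.vex, do_profiling );

   (an illustrative sketch, not the literal m_scheduler code).  The value
   returned in r3 is either a VG_TRC_* code or, if the translation changed
   the guest state pointer, that new pointer itself (see gsp_changed
   below). */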
.text
.globl  VG_(run_innerloop)
.type  VG_(run_innerloop), @function
VG_(run_innerloop):
	/* r3 holds guest_state */
	/* r4 holds do_profiling */

        /* ----- entry point to VG_(run_innerloop) ----- */
        /* For Linux/ppc32 we need the SysV ABI, which uses
           LR->4(parent_sp), CR->anywhere.
           (The AIX ABI, used on Darwin,
           uses LR->8(parent_sp), CR->4(parent_sp))
        */

        /* Save lr */
        mflr    0
        stw     0,4(1)

        /* New stack frame */
        stwu    1,-496(1)  /* sp should maintain 16-byte alignment */
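        /* (stwu stores the old sp at 0(new sp), forming the back-chain,
           and decrements sp by 496 in one instruction; 496 is a multiple
           of 16, so quadword alignment is preserved) */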

        /* Save callee-saved registers... */
	/* r3, r4 are live here, so use r5 */
        lis     5,VG_(machine_ppc32_has_FP)@ha
        lwz     5,VG_(machine_ppc32_has_FP)@l(5)
        cmplwi  5,0
        beq     LafterFP1

        /* Floating-point reg save area : 144 bytes */
        stfd    31,488(1)
        stfd    30,480(1)
        stfd    29,472(1)
        stfd    28,464(1)
        stfd    27,456(1)
        stfd    26,448(1)
        stfd    25,440(1)
        stfd    24,432(1)
        stfd    23,424(1)
        stfd    22,416(1)
        stfd    21,408(1)
        stfd    20,400(1)
        stfd    19,392(1)
        stfd    18,384(1)
        stfd    17,376(1)
        stfd    16,368(1)
        stfd    15,360(1)
        stfd    14,352(1)
LafterFP1:

        /* General reg save area : 72 bytes */
        stw     31,348(1)
        stw     30,344(1)
        stw     29,340(1)
        stw     28,336(1)
        stw     27,332(1)
        stw     26,328(1)
        stw     25,324(1)
        stw     24,320(1)
        stw     23,316(1)
        stw     22,312(1)
        stw     21,308(1)
        stw     20,304(1)
        stw     19,300(1)
        stw     18,296(1)
        stw     17,292(1)
        stw     16,288(1)
        stw     15,284(1)
        stw     14,280(1)
        /* Probably not necessary to save r13 (thread-specific ptr),
           as VEX stays clear of it... but what the hey. */
        stw     13,276(1)

        /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
           The Linux kernel might not actually use VRSAVE for its intended
           purpose, but it should be harmless to preserve anyway. */
	/* r3, r4 are live here, so use r5 */
        lis     5,VG_(machine_ppc32_has_VMX)@ha
        lwz     5,VG_(machine_ppc32_has_VMX)@l(5)
        cmplwi  5,0
        beq     LafterVMX1

#ifdef HAS_ALTIVEC
        /* VRSAVE save word : 32 bytes */
        mfspr   5,256         /* vrsave reg is spr number 256 */
        stw     5,244(1)

        /* Alignment padding : 4 bytes */

        /* Vector reg save area (quadword aligned) : 192 bytes */
        li      5,224
        stvx    31,5,1
        li      5,208
        stvx    30,5,1
        li      5,192
        stvx    29,5,1
        li      5,176
        stvx    28,5,1
        li      5,160
        stvx    27,5,1
        li      5,144
        stvx    26,5,1
        li      5,128
        stvx    25,5,1
        li      5,112
        stvx    24,5,1
        li      5,96
        stvx    23,5,1
        li      5,80
        stvx    22,5,1
        li      5,64
        stvx    21,5,1
        li      5,48
        stvx    20,5,1
#endif

LafterVMX1:

        /* Save cr */
        mfcr    0
        stw     0,44(1)

        /* Local variable space... */

        /* 32(sp) used later to check FPSCR[RM] */

        /* r3 holds guest_state */
        /* r4 holds do_profiling */
        mr      31,3      /* r31 (generated code gsp) = r3 */
        stw     3,28(1)   /* spill orig guest_state ptr */

        /* 24(sp) used later to stop ctr reg being clobbered */
        /* 20(sp) used later to load fpscr with zero */
        /* 8:16(sp) free */

        /* Linkage Area (reserved)
           4(sp)  : LR
           0(sp)  : back-chain
        */

        /* CAB TODO: Use a caller-saved reg for orig guest_state ptr
           - rem to set non-allocatable in isel.c */

        /* hold dispatch_ctr in r29 */
        lis     5,VG_(dispatch_ctr)@ha
        lwz     29,VG_(dispatch_ctr)@l(5)

        /* set host FPU control word to the default mode expected
           by VEX-generated code.  See comments in libvex.h for
           more info. */
        lis     5,VG_(machine_ppc32_has_FP)@ha
        lwz     5,VG_(machine_ppc32_has_FP)@l(5)
        cmplwi  5,0
        beq     LafterFP2

        /* get zero into f3 (tedious) */
        /* note: fsub 3,3,3 is not a reliable way to do this,
           since if f3 holds a NaN or similar then we don't necessarily
           wind up with zero. */
        li      5,0
        stw     5,20(1)
        lfs     3,20(1)
        mtfsf   0xFF,3   /* fpscr = f3 */
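        /* (FPSCR = 0 means round-to-nearest with all FP exceptions
           disabled, which is the default mode VEX-generated code
           assumes) */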
LafterFP2:

        /* set host AltiVec control word to the default mode expected
           by VEX-generated code. */
        lis     5,VG_(machine_ppc32_has_VMX)@ha
        lwz     5,VG_(machine_ppc32_has_VMX)@l(5)
        cmplwi  5,0
        beq     LafterVMX2

#ifdef HAS_ALTIVEC
        vspltisw 3,0x0  /* generate zero */
        mtvscr  3
#endif

LafterVMX2:

        /* make a stack frame for the code we are calling */
        stwu    1,-16(1)
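        /* Note: this extra 16-byte frame shifts all earlier sp-relative
           offsets up by 16; in particular the guest_state spill at 28(sp)
           above is now found at 44(sp), which is where the dispatchers
           below reload it from. */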

        /* fetch %CIA into r3 */
        lwz     3,OFFSET_ppc32_CIA(31)

        /* fall into main loop (the right one) */
	/* r4 = do_profiling.  It may get trashed beyond this point, but
	   that's OK: we don't need it again. */
	cmplwi	4,0
	beq	VG_(run_innerloop__dispatch_unprofiled)
	b	VG_(run_innerloop__dispatch_profiled)
	/*NOTREACHED*/

/*----------------------------------------------------*/
/*--- NO-PROFILING (standard) dispatcher           ---*/
/*----------------------------------------------------*/

.global	VG_(run_innerloop__dispatch_unprofiled)
VG_(run_innerloop__dispatch_unprofiled):
	/* At entry: Live regs:
		r1  (=sp)
		r3  (=CIA = next guest address)
		r29 (=dispatch_ctr)
		r31 (=guest_state)
	   Stack state:
		44(r1) (=orig guest_state)
	*/
	/* Has the guest state pointer been messed with?  If yes, exit.
           Also set up & VG_(tt_fast) early in an attempt at better
           scheduling. */
        lwz     9,44(1)              /* original guest_state ptr */
        lis     5,VG_(tt_fast)@ha
        addi    5,5,VG_(tt_fast)@l   /* & VG_(tt_fast) */
        cmpw    9,31
        bne     gsp_changed

        /* save the jump address in the guest state */
        stw     3,OFFSET_ppc32_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
	subi	29,29,1
	cmplwi	29,0
        beq     counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 3 */
	rlwinm	4,3,1, 29-VG_TT_FAST_BITS, 28	/* entry# * 8 */
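	/* (one rlwinm suffices: rotating r3 left by 1 and masking the
	   VG_TT_FAST_BITS bits that end at the 8-byte-entry boundary
	   equals ((r3 >>u 2) & VG_TT_FAST_MASK) << 3; the low two bits
	   carry no information since guest insns are word-aligned) */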
	add	5,5,4	/* & VG_(tt_fast)[entry#] */
	lwz	6,0(5)   /* .guest */
	lwz	7,4(5)   /* .host */
        cmpw    3,6
        bne     fast_lookup_failed

        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
	   r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
	/* start over */
	b	VG_(run_innerloop__dispatch_unprofiled)
	/*NOTREACHED*/
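
/* The loop above, in rough C-like pseudocode (a sketch only; the
   .guest/.host field names follow the loads above, and the entry type
   is assumed to match pub_core_transtab_asm.h):

      while (True) {
         if (gsp != original_gsp)   return gsp;               // gsp_changed
         gsp->CIA = cia;                                      // OFFSET_ppc32_CIA
         if (--dispatch_ctr == 0)   return VG_TRC_INNER_COUNTERZERO;
         entry = &VG_(tt_fast)[ (cia >> 2) & VG_TT_FAST_MASK ];
         if (entry->guest != cia)   return VG_TRC_INNER_FASTMISS;
         cia = run entry->host;                               // bctrl above
      }
*/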

/*----------------------------------------------------*/
/*--- PROFILING dispatcher (can be much slower)    ---*/
/*----------------------------------------------------*/

.global	VG_(run_innerloop__dispatch_profiled)
VG_(run_innerloop__dispatch_profiled):
	/* At entry: Live regs:
		r1  (=sp)
		r3  (=CIA = next guest address)
		r29 (=dispatch_ctr)
		r31 (=guest_state)
	   Stack state:
		44(r1) (=orig guest_state)
	*/
	/* Has the guest state pointer been messed with?  If yes, exit.
           Also set up & VG_(tt_fast) early in an attempt at better
           scheduling. */
        lwz     9,44(1)              /* original guest_state ptr */
        lis     5,VG_(tt_fast)@ha
        addi    5,5,VG_(tt_fast)@l   /* & VG_(tt_fast) */
        cmpw    9,31
        bne     gsp_changed

        /* save the jump address in the guest state */
        stw     3,OFFSET_ppc32_CIA(31)

        /* Are we out of timeslice?  If yes, defer to scheduler. */
	subi	29,29,1
	cmplwi	29,0
        beq     counter_is_zero

        /* try a fast lookup in the translation cache */
        /* r4 = VG_TT_FAST_HASH(addr)           * sizeof(FastCacheEntry)
              = ((r3 >>u 2) & VG_TT_FAST_MASK)  << 3 */
	rlwinm	4,3,1, 29-VG_TT_FAST_BITS, 28	/* entry# * 8 */
	add	5,5,4	/* & VG_(tt_fast)[entry#] */
	lwz	6,0(5)   /* .guest */
	lwz	7,4(5)   /* .host */
        cmpw    3,6
        bne     fast_lookup_failed

        /* increment bb profile counter */
	srwi	4,4,1	/* entry# * sizeof(UInt*) */
        addis   6,4,VG_(tt_fastN)@ha
        lwz     9,VG_(tt_fastN)@l(6)
        lwz     8,0(9)
        addi    8,8,1
        stw     8,0(9)
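        /* (r4 still holds entry#*8 from the rlwinm above; halved it is
           entry#*4, the byte index into VG_(tt_fastN), a parallel array
           of UInt* each pointing at that translation's execution count,
           which is bumped in place) */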

        /* Found a match.  Call .host. */
        mtctr   7
        bctrl

        /* On return from guest code:
	   r3  holds destination (original) address.
           r31 may be unchanged (guest_state), or may indicate further
           details of the control transfer requested to *r3.
        */
	/* start over */
	b	VG_(run_innerloop__dispatch_profiled)
	/*NOTREACHED*/

/*----------------------------------------------------*/
/*--- exit points                                  ---*/
/*----------------------------------------------------*/

gsp_changed:
	/* Someone messed with the gsp (in r31).  Have to
           defer to scheduler to resolve this.  dispatch ctr
	   is not yet decremented, so no need to increment. */
	/* %CIA is NOT up to date here.  First, need to write
	   %r3 back to %CIA, but without trashing %r31 since
	   that holds the value we want to return to the scheduler.
	   Hence use %r5 transiently for the guest state pointer. */
        lwz     5,44(1)         /* original guest_state ptr */
        stw     3,OFFSET_ppc32_CIA(5)
	mr	3,31		/* r3 = new gsp value */
	b	run_innerloop_exit
	/*NOTREACHED*/

counter_is_zero:
	/* %CIA is up to date */
	/* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_COUNTERZERO
        b       run_innerloop_exit

fast_lookup_failed:
	/* %CIA is up to date */
	/* back out decrement of the dispatch counter */
        addi    29,29,1
        li      3,VG_TRC_INNER_FASTMISS
	b       run_innerloop_exit



/* All exits from the dispatcher go through here.
   r3 holds the return value.
*/
run_innerloop_exit:
        /* We're leaving.  Check that nobody messed with
           VSCR or FPSCR. */

        /* Using r10 - value used again further on, so don't trash! */
        lis     10,VG_(machine_ppc32_has_FP)@ha
        lwz     10,VG_(machine_ppc32_has_FP)@l(10)
        cmplwi  10,0
        beq     LafterFP8

	/* Set fpscr back to a known state, since vex-generated code
	   may have messed with fpscr[rm]. */
        li      5,0
        addi    1,1,-16
        stw     5,0(1)
        lfs     3,0(1)
        addi    1,1,16
        mtfsf   0xFF,3   /* fpscr = f3 */
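        /* (the transient 16-byte push just provides a scratch slot from
           which to load a zero into f3; using 16 rather than 4 presumably
           keeps the stack quadword-aligned) */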
LafterFP8:

	/* Using r11 - value used again further on, so don't trash! */
        lis     11,VG_(machine_ppc32_has_VMX)@ha
        lwz     11,VG_(machine_ppc32_has_VMX)@l(11)
        cmplwi  11,0
        beq     LafterVMX8

#ifdef HAS_ALTIVEC
        /* Check VSCR[NJ] == 1 */
        /* first generate 4x 0x00010000 */
        vspltisw  4,0x1                   /* 4x 0x00000001 */
        vspltisw  5,0x0                   /* zero */
        vsldoi    6,4,5,0x2               /* <<2*8 => 4x 0x00010000 */
        /* retrieve VSCR and mask wanted bits */
        mfvscr    7
        vand      7,7,6                   /* gives NJ flag */
        vspltw    7,7,0x3                 /* flags-word to all lanes */
        vcmpequw. 8,6,7                   /* CR[24] = 1 if v6 == v7 */
        bt        24,invariant_violation  /* branch if all_equal */
#endif
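        /* (VSCR[NJ] is the AltiVec non-Java mode bit, i.e. flush
           denormals to zero; 0x00010000 is exactly that bit within the
           word returned by mfvscr) */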
LafterVMX8:

	/* otherwise we're OK */
        b       run_innerloop_exit_REALLY


invariant_violation:
        li      3,VG_TRC_INVARIANT_FAILED
        b       run_innerloop_exit_REALLY

run_innerloop_exit_REALLY:
        /* r3 holds VG_TRC_* value to return */

        /* Return to parent stack */
        addi    1,1,16

        /* Write ctr to VG_(dispatch_ctr) */
        lis     5,VG_(dispatch_ctr)@ha
        stw     29,VG_(dispatch_ctr)@l(5)

        /* Restore cr */
        lwz     0,44(1)
        mtcr    0

        /* Restore callee-saved registers... */

        /* r10 already holds VG_(machine_ppc32_has_FP) value */
        cmplwi  10,0
        beq     LafterFP9

        /* Floating-point regs */
        lfd     31,488(1)
        lfd     30,480(1)
        lfd     29,472(1)
        lfd     28,464(1)
        lfd     27,456(1)
        lfd     26,448(1)
        lfd     25,440(1)
        lfd     24,432(1)
        lfd     23,424(1)
        lfd     22,416(1)
        lfd     21,408(1)
        lfd     20,400(1)
        lfd     19,392(1)
        lfd     18,384(1)
        lfd     17,376(1)
        lfd     16,368(1)
        lfd     15,360(1)
        lfd     14,352(1)
LafterFP9:

        /* General regs */
        lwz     31,348(1)
        lwz     30,344(1)
        lwz     29,340(1)
        lwz     28,336(1)
        lwz     27,332(1)
        lwz     26,328(1)
        lwz     25,324(1)
        lwz     24,320(1)
        lwz     23,316(1)
        lwz     22,312(1)
        lwz     21,308(1)
        lwz     20,304(1)
        lwz     19,300(1)
        lwz     18,296(1)
        lwz     17,292(1)
        lwz     16,288(1)
        lwz     15,284(1)
        lwz     14,280(1)
        lwz     13,276(1)

        /* r11 already holds VG_(machine_ppc32_has_VMX) value */
        cmplwi  11,0
        beq     LafterVMX9

#ifdef HAS_ALTIVEC
        /* VRSAVE */
        lwz     4,244(1)
        mtspr   256,4         /* VRSAVE reg is spr number 256 */

        /* Vector regs */
        li      4,224
        lvx     31,4,1
        li      4,208
        lvx     30,4,1
        li      4,192
        lvx     29,4,1
        li      4,176
        lvx     28,4,1
        li      4,160
        lvx     27,4,1
        li      4,144
        lvx     26,4,1
        li      4,128
        lvx     25,4,1
        li      4,112
        lvx     24,4,1
        li      4,96
        lvx     23,4,1
        li      4,80
        lvx     22,4,1
        li      4,64
        lvx     21,4,1
        li      4,48
        lvx     20,4,1
#endif
LafterVMX9:

        /* reset lr & sp */
        lwz     0,500(1)  /* stack_size + 4 */
        mtlr    0
        addi    1,1,496   /* stack_size */
        blr
.size VG_(run_innerloop), .-VG_(run_innerloop)


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- A special dispatcher, for running no-redir           ---*/
/*--- translations.  Just runs the given translation once. ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* signature:
void VG_(run_a_noredir_translation) ( UWord* argblock );
*/

/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry args
   and 2 to carry results:
      0: input:  ptr to translation
      1: input:  ptr to guest state
      2: output: next guest PC
      3: output: guest state pointer afterwards (== thread return code)
*/
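
/* Caller-side sketch (illustrative only; variable names here are
   hypothetical, not the actual m_scheduler code):

      volatile UWord argblock[4];
      argblock[0] = (UWord)host_code;          // translation to run
      argblock[1] = (UWord)&tst->arch.vex;     // guest state
      VG_(run_a_noredir_translation)( &argblock[0] );
      Addr  next_guest_pc = argblock[2];
      UWord retval        = argblock[3];       // == argblock[1] unless the
                                               // translation made a request
*/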
.global VG_(run_a_noredir_translation)
.type VG_(run_a_noredir_translation), @function
VG_(run_a_noredir_translation):
	/* save callee-save int regs, & lr */
	stwu 1,-256(1)
	stw  14,128(1)
	stw  15,132(1)
	stw  16,136(1)
	stw  17,140(1)
	stw  18,144(1)
	stw  19,148(1)
	stw  20,152(1)
	stw  21,156(1)
	stw  22,160(1)
	stw  23,164(1)
	stw  24,168(1)
	stw  25,172(1)
	stw  26,176(1)
	stw  27,180(1)
	stw  28,184(1)
	stw  29,188(1)
	stw  30,192(1)
	stw  31,196(1)
	mflr 31
	stw  31,200(1)

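	/* Stash argblock, load the guest state pointer (argblock[1]) into
	   r31 and the translation's entry point (argblock[0]) into LR,
	   then call the translation. */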
	stw  3,204(1)
	lwz  31,4(3)
	lwz  30,0(3)
	mtlr 30
	blrl

	lwz  4,204(1)
	stw  3,  8(4)
	stw  31,12(4)

	lwz  14,128(1)
	lwz  15,132(1)
	lwz  16,136(1)
	lwz  17,140(1)
	lwz  18,144(1)
	lwz  19,148(1)
	lwz  20,152(1)
	lwz  21,156(1)
	lwz  22,160(1)
	lwz  23,164(1)
	lwz  24,168(1)
	lwz  25,172(1)
	lwz  26,176(1)
	lwz  27,180(1)
	lwz  28,184(1)
	lwz  29,188(1)
	lwz  30,192(1)
	lwz  31,200(1)
	mtlr 31
	lwz  31,196(1)
	addi 1,1,256
	blr
.size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)


/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits

#endif // defined(VGP_ppc32_linux)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/