
/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address.       ---*/
/*---                                       dispatch-amd64-linux.S ---*/
/*--------------------------------------------------------------------*/

/*
  This file is part of Valgrind, a dynamic binary instrumentation
  framework.

  Copyright (C) 2000-2011 Julian Seward
     jseward@acm.org

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of the
  License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  02111-1307, USA.

  The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGP_amd64_linux)

#include "pub_core_basics_asm.h"
#include "pub_core_dispatch_asm.h"
#include "pub_core_transtab_asm.h"
#include "libvex_guest_offsets.h"	/* for OFFSET_amd64_RIP */


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
/*--- run all translations except no-redir ones.           ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/*----------------------------------------------------*/
/*--- Preamble (set everything up)                 ---*/
/*----------------------------------------------------*/

/* signature:
UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
*/
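
/* A sketch of how the scheduler might call this (hypothetical variable
   names; the real call site lives in m_scheduler):

      UWord trc = VG_(run_innerloop)( (void*)&tst->arch.vex, do_profiling );

   The return value is a VG_TRC_* code telling the scheduler why the
   inner loop stopped. */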
     54 
     55 .text
     56 .globl VG_(run_innerloop)
     57 .type  VG_(run_innerloop), @function
     58 VG_(run_innerloop):
     59 	/* %rdi holds guest_state */
     60 	/* %rsi holds do_profiling */
     61 
     62 	/* ----- entry point to VG_(run_innerloop) ----- */
     63 	pushq	%rbx
     64 	pushq	%rcx
     65 	pushq	%rdx
     66 	pushq	%rsi
     67 	pushq	%rbp
     68 	pushq	%r8
     69 	pushq	%r9
     70 	pushq	%r10
     71 	pushq	%r11
     72 	pushq	%r12
     73 	pushq	%r13
     74 	pushq	%r14
     75 	pushq	%r15
     76 	pushq	%rdi  /* guest_state */
     77 
     78 	movq	VG_(dispatch_ctr)@GOTPCREL(%rip), %r15
     79 	movl	(%r15), %r15d
     80 	pushq	%r15
     81 
     82 	/* 8(%rsp) holds cached copy of guest_state ptr */
     83 	/* 0(%rsp) holds cached copy of VG_(dispatch_ctr) */
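
	/* Stack layout at this point, low to high (a sketch):
	      0(%rsp)    local copy of VG_(dispatch_ctr)
	      8(%rsp)    guest_state ptr (the saved %rdi)
	      16(%rsp)   the 13 integer registers pushed above
	      120(%rsp)  return address
	*/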

	/* Set up the guest state pointer */
	movq	%rdi, %rbp

	/* fetch %RIP into %rax */
	movq	OFFSET_amd64_RIP(%rbp), %rax

	/* set host FPU control word to the default mode expected
	   by VEX-generated code.  See comments in libvex.h for
	   more info. */
	finit
	pushq	$0x027F
	fldcw	(%rsp)
	addq	$8, %rsp
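	/* 0x027F: all x87 exceptions masked, 53-bit (double) precision,
	   round to nearest -- the x87 state VEX-generated code assumes. */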

	/* set host SSE control word to the default mode expected
	   by VEX-generated code. */
	pushq	$0x1F80
	ldmxcsr	(%rsp)
	addq	$8, %rsp
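	/* 0x1F80 is the power-on default for %mxcsr: all SSE exceptions
	   masked, round to nearest, FTZ and DAZ off. */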

	/* set dir flag to known value */
	cld

	/* fall into main loop  (the right one) */
	cmpq	$0, %rsi
	je	VG_(run_innerloop__dispatch_unassisted_unprofiled)
	jmp	VG_(run_innerloop__dispatch_unassisted_profiled)
	/*NOTREACHED*/

/*----------------------------------------------------*/
/*--- NO-PROFILING (standard) dispatcher           ---*/
/*----------------------------------------------------*/

.align	16
.global	VG_(run_innerloop__dispatch_unassisted_unprofiled)
VG_(run_innerloop__dispatch_unassisted_unprofiled):
	/* AT ENTRY: %rax is next guest addr, %rbp is the
	   unmodified guest state ptr */

	/* save the jump address in the guest state */
	movq	%rax, OFFSET_amd64_RIP(%rbp)

	/* Are we out of timeslice?  If yes, defer to scheduler. */
	subl	$1, 0(%rsp)
	jz	counter_is_zero

	/* try a fast lookup in the translation cache */
	movabsq	$VG_(tt_fast), %rcx
	movq	%rax, %rbx		/* next guest addr */
	andq	$VG_TT_FAST_MASK, %rbx	/* entry# */
	shlq	$4, %rbx		/* entry# * sizeof(FastCacheEntry) */
	movq	0(%rcx,%rbx,1), %r10	/* .guest */
	movq	8(%rcx,%rbx,1), %r11	/* .host */
	cmpq	%rax, %r10
	jnz	fast_lookup_failed
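	/* C-level sketch of the lookup above (field names follow the
	   offsets used here; FastCacheEntry is two 8-byte words,
	   .guest then .host):

	      ULong n = guest_addr & VG_TT_FAST_MASK;
	      if (VG_(tt_fast)[n].guest == guest_addr)
	         goto *VG_(tt_fast)[n].host;  // hit: run the translation
	      // else fall through to fast_lookup_failed
	*/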

	/* Found a match.  Jump to .host. */
	jmp	*%r11
	ud2	/* persuade insn decoders not to speculate past here */
	/* generated code should run, then jump back to either
	   VG_(run_innerloop__dispatch_unassisted_unprofiled) or
	   VG_(run_innerloop__dispatch_assisted_unprofiled). */
	/*NOTREACHED*/

.align	16
.global	VG_(run_innerloop__dispatch_assisted_unprofiled)
VG_(run_innerloop__dispatch_assisted_unprofiled):
	/* AT ENTRY: %rax is next guest addr, %rbp is the
	   modified guest state ptr */
	/* We know the guest state pointer has been modified.
	   So jump directly to gsp_changed. */
	jmp	gsp_changed
	ud2
	/*NOTREACHED*/

/*----------------------------------------------------*/
/*--- PROFILING dispatcher (can be much slower)    ---*/
/*----------------------------------------------------*/

.align	16
.global	VG_(run_innerloop__dispatch_unassisted_profiled)
VG_(run_innerloop__dispatch_unassisted_profiled):
	/* AT ENTRY: %rax is next guest addr, %rbp is the
	   unmodified guest state ptr */

	/* save the jump address in the guest state */
	movq	%rax, OFFSET_amd64_RIP(%rbp)

	/* Are we out of timeslice?  If yes, defer to scheduler. */
	subl	$1, 0(%rsp)
	jz	counter_is_zero

	/* try a fast lookup in the translation cache */
	movabsq	$VG_(tt_fast), %rcx
	movq	%rax, %rbx
	andq	$VG_TT_FAST_MASK, %rbx	/* entry# */
	shlq	$4, %rbx		/* entry# * sizeof(FastCacheEntry) */
	movq	0(%rcx,%rbx,1), %r10	/* .guest */
	movq	8(%rcx,%rbx,1), %r11	/* .host */
	cmpq	%rax, %r10
	jnz	fast_lookup_failed

	/* increment bb profile counter */
	movabsq	$VG_(tt_fastN), %rdx
	shrq	$1, %rbx		/* entry# * sizeof(UInt*) */
	movq	(%rdx,%rbx,1), %rdx
	addl	$1, (%rdx)
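	/* Note on the shrq above: %rbx still holds entry# * 16 from the
	   lookup.  VG_(tt_fastN) is a parallel array of UInt* (8 bytes
	   per entry), so halving gives entry# * 8, the right byte offset
	   into that array. */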

	/* Found a match.  Jump to .host. */
	jmp	*%r11
	ud2	/* persuade insn decoders not to speculate past here */
	/* generated code should run, then jump back to either
	   VG_(run_innerloop__dispatch_unassisted_profiled) or
	   VG_(run_innerloop__dispatch_assisted_profiled). */
	/*NOTREACHED*/

.align	16
.global	VG_(run_innerloop__dispatch_assisted_profiled)
VG_(run_innerloop__dispatch_assisted_profiled):
	/* AT ENTRY: %rax is next guest addr, %rbp is the
	   modified guest state ptr */

	/* Well, we know the guest state pointer has been modified.
	   So jump directly to gsp_changed. */
	jmp	gsp_changed
	ud2
	/*NOTREACHED*/

/*----------------------------------------------------*/
/*--- exit points                                  ---*/
/*----------------------------------------------------*/

gsp_changed:
	/* Someone messed with the gsp.  Have to
	   defer to scheduler to resolve this.  dispatch ctr
	   is not yet decremented, so no need to increment. */
	/* %RIP is NOT up to date here.  First, need to write
	   %rax back to %RIP, but without trashing %rbp since
	   that holds the value we want to return to the scheduler.
	   Hence use %r15 transiently for the guest state pointer. */
	movq	8(%rsp), %r15
	movq	%rax, OFFSET_amd64_RIP(%r15)
	movq	%rbp, %rax
	jmp	run_innerloop_exit
	/*NOTREACHED*/

counter_is_zero:
	/* %RIP is up to date here */
	/* back out decrement of the dispatch counter */
	addl	$1, 0(%rsp)
	movq	$VG_TRC_INNER_COUNTERZERO, %rax
	jmp	run_innerloop_exit

fast_lookup_failed:
	/* %RIP is up to date here */
	/* back out decrement of the dispatch counter */
	addl	$1, 0(%rsp)
	movq	$VG_TRC_INNER_FASTMISS, %rax
	jmp	run_innerloop_exit
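	/* Roughly what the scheduler does with these codes: on
	   VG_TRC_INNER_FASTMISS it does a full translation-table search
	   (translating the block first if needed), refills the fast
	   cache, and re-enters the dispatcher; on
	   VG_TRC_INNER_COUNTERZERO it reschedules threads and resets the
	   dispatch counter. */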


/* All exits from the dispatcher go through here.  %rax holds
   the return value.
*/
run_innerloop_exit:
	/* We're leaving.  Check that nobody messed with
	   %mxcsr or %fpucw.  We can't mess with %rax here as it
	   holds the tentative return value, but any other is OK. */
#if !defined(ENABLE_INNER)
	/* This check fails for self-hosting, so skip in that case */
	pushq	$0
	fstcw	(%rsp)
	cmpl	$0x027F, (%rsp)
	popq	%r15 /* get rid of the word without trashing %eflags */
	jnz	invariant_violation
#endif
	pushq	$0
	stmxcsr	(%rsp)
	andl	$0xFFFFFFC0, (%rsp)  /* mask out status flags */
	cmpl	$0x1F80, (%rsp)
	popq	%r15
	jnz	invariant_violation
	/* otherwise we're OK */
	jmp	run_innerloop_exit_REALLY

invariant_violation:
	movq	$VG_TRC_INVARIANT_FAILED, %rax
	jmp	run_innerloop_exit_REALLY

run_innerloop_exit_REALLY:

	/* restore VG_(dispatch_ctr) */
	popq	%r14
	movq	VG_(dispatch_ctr)@GOTPCREL(%rip), %r15
	movl	%r14d, (%r15)

	popq	%rdi
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%r11
	popq	%r10
	popq	%r9
	popq	%r8
	popq	%rbp
	popq	%rsi
	popq	%rdx
	popq	%rcx
	popq	%rbx
	ret
.size VG_(run_innerloop), .-VG_(run_innerloop)


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- A special dispatcher, for running no-redir           ---*/
/*--- translations.  Just runs the given translation once. ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* signature:
void VG_(run_a_noredir_translation) ( UWord* argblock );
*/

/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry args
   and 2 to carry results:
      0: input:  ptr to translation
      1: input:  ptr to guest state
      2: output: next guest PC
      3: output: guest state pointer afterwards (== thread return code)
*/
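
/* A sketch of a call, per the layout above (hypothetical names; the
   real call site is in m_scheduler):

      UWord argblock[4];
      argblock[0] = (UWord)hcode;           // host code of the translation
      argblock[1] = (UWord)&tst->arch.vex;  // guest state
      VG_(run_a_noredir_translation)( &argblock[0] );
      // argblock[2] = next guest PC, argblock[3] = thread return code
*/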
.align 16
.global VG_(run_a_noredir_translation)
.type VG_(run_a_noredir_translation), @function
VG_(run_a_noredir_translation):
	/* Save callee-saves regs */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	pushq	%rdi  /* we will need it after running the translation */
	movq	8(%rdi), %rbp
	jmp	*0(%rdi)
	/*NOTREACHED*/
	ud2
	/* If the translation has been correctly constructed, we
	   should resume at the following label. */
.global VG_(run_a_noredir_translation__return_point)
VG_(run_a_noredir_translation__return_point):
	popq	%rdi
	movq	%rax, 16(%rdi)
	movq	%rbp, 24(%rdi)

	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ret
.size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)

/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits

#endif // defined(VGP_amd64_linux)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/