/* Home | History | Annotate | Download | only in out  (code-browser page header, kept as a comment) */
      1 /*
      2  * This file was generated automatically by gen-template.py for 'mips'.
      3  *
      4  * --> DO NOT EDIT <--
      5  */
      6 
      7 /* File: mips/header.S */
      8 /*
      9  * Copyright (C) 2008 The Android Open Source Project
     10  *
     11  * Licensed under the Apache License, Version 2.0 (the "License");
     12  * you may not use this file except in compliance with the License.
     13  * You may obtain a copy of the License at
     14  *
     15  *      http://www.apache.org/licenses/LICENSE-2.0
     16  *
     17  * Unless required by applicable law or agreed to in writing, software
     18  * distributed under the License is distributed on an "AS IS" BASIS,
     19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     20  * See the License for the specific language governing permissions and
     21  * limitations under the License.
     22  */
     23 
     24 #if defined(WITH_JIT)
     25 
     26 /*
     27  * This is a #include, not a %include, because we want the C pre-processor
     28  * to expand the macros into assembler assignment statements.
     29  */
     30 #include "../../../mterp/common/asm-constants.h"
     31 #include "../../../mterp/common/mips-defines.h"
     32 #include "../../../mterp/common/jit-config.h"
     33 #include <asm/regdef.h>
     34 #include <asm/fpregdef.h>
     35 
     36 #ifdef	__mips_hard_float
     37 #define		HARD_FLOAT
     38 #else
     39 #define		SOFT_FLOAT
     40 #endif
     41 
     42 /* MIPS definitions and declarations
     43 
     44    reg	nick		purpose
     45    s0	rPC		interpreted program counter, used for fetching instructions
     46    s1	rFP		interpreted frame pointer, used for accessing locals and args
     47    s2	rSELF		pointer to thread
     48    s3	rIBASE		interpreted instruction base pointer, used for computed goto
     49    s4	rINST		first 16-bit code unit of current instruction
     50 */
     51 
     52 /* register offsets */
     53 #define r_ZERO      0
     54 #define r_AT        1
     55 #define r_V0        2
     56 #define r_V1        3
     57 #define r_A0        4
     58 #define r_A1        5
     59 #define r_A2        6
     60 #define r_A3        7
     61 #define r_T0        8
     62 #define r_T1        9
     63 #define r_T2        10
     64 #define r_T3        11
     65 #define r_T4        12
     66 #define r_T5        13
     67 #define r_T6        14
     68 #define r_T7        15
     69 #define r_S0        16
     70 #define r_S1        17
     71 #define r_S2        18
     72 #define r_S3        19
     73 #define r_S4        20
     74 #define r_S5        21
     75 #define r_S6        22
     76 #define r_S7        23
     77 #define r_T8        24
     78 #define r_T9        25
     79 #define r_K0        26
     80 #define r_K1        27
     81 #define r_GP        28
     82 #define r_SP        29
     83 #define r_FP        30
     84 #define r_RA        31
     85 #define r_F0        32
     86 #define r_F1        33
     87 #define r_F2        34
     88 #define r_F3        35
     89 #define r_F4        36
     90 #define r_F5        37
     91 #define r_F6        38
     92 #define r_F7        39
     93 #define r_F8        40
     94 #define r_F9        41
     95 #define r_F10       42
     96 #define r_F11       43
     97 #define r_F12       44
     98 #define r_F13       45
     99 #define r_F14       46
    100 #define r_F15       47
    101 #define r_F16       48
    102 #define r_F17       49
    103 #define r_F18       50
    104 #define r_F19       51
    105 #define r_F20       52
    106 #define r_F21       53
    107 #define r_F22       54
    108 #define r_F23       55
    109 #define r_F24       56
    110 #define r_F25       57
    111 #define r_F26       58
    112 #define r_F27       59
    113 #define r_F28       60
    114 #define r_F29       61
    115 #define r_F30       62
    116 #define r_F31       63
    117 
    118 /* single-purpose registers, given names for clarity */
    119 #define rPC	s0
    120 #define rFP	s1
    121 #define rSELF	s2
    122 #define rIBASE	s3
    123 #define rINST	s4
    124 #define rOBJ	s5
    125 #define rBIX	s6
    126 #define rTEMP	s7
    127 
/*
 * A 64-bit (long) argument or result lives in an a- or v-register pair
 * whose word order depends on endianness.  rARG0/rRESULT0 always name
 * the LOW word and rARG1/rRESULT1 the HIGH word, so template code can be
 * written endian-neutrally: on little-endian the low word sits in the
 * lower-numbered register (a0/a2/v0), on big-endian in the
 * higher-numbered one (a1/a3/v1).
 */

#ifdef HAVE_LITTLE_ENDIAN
#define rARG0     a0
#define rARG1     a1
#define rARG2     a2
#define rARG3     a3
#define rRESULT0  v0
#define rRESULT1  v1
#else
#define rARG0     a1
#define rARG1     a0
#define rARG2     a3
#define rARG3     a2
#define rRESULT0  v1
#define rRESULT1  v0
#endif
    148 
    149 
/* save/restore the PC and/or FP from the thread struct */
#define LOAD_PC_FROM_SELF()	lw	rPC, offThread_pc(rSELF)
#define SAVE_PC_TO_SELF()	sw	rPC, offThread_pc(rSELF)
#define LOAD_FP_FROM_SELF()	lw	rFP, offThread_curFrame(rSELF)
#define SAVE_FP_TO_SELF()	sw	rFP, offThread_curFrame(rSELF)

/* publish rPC into the current frame's StackSaveArea, which sits
 * immediately below rFP */
#define EXPORT_PC() \
	sw	rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)

/* rd <- address of the StackSaveArea below frame pointer _fpreg */
#define SAVEAREA_FROM_FP(rd, _fpreg) \
	subu	rd, _fpreg, sizeofStackSaveArea

/* fetch the 16-bit code unit at rPC into rINST */
#define FETCH_INST()			lhu	rINST, (rPC)

/* fetch the code unit _count units ahead of rPC, then advance rPC */
#define FETCH_ADVANCE_INST(_count)	lhu     rINST, (_count*2)(rPC); \
					addu	rPC, rPC, (_count * 2)

/* advance rPC by the byte offset in rd, then fetch the new code unit */
#define FETCH_ADVANCE_INST_RB(rd)	addu	rPC, rPC, rd;	\
					lhu     rINST, (rPC)

/* fetch the code unit at code-unit offset _count (unsigned / signed) */
#define FETCH(rd, _count)		lhu	rd, (_count * 2)(rPC)
#define FETCH_S(rd, _count)		lh	rd, (_count * 2)(rPC)

/* fetch the low (B) / high (C) byte of a code unit, endian-corrected */
#ifdef HAVE_LITTLE_ENDIAN

#define FETCH_B(rd, _count)            lbu     rd, (_count * 2)(rPC)
#define FETCH_C(rd, _count)            lbu     rd, (_count * 2 + 1)(rPC)

#else

#define FETCH_B(rd, _count)            lbu     rd, (_count * 2 + 1)(rPC)
#define FETCH_C(rd, _count)            lbu     rd, (_count * 2)(rPC)

#endif

/* extract the opcode (low byte of the code unit) from rINST */
#define GET_INST_OPCODE(rd)		and	rd, rINST, 0xFF

/*
 * NOTE(review): the shift amount -1000 looks like the unexpanded
 * ${handler_size_bits} default left behind by gen-template.py; the
 * compiler templates presumably never reference GOTO_OPCODE or
 * SET_VREG_GOTO, so the poison value never reaches the assembler --
 * confirm before using either macro here.
 */
#define GOTO_OPCODE(rd)			sll  rd, rd, -1000;	\
					addu rd, rIBASE, rd;	\
					jr  rd


/* load/store a 32-bit value (or single-precision float) at rbase */
#define LOAD(rd, rbase)			lw  rd, 0(rbase)
#define LOAD_F(rd, rbase)		l.s rd, (rbase)
#define STORE(rd, rbase)		sw  rd, 0(rbase)
#define STORE_F(rd, rbase)		s.s rd, (rbase)

/* read/write Dalvik virtual register rix relative to rFP (clobbers AT) */
#define GET_VREG(rd, rix)		LOAD_eas2(rd,rFP,rix)

#define GET_VREG_F(rd, rix)		EAS2(AT, rFP, rix);		\
					.set noat;  l.s rd, (AT); .set at

#define SET_VREG(rd, rix)		STORE_eas2(rd, rFP, rix)

/* store rd to vreg rix and dispatch through dst, with the store placed
 * in the jump's delay slot (same -1000 placeholder caveat as above) */
#define SET_VREG_GOTO(rd, rix, dst)	.set noreorder;		\
					sll  dst, dst, -1000;	\
					addu dst, rIBASE, dst;			\
					sll  t8, rix, 2;	\
					addu t8, t8, rFP;	\
					jr  dst;		\
					sw  rd, 0(t8);		\
					.set reorder

#define SET_VREG_F(rd, rix)		EAS2(AT, rFP, rix);		\
					.set noat;  s.s	rd, (AT); .set at


/* extract instruction fields: A = bits 8-15, A4 = bits 8-11, B = bits 12-15 */
#define GET_OPA(rd)			srl     rd, rINST, 8
#ifndef		MIPS32R2
#define GET_OPA4(rd)			GET_OPA(rd);  and  rd, 0xf
#else
/* NOTE(review): the R2 form extracts from rd itself, not from rINST --
 * it only matches the non-R2 form if rd already holds rINST; confirm
 * against the macro's call sites. */
#define GET_OPA4(rd)			ext	rd, rd, 8, 4
#endif
#define GET_OPB(rd)			srl     rd, rINST, 12

/* rd <- rSELF->off (field offsets come from asm-constants.h) */
#define LOAD_rSELF_OFF(rd,off)		lw    rd, offThread_##off##(rSELF)

#define LOAD_rSELF_method(rd)		LOAD_rSELF_OFF(rd, method)
#define LOAD_rSELF_methodClassDex(rd)	LOAD_rSELF_OFF(rd, methodClassDex)
#define LOAD_rSELF_interpStackEnd(rd)	LOAD_rSELF_OFF(rd, interpStackEnd)
#define LOAD_rSELF_retval(rd)		LOAD_rSELF_OFF(rd, retval)
#define LOAD_rSELF_pActiveProfilers(rd)	LOAD_rSELF_OFF(rd, pActiveProfilers)
#define LOAD_rSELF_bailPtr(rd)		LOAD_rSELF_OFF(rd, bailPtr)

#define GET_JIT_PROF_TABLE(rd)		LOAD_rSELF_OFF(rd,pJitProfTable)
#define GET_JIT_THRESHOLD(rd)		LOAD_rSELF_OFF(rd,jitThreshold)
    235 #define GET_JIT_THRESHOLD(rd)		LOAD_rSELF_OFF(rd,jitThreshold)
    236 
    237 /*
    238  * Form an Effective Address rd = rbase + roff<<n;
    239  * Uses reg AT
    240  */
    241 #define EASN(rd,rbase,roff,rshift)	.set noat;		\
    242 					sll  AT, roff, rshift;	\
    243 					addu rd, rbase, AT;	\
    244 					.set at
    245 
    246 #define EAS1(rd,rbase,roff)		EASN(rd,rbase,roff,1)
    247 #define EAS2(rd,rbase,roff)		EASN(rd,rbase,roff,2)
    248 #define EAS3(rd,rbase,roff)		EASN(rd,rbase,roff,3)
    249 #define EAS4(rd,rbase,roff)		EASN(rd,rbase,roff,4)
    250 
    251 /*
    252  * Form an Effective Shift Right rd = rbase + roff>>n;
    253  * Uses reg AT
    254  */
    255 #define ESRN(rd,rbase,roff,rshift)	.set noat;		\
    256 					srl  AT, roff, rshift;	\
    257 					addu rd, rbase, AT;	\
    258 					.set at
    259 
    260 #define LOAD_eas2(rd,rbase,roff)	EAS2(AT, rbase, roff);  \
    261 					.set noat;  lw  rd, 0(AT); .set at
    262 
    263 #define STORE_eas2(rd,rbase,roff)	EAS2(AT, rbase, roff);  \
    264 					.set noat;  sw  rd, 0(AT); .set at
    265 
    266 #define LOAD_RB_OFF(rd,rbase,off)	lw	rd, off(rbase)
    267 #define LOADu2_RB_OFF(rd,rbase,off)	lhu	rd, off(rbase)
    268 #define STORE_RB_OFF(rd,rbase,off)	sw	rd, off(rbase)
    269 
    270 #ifdef HAVE_LITTLE_ENDIAN
    271 
    272 #define STORE64_off(rlo,rhi,rbase,off)	        sw	rlo, off(rbase);	\
    273 					        sw	rhi, (off+4)(rbase)
    274 #define LOAD64_off(rlo,rhi,rbase,off)	        lw	rlo, off(rbase);	\
    275 					        lw	rhi, (off+4)(rbase)
    276 
    277 #define STORE64_off_F(rlo,rhi,rbase,off)	s.s	rlo, off(rbase);	\
    278 						s.s	rhi, (off+4)(rbase)
    279 #define LOAD64_off_F(rlo,rhi,rbase,off)		l.s	rlo, off(rbase);	\
    280 						l.s	rhi, (off+4)(rbase)
    281 #else
    282 
    283 #define STORE64_off(rlo,rhi,rbase,off)	        sw	rlo, (off+4)(rbase);	\
    284 					        sw	rhi, (off)(rbase)
    285 #define LOAD64_off(rlo,rhi,rbase,off)	        lw	rlo, (off+4)(rbase);	\
    286 					        lw	rhi, (off)(rbase)
    287 #define STORE64_off_F(rlo,rhi,rbase,off)	s.s	rlo, (off+4)(rbase);	\
    288 						s.s	rhi, (off)(rbase)
    289 #define LOAD64_off_F(rlo,rhi,rbase,off)		l.s	rlo, (off+4)(rbase);	\
    290 						l.s	rhi, (off)(rbase)
    291 #endif
    292 
    293 #define STORE64(rlo,rhi,rbase)		STORE64_off(rlo,rhi,rbase,0)
    294 #define LOAD64(rlo,rhi,rbase)		LOAD64_off(rlo,rhi,rbase,0)
    295 
    296 #define STORE64_F(rlo,rhi,rbase)	STORE64_off_F(rlo,rhi,rbase,0)
    297 #define LOAD64_F(rlo,rhi,rbase)		LOAD64_off_F(rlo,rhi,rbase,0)
    298 
    299 #define STORE64_lo(rd,rbase)		sw	rd, 0(rbase)
    300 #define STORE64_hi(rd,rbase)		sw	rd, 4(rbase)
    301 
    302 
    303 #define LOAD_offThread_exception(rd,rbase)		LOAD_RB_OFF(rd,rbase,offThread_exception)
    304 #define LOAD_base_offArrayObject_length(rd,rbase)	LOAD_RB_OFF(rd,rbase,offArrayObject_length)
    305 #define LOAD_base_offClassObject_accessFlags(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_accessFlags)
    306 #define LOAD_base_offClassObject_descriptor(rd,rbase)   LOAD_RB_OFF(rd,rbase,offClassObject_descriptor)
    307 #define LOAD_base_offClassObject_super(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_super)
    308 
    309 #define LOAD_base_offClassObject_vtable(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_vtable)
    310 #define LOAD_base_offClassObject_vtableCount(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_vtableCount)
    311 #define LOAD_base_offDvmDex_pResClasses(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResClasses)
    312 #define LOAD_base_offDvmDex_pResFields(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResFields)
    313 
    314 #define LOAD_base_offDvmDex_pResMethods(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResMethods)
    315 #define LOAD_base_offDvmDex_pResStrings(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResStrings)
    316 #define LOAD_base_offInstField_byteOffset(rd,rbase)	LOAD_RB_OFF(rd,rbase,offInstField_byteOffset)
    317 #define LOAD_base_offStaticField_value(rd,rbase)	LOAD_RB_OFF(rd,rbase,offStaticField_value)
    318 #define LOAD_base_offMethod_clazz(rd,rbase)		LOAD_RB_OFF(rd,rbase,offMethod_clazz)
    319 
    320 #define LOAD_base_offMethod_name(rd,rbase)		LOAD_RB_OFF(rd,rbase,offMethod_name)
    321 #define LOAD_base_offObject_clazz(rd,rbase)		LOAD_RB_OFF(rd,rbase,offObject_clazz)
    322 
    323 #define LOADu2_offMethod_methodIndex(rd,rbase)		LOADu2_RB_OFF(rd,rbase,offMethod_methodIndex)
    324 
    325 
    326 #define STORE_offThread_exception(rd,rbase)		STORE_RB_OFF(rd,rbase,offThread_exception)
    327 
    328 
    329 #define	STACK_STORE(rd,off)	sw   rd, off(sp)
    330 #define	STACK_LOAD(rd,off)	lw   rd, off(sp)
    331 #define CREATE_STACK(n)	 	subu sp, sp, n
    332 #define DELETE_STACK(n)	 	addu sp, sp, n
    333 
    334 #define SAVE_RA(offset)	 	STACK_STORE(ra, offset)
    335 #define LOAD_RA(offset)	 	STACK_LOAD(ra, offset)
    336 
    337 #define LOAD_ADDR(dest,addr)	la   dest, addr
    338 #define LOAD_IMM(dest, imm)	li   dest, imm
    339 #define MOVE_REG(dest,src)	move dest, src
    340 #define	RETURN			jr   ra
    341 #define	STACK_SIZE		128
    342 
    343 #define STACK_OFFSET_ARG04	16
    344 #define STACK_OFFSET_GP		84
    345 #define STACK_OFFSET_rFP	112
    346 
    347 /* This directive will make sure all subsequent jal restore gp at a known offset */
    348         .cprestore STACK_OFFSET_GP
    349 
    350 #define JAL(func)		move rTEMP, ra;				\
    351 				jal  func;				\
    352 				move ra, rTEMP
    353 
    354 #define JALR(reg)		move rTEMP, ra;				\
    355 				jalr ra, reg;				\
    356 				move ra, rTEMP
    357 
    358 #define BAL(n)			bal  n
    359 
    360 #define	STACK_STORE_RA()  	CREATE_STACK(STACK_SIZE);		\
    361 				STACK_STORE(gp, STACK_OFFSET_GP);	\
    362 				STACK_STORE(ra, 124)
    363 
    364 #define	STACK_STORE_S0()  	STACK_STORE_RA();			\
    365 				STACK_STORE(s0, 116)
    366 
    367 #define	STACK_STORE_S0S1()  	STACK_STORE_S0();			\
    368 				STACK_STORE(s1, STACK_OFFSET_rFP)
    369 
    370 #define	STACK_LOAD_RA()		STACK_LOAD(ra, 124);			\
    371 				STACK_LOAD(gp, STACK_OFFSET_GP);	\
    372 				DELETE_STACK(STACK_SIZE)
    373 
    374 #define	STACK_LOAD_S0()  	STACK_LOAD(s0, 116);			\
    375 				STACK_LOAD_RA()
    376 
    377 #define	STACK_LOAD_S0S1()  	STACK_LOAD(s1, STACK_OFFSET_rFP);	\
    378 				STACK_LOAD_S0()
    379 
    380 #define STACK_STORE_FULL()	CREATE_STACK(STACK_SIZE);	\
    381 				STACK_STORE(ra, 124);		\
    382 				STACK_STORE(fp, 120);		\
    383 				STACK_STORE(s0, 116);		\
    384 				STACK_STORE(s1, STACK_OFFSET_rFP);	\
    385 				STACK_STORE(s2, 108);		\
    386 				STACK_STORE(s3, 104);		\
    387 				STACK_STORE(s4, 100);		\
    388 				STACK_STORE(s5, 96);		\
    389 				STACK_STORE(s6, 92);		\
    390 				STACK_STORE(s7, 88);
    391 
    392 #define STACK_LOAD_FULL()	STACK_LOAD(gp, STACK_OFFSET_GP);	\
    393 				STACK_LOAD(s7, 88);	\
    394 				STACK_LOAD(s6, 92);	\
    395 				STACK_LOAD(s5, 96);	\
    396 				STACK_LOAD(s4, 100);	\
    397 				STACK_LOAD(s3, 104);	\
    398 				STACK_LOAD(s2, 108);	\
    399 				STACK_LOAD(s1, STACK_OFFSET_rFP);	\
    400 				STACK_LOAD(s0, 116);	\
    401 				STACK_LOAD(fp, 120);	\
    402 				STACK_LOAD(ra, 124);	\
    403 				DELETE_STACK(STACK_SIZE)
    404 
    405 /*
    406  * first 8 words are reserved for function calls
    407  * Maximum offset is STACK_OFFSET_SCRMX-STACK_OFFSET_SCR
    408  */
    409 #define STACK_OFFSET_SCR   32
    410 #define SCRATCH_STORE(r,off) \
    411     STACK_STORE(r, STACK_OFFSET_SCR+off);
    412 #define SCRATCH_LOAD(r,off) \
    413     STACK_LOAD(r, STACK_OFFSET_SCR+off);
    414 
/* File: mips/platform.S */
/*
 * ===========================================================================
 *  CPU-version-specific defines and utility
 * ===========================================================================
 */



    .global dvmCompilerTemplateStart
    .type   dvmCompilerTemplateStart, %function
    /*
     * NOTE(review): the templates are emitted into .data.rel.ro rather
     * than .text -- presumably because the JIT copies them into its code
     * cache instead of executing them in place; confirm against the
     * code-cache setup before changing the section.
     */
    .section .data.rel.ro

dvmCompilerTemplateStart:
    429 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMP_LONG
dvmCompiler_TEMPLATE_CMP_LONG:
/* File: mips/TEMPLATE_CMP_LONG.S */
    /*
     * Compare two 64-bit values
     *    x = y     return  0
     *    x < y     return -1
     *    x > y     return  1
     *
     * I think I can improve on the ARM code by the following observation
     *    slt   t0,  x.hi, y.hi;        # (x.hi < y.hi) ? 1:0
     *    sgt   t1,  x.hi, y.hi;        # (x.hi > y.hi) ? 1:0
     *    subu  v0, t1, t0              # v0= -1:1:0 for [ < > = ]
     *
     * This code assumes the register pair ordering will depend on endianess (a1:a0 or a0:a1).
     *    a1:a0 => vBB
     *    a3:a2 => vCC
     */
    /* cmp-long vAA, vBB, vCC */
    slt    t0, rARG1, rARG3             # t0<- (x.hi < y.hi) ? 1:0, signed compare
    sgt    t1, rARG1, rARG3             # t1<- (x.hi > y.hi) ? 1:0
    subu   v0, t1, t0                   # v0<- (-1,1,0)
    bnez   v0, .LTEMPLATE_CMP_LONG_finish
                                        # at this point x.hi==y.hi
    sltu   t0, rARG0, rARG2             # compare lo halves as unsigned
    sgtu   t1, rARG0, rARG2
    subu   v0, t1, t0                   # v0<- (-1,1,0) for [< > =]
.LTEMPLATE_CMP_LONG_finish:
    RETURN
    461 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_RETURN
dvmCompiler_TEMPLATE_RETURN:
/* File: mips/TEMPLATE_RETURN.S */
    /*
     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
     * If the stored value in returnAddr
     * is non-zero, the caller is compiled by the JIT thus return to the
     * address in the code cache following the invoke instruction. Otherwise
     * return to the special dvmJitToInterpNoChain entry point.
     */
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a2 and ra
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(ra, 12)

    # a0=rSELF
    move    a0, rSELF
    la      t9, dvmFastMethodTraceExit
    JALR(t9)                                       # ra preserved via rTEMP
    lw      gp, STACK_OFFSET_GP(sp)                # re-establish gp after call

    # restore a0-a2 and ra
    SCRATCH_LOAD(ra, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif
    SAVEAREA_FROM_FP(a0, rFP)           # a0<- saveArea (old)
    lw      t0, offStackSaveArea_prevFrame(a0)     # t0<- saveArea->prevFrame
    lbu     t1, offThread_breakFlags(rSELF)        # t1<- breakFlags
    lw      rPC, offStackSaveArea_savedPc(a0)      # rPC<- saveArea->savedPc
#if !defined(WITH_SELF_VERIFICATION)
    lw      t2,  offStackSaveArea_returnAddr(a0)   # t2<- chaining cell ret
#else
    move    t2, zero                               # disable chaining
#endif
    lw      a2, offStackSaveArea_method - sizeofStackSaveArea(t0)
                                                   # a2<- method we're returning to
#if !defined(WITH_SELF_VERIFICATION)
    beq     a2, zero, 1f                           # method==NULL: "break" frame, bail to interpreter
#else
    bne     a2, zero, 2f
    JALR(ra)                                       # punt to interpreter and compare state
    # DOUG: assume this does not return ???
2:
#endif
    la      t4, .LdvmJitToInterpNoChainNoProfile   # defined in footer.S
    lw      a1, (t4)
    move    rFP, t0                                # publish new FP
    beq     a2, zero, 4f                           # skip clazz load for a break frame
    lw      t0, offMethod_clazz(a2)                # t0<- method->clazz
4:

    sw      a2, offThread_method(rSELF)            # self->method = newSave->method
    lw      a0, offClassObject_pDvmDex(t0)         # a0<- method->clazz->pDvmDex
    sw      rFP, offThread_curFrame(rSELF)         # self->curFrame = fp
    add     rPC, rPC, 3*2                          # publish new rPC (step past the
                                                   # 3-code-unit invoke instruction)
    sw      a0, offThread_methodClassDex(rSELF)
    movn    t2, zero, t1                           # check the breakFlags and
                                                   # clear the chaining cell address
    sw      t2, offThread_inJitCodeCache(rSELF)    # in code cache or not
    beq     t2, zero, 3f                           # chaining cell exists?
    JALR(t2)                                       # jump to the chaining cell
    # DOUG: assume this does not return ???
3:
#if defined(WITH_JIT_TUNING)
    li      a0, kCallsiteInterpreted
#endif
    j       a1                                     # callsite is interpreted
1:
    sw      zero, offThread_inJitCodeCache(rSELF)  # reset inJitCodeCache
    SAVE_PC_TO_SELF()                              # SAVE_PC_FP_TO_SELF()
    SAVE_FP_TO_SELF()
    la      t4, .LdvmMterpStdBail                  # defined in footer.S
    lw      a2, (t4)
    move    a0, rSELF                              # Expecting rSELF in a0
    JALR(a2)                                       # exit the interpreter
    # DOUG: assume this does not return ???
    544 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
    /*
     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
     * runtime-resolved callee.
     */
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
    bgeu   t0, t9, 1f                             # enough room? taken if bottom >= interpStackEnd
    RETURN                                        # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
    lw     t0, offMethod_accessFlags(a0)          # t0<- methodToCall->accessFlags
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    beqz   t8, 2f                                 # taken if breakFlags == 0
    RETURN                                        # bail to the interpreter

2:
    and    t6, t0, ACC_NATIVE                     # native method?
    beqz   t6, 3f
#if !defined(WITH_SELF_VERIFICATION)
    j      .LinvokeNative
#else
    RETURN                                        # bail to the interpreter
#endif

3:
    # continue executing the next instruction through the interpreter
    la     t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
    lw     rTEMP, (t0)
    lw     a3, offClassObject_pDvmDex(t9)         # a3<- method->clazz->pDvmDex

    # Update "thread" values for the new method
    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
    move   rFP, a1                                # fp = newFp
    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a3
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(a3, 12)

    # a0=methodToCall, a1=rSELF
    move   a1, rSELF
    la     t9, dvmFastMethodTraceEnter
    JALR(t9)                                      # ra preserved via rTEMP
    lw     gp, STACK_OFFSET_GP(sp)                # re-establish gp after call

    # restore a0-a3
    SCRATCH_LOAD(a3, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif

    # Start executing the callee
#if defined(WITH_JIT_TUNING)
    li     a0, kInlineCacheMiss
#endif
    jr     rTEMP                                  # dvmJitToInterpTraceSelectNoChain
    630 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */
    /*
     * For monomorphic callsite, setup the Dalvik frame and return to the
     * Thumb code through the link register to transfer control to the callee
     * method through a dedicated chaining cell.
     */
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    # methodToCall is guaranteed to be non-native
.LinvokeChain:
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    add    t2, ra, 8                              # setup the punt-to-interp address
                                                  # 8 bytes skips branch and delay slot
    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
    bgeu   t0, t9, 1f                             # enough room? taken if bottom >= interpStackEnd
    jr     t2                                     # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    beqz   t8, 2f                                 # taken if breakFlags == 0
    jr     t2                                     # bail to the interpreter

2:
    lw     a3, offClassObject_pDvmDex(t9)         # a3<- methodToCall->clazz->pDvmDex

    # Update "thread" values for the new method
    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
    move   rFP, a1                                # fp = newFp
    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a2 and ra
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(ra, 12)

    move   a1, rSELF
    # a0=methodToCall, a1=rSELF
    la     t9, dvmFastMethodTraceEnter
    jalr   t9
    # NOTE(review): bare jalr, not the ra-preserving JALR macro used by
    # the sibling templates; appears safe because ra was saved to the
    # scratch slot above and is reloaded below -- confirm intentional.
    lw     gp, STACK_OFFSET_GP(sp)                # re-establish gp after call

    # restore a0-a2 and ra
    SCRATCH_LOAD(ra, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif
    RETURN                                        # return to the callee-chaining cell
    702 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
    /*
     * For polymorphic callsite, check whether the cached class pointer matches
     * the current one. If so setup the Dalvik frame and return to the
     * Thumb code through the link register to transfer control to the callee
     * method through a dedicated chaining cell.
     *
     * The predicted chaining cell is declared in ArmLIR.h with the
     * following layout:
     *
     *  typedef struct PredictedChainingCell {
     *      u4 branch;
     *      u4 delay_slot;
     *      const ClassObject *clazz;
     *      const Method *method;
     *      u4 counter;
     *  } PredictedChainingCell;
     *
     * Upon returning to the callsite:
     *    - lr   : to branch to the chaining cell
     *    - lr+8 : to punt to the interpreter
     *    - lr+16: to fully resolve the callee and may rechain.
     *             a3 <- class
     *
     * Note: rIBASE and rINST are reused as plain scratch registers here;
     * their interpreter roles do not apply inside the compiled templates.
     */
    # a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite
    lw      a3, offObject_clazz(a0)     # a3 <- this->class
    lw      rIBASE, 8(a2)                   # rIBASE <- predictedChainCell->clazz
    lw      a0, 12(a2)                  # a0 <- predictedChainCell->method
    lw      t1, offThread_icRechainCount(rSELF)    # t1 <- shared rechainCount

#if defined(WITH_JIT_TUNING)
    la      rINST, .LdvmICHitCount
    #add     t2, t2, 1
    bne    a3, rIBASE, 1f               # count only inline-cache hits
    nop
    lw      t2, 0(rINST)
    add     t2, t2, 1
    sw      t2, 0(rINST)
1:
    #add     t2, t2, 1
#endif
    beq     a3, rIBASE, .LinvokeChain       # branch if predicted chain is valid
    lw      rINST, offClassObject_vtable(a3)     # rINST <- this->class->vtable
    beqz    rIBASE, 2f                      # cached clazz NULL => cell not initialized
    sub     a1, t1, 1                   # count--
    sw      a1, offThread_icRechainCount(rSELF)   # write back to InterpState
    b       3f
2:
    move    a1, zero                    # uninitialized cell: report count 0
3:
    add     ra, ra, 16                  # return to fully-resolve landing pad
    /*
     * a1 <- count
     * a2 <- &predictedChainCell
     * a3 <- this->class
     * rPC <- dPC
     * rINST <- this->class->vtable
     */
    RETURN
    766 
    767 /* ------------------------------ */
    768     .balign 4
    769     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
    770 dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
    771 /* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */
        # Invoke a native (JNI) method from JIT'ed code: build the new Dalvik frame,
        # bail to the interpreter on stack overflow or pending break, otherwise call
        # the native function directly and pop back on return.
    772     # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    773     lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    774     lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    775     lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    776     move   a3, a1                                 # a3<- returnCell
    777     SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    778     sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    779     sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    780     SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    781     bgeu   t0, t9, 1f                             # branch if bottom >= interpStackEnd (room left)
    782     RETURN                                        # fall-through: return to raise stack overflow excep.
    783 
    784 1:
    785     # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    786     sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    787     sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    788     lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns
    789 
    790     # set up newSaveArea
    791     sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    792     sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    793     sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    794     lw     rTEMP, offMethod_nativeFunc(a0)        # rTEMP<- method->nativeFunc
    795 #if !defined(WITH_SELF_VERIFICATION)
    796     beqz   t8, 2f                                 # branch if breakFlags == 0 (no bail needed)
    797     RETURN                                        # bail to the interpreter
    798 2:
    799 #else
    800     RETURN                                        # bail to the interpreter unconditionally
        # NOTE(review): with WITH_SELF_VERIFICATION the direct native dispatch below is
        # never reached from this template.
    801 #endif
    802 
    803     # go ahead and transfer control to the native code
    804     lw     t6, offThread_jniLocal_topCookie(rSELF)  # t6<- thread->localRef->...
    805     sw     a1, offThread_curFrame(rSELF)          # self->curFrame = newFp
    806     sw     zero, offThread_inJitCodeCache(rSELF)  # not in the jit code cache
    807     sw     t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
    808                                                   # newFp->localRefCookie=top
    809     SAVEAREA_FROM_FP(rBIX, a1)                    # rBIX<- new stack save area
    810     move   a2, a0                                 # a2<- methodToCall
    811     move   a0, a1                                 # a0<- newFp
    812     add    a1, rSELF, offThread_retval            # a1<- &retval
    813     move   a3, rSELF                              # a3<- self
    814 #if defined(TEMPLATE_INLINE_PROFILING)
    815     # a2: methodToCall
    816     # preserve a0-a3
    817     SCRATCH_STORE(a0, 0)
    818     SCRATCH_STORE(a1, 4)
    819     SCRATCH_STORE(a2, 8)
    820     SCRATCH_STORE(a3, 12)
    821 
    822     move   a0, a2
    823     move   a1, rSELF
    824     # a0=JNIMethod, a1=rSELF
    825     la      t9, dvmFastMethodTraceEnter
    826     JALR(t9)                                      # call dvmFastMethodTraceEnter
    827     lw     gp, STACK_OFFSET_GP(sp)                # restore gp after the call
    828 
    829     # restore a0-a3
    830     SCRATCH_LOAD(a3, 12)
    831     SCRATCH_LOAD(a2, 8)
    832     SCRATCH_LOAD(a1, 4)
    833     SCRATCH_LOAD(a0, 0)
    834 
    835     move   rOBJ, a2                               # save a2 (methodToCall) across the native call
    836 #endif
    837 
    838     JALR(rTEMP)                                   # off to the native code
    839     lw     gp, STACK_OFFSET_GP(sp)                # restore gp after the call
    840 
    841 #if defined(TEMPLATE_INLINE_PROFILING)
    842     move   a0, rOBJ
    843     move   a1, rSELF
    844     # a0=JNIMethod, a1=rSELF
    845     la      t9, dvmFastNativeMethodTraceExit
    846     JALR(t9)
    847     lw     gp, STACK_OFFSET_GP(sp)
    848 #endif
    849 
    850     # native return; rBIX=newSaveArea
    851     # equivalent to dvmPopJniLocals
    852     lw     a2, offStackSaveArea_returnAddr(rBIX)     # a2 = chaining cell ret addr
    853     lw     a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
    854     lw     a1, offThread_exception(rSELF)            # check for exception
    855     sw     rFP, offThread_curFrame(rSELF)            # self->curFrame = fp
    856     sw     a0, offThread_jniLocal_topCookie(rSELF)   # new top <- old top
    857     lw     a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    858 
    859     # a0 = dalvikCallsitePC
    860     bnez   a1, .LhandleException                     # handle exception if any
    861 
        # NOTE(review): the sw below likely sits in the bnez delay slot (executes even
        # on the exception path) -- assumes noreorder assembly; confirm against header.S.
    862     sw     a2, offThread_inJitCodeCache(rSELF)       # set the mode properly
    863     beqz   a2, 3f                                    # no chaining cell ret addr -> interpreter
    864     jr     a2                                        # go if return chaining cell still exist
    865 
    866 3:
    867     # continue executing the next instruction through the interpreter
    868     la     a1, .LdvmJitToInterpTraceSelectNoChain    # defined in footer.S
    869     lw     a1, (a1)
    870     add    rPC, a0, 3*2                              # reconstruct new rPC (advance 3 dalvik instr)
    871 
    872 #if defined(WITH_JIT_TUNING)
    873     li     a0, kCallsiteInterpreted
    874 #endif
    875     jr     a1
    876 
    877 /* ------------------------------ */
    878     .balign 4
    879     .global dvmCompiler_TEMPLATE_MUL_LONG
    880 dvmCompiler_TEMPLATE_MUL_LONG:
    881 /* File: mips/TEMPLATE_MUL_LONG.S */
    882     /*
    883      * Signed 64-bit integer multiply.
    884      *
    885      * For JIT: op1 in a0/a1, op2 in a2/a3, return in v0/v1
    886      *
    887      * Consider WXxYZ (a1a0 x a3a2) with a long multiply:
    888      *
    889      *         a1   a0
    890      *   x     a3   a2
    891      *   -------------
    892      *       a2a1 a2a0
    893      *       a3a0
    894      *  a3a1 (<= unused)
    895      *  ---------------
    896      *         v1   v0
    897      *
    898      */
    899     /* mul-long vAA, vBB, vCC */
    900     mul     rRESULT1,rARG3,rARG0              #  v1= a3a0 (low 32 bits only)
    901     multu   rARG2,rARG0                       #  hi/lo = a2 * a0 (unsigned 64-bit)
    902     mfhi    t1                                #  t1= hi(a2a0)
    903     mflo    rRESULT0                          #  v0= a2a0 (low word of product)
    904     mul     t0,rARG2,rARG1                    #  t0= a2a1
    905     addu    rRESULT1,rRESULT1,t1              #  v1= a3a0 + hi(a2a0)
    906     addu    rRESULT1,rRESULT1,t0              #  v1= a3a0 + hi(a2a0) + a2a1;
    907     RETURN
    908 
    909 /* ------------------------------ */
    910     .balign 4
    911     .global dvmCompiler_TEMPLATE_SHL_LONG
    912 dvmCompiler_TEMPLATE_SHL_LONG:
    913 /* File: mips/TEMPLATE_SHL_LONG.S */
    914     /*
    915      * Long integer shift.  This is different from the generic 32/64-bit
    916      * binary operations because vAA/vBB are 64-bit but vCC (the shift
    917      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
    918      * 6 bits.
    919      */
    920     /* shl-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
    921     sll     rRESULT0, rARG0, a2		#  rlo<- alo << (shift&31)
    922     not     rRESULT1, a2		#  rhi<- 31-shift  (shift is 5b)
    923     srl     rARG0, 1			#  alo<- alo >> 1 (two-operand form)
    924     srl     rARG0, rRESULT1		#  alo<- alo >> (32-(shift&31))
    925     sll     rRESULT1, rARG1, a2		#  rhi<- ahi << (shift&31)
    926     or      rRESULT1, rARG0		#  rhi<- rhi | alo
    927     andi    a2, 0x20			#  shift<- shift & 0x20 (shift >= 32?)
    928     movn    rRESULT1, rRESULT0, a2	#  rhi<- rlo (if shift&0x20)
    929     movn    rRESULT0, zero, a2		#  rlo<- 0  (if shift&0x20)
    930     RETURN
    931 
    932 /* ------------------------------ */
    933     .balign 4
    934     .global dvmCompiler_TEMPLATE_SHR_LONG
    935 dvmCompiler_TEMPLATE_SHR_LONG:
    936 /* File: mips/TEMPLATE_SHR_LONG.S */
    937     /*
    938      * Long integer shift.  This is different from the generic 32/64-bit
    939      * binary operations because vAA/vBB are 64-bit but vCC (the shift
    940      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
    941      * 6 bits.
    942      */
    943     /* shr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
    944     sra     rRESULT1, rARG1, a2		#  rhi<- ahi >> (shift&31)
    945     srl     rRESULT0, rARG0, a2		#  rlo<- alo >> (shift&31)
    946     sra     a3, rARG1, 31		#  a3<- sign(ah)
    947     not     rARG0, a2			#  alo<- 31-shift (shift is 5b)
    948     sll     rARG1, 1			#  ahi<- ahi << 1 (two-operand form)
    949     sll     rARG1, rARG0		#  ahi<- ahi << (32-(shift&31))
    950     or      rRESULT0, rARG1		#  rlo<- rlo | ahi
    951     andi    a2, 0x20			#  shift & 0x20 (shift >= 32?)
    952     movn    rRESULT0, rRESULT1, a2	#  rlo<- rhi (if shift&0x20)
    953     movn    rRESULT1, a3, a2		#  rhi<- sign(ahi) (if shift&0x20)
    954     RETURN
    955 
    956 /* ------------------------------ */
    957     .balign 4
    958     .global dvmCompiler_TEMPLATE_USHR_LONG
    959 dvmCompiler_TEMPLATE_USHR_LONG:
    960 /* File: mips/TEMPLATE_USHR_LONG.S */
    961     /*
    962      * Long integer shift.  This is different from the generic 32/64-bit
    963      * binary operations because vAA/vBB are 64-bit but vCC (the shift
    964      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
    965      * 6 bits.
    966      */
    967     /* ushr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
    968     srl     rRESULT1, rARG1, a2		#  rhi<- ahi >> (shift&31)
    969     srl     rRESULT0, rARG0, a2		#  rlo<- alo >> (shift&31)
    970     not     rARG0, a2			#  alo<- 31-n  (shift is 5b)
    971     sll     rARG1, 1			#  ahi<- ahi << 1 (two-operand form)
    972     sll     rARG1, rARG0		#  ahi<- ahi << (32-(shift&31))
    973     or      rRESULT0, rARG1		#  rlo<- rlo | ahi
    974     andi    a2, 0x20			#  shift & 0x20 (shift >= 32?)
    975     movn    rRESULT0, rRESULT1, a2	#  rlo<- rhi (if shift&0x20)
    976     movn    rRESULT1, zero, a2		#  rhi<- 0 (if shift&0x20)
    977     RETURN
    978 
    979 /* ------------------------------ */
    980     .balign 4
    981     .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
    982 dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
    983 /* File: mips/TEMPLATE_ADD_FLOAT_VFP.S */
    984 /* File: mips/fbinop.S */
    985     /*
    986      * Generic 32-bit binary float operation. a0 = a1 op a2.
    987      *
    988      * For: add-fp, sub-fp, mul-fp, div-fp
    989      *
    990      * On entry:
    991      *     a0 = target dalvik register address
    992      *     a1 = op1 address
    993      *     a2 = op2 address
    994      *
    995      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
    996      *
    997      */
    998     move rOBJ, a0                       # save a0 (target vreg address)
    999 #ifdef  SOFT_FLOAT
   1000     LOAD(a0, a1)                        # a0<- vBB
   1001     LOAD(a1, a2)                        # a1<- vCC
   1002     .if 0                               # disabled "chkzero" template option
   1003     beqz    a1, common_errDivideByZero  # is second operand zero?
   1004     .endif
   1005                                # optional op
   1006     JAL(__addsf3)                              # v0 = result (libgcc soft-float add)
   1007     STORE(v0, rOBJ)                     # vAA <- v0
   1008 #else
   1009     LOAD_F(fa0, a1)                     # fa0<- vBB
   1010     LOAD_F(fa1, a2)                     # fa1<- vCC
   1011     .if 0                               # disabled "chkzero" template option
   1012     # is second operand zero?
   1013     li.s        ft0, 0
   1014     c.eq.s      fcc0, ft0, fa1          # condition bit and comparison with 0
   1015     bc1t        fcc0, common_errDivideByZero
   1016     .endif
   1017                                # optional op
   1018     add.s fv0, fa0, fa1                            # fv0 = result
   1019     STORE_F(fv0, rOBJ)                  # vAA <- fv0
   1020 #endif
   1021     RETURN
   1022 
   1023 
   1024 /* ------------------------------ */
   1025     .balign 4
   1026     .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
   1027 dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
   1028 /* File: mips/TEMPLATE_SUB_FLOAT_VFP.S */
   1029 /* File: mips/fbinop.S */
   1030     /*
   1031      * Generic 32-bit binary float operation. a0 = a1 op a2.
   1032      *
   1033      * For: add-fp, sub-fp, mul-fp, div-fp
   1034      *
   1035      * On entry:
   1036      *     a0 = target dalvik register address
   1037      *     a1 = op1 address
   1038      *     a2 = op2 address
   1039      *
   1040      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1041      *
   1042      */
   1043     move rOBJ, a0                       # save a0 (target vreg address)
   1044 #ifdef  SOFT_FLOAT
   1045     LOAD(a0, a1)                        # a0<- vBB
   1046     LOAD(a1, a2)                        # a1<- vCC
   1047     .if 0                               # disabled "chkzero" template option
   1048     beqz    a1, common_errDivideByZero  # is second operand zero?
   1049     .endif
   1050                                # optional op
   1051     JAL(__subsf3)                              # v0 = result (libgcc soft-float subtract)
   1052     STORE(v0, rOBJ)                     # vAA <- v0
   1053 #else
   1054     LOAD_F(fa0, a1)                     # fa0<- vBB
   1055     LOAD_F(fa1, a2)                     # fa1<- vCC
   1056     .if 0                               # disabled "chkzero" template option
   1057     # is second operand zero?
   1058     li.s        ft0, 0
   1059     c.eq.s      fcc0, ft0, fa1          # condition bit and comparison with 0
   1060     bc1t        fcc0, common_errDivideByZero
   1061     .endif
   1062                                # optional op
   1063     sub.s fv0, fa0, fa1                            # fv0 = result
   1064     STORE_F(fv0, rOBJ)                  # vAA <- fv0
   1065 #endif
   1066     RETURN
   1067 
   1068 
   1069 /* ------------------------------ */
   1070     .balign 4
   1071     .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
   1072 dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
   1073 /* File: mips/TEMPLATE_MUL_FLOAT_VFP.S */
   1074 /* File: mips/fbinop.S */
   1075     /*
   1076      * Generic 32-bit binary float operation. a0 = a1 op a2.
   1077      *
   1078      * For: add-fp, sub-fp, mul-fp, div-fp
   1079      *
   1080      * On entry:
   1081      *     a0 = target dalvik register address
   1082      *     a1 = op1 address
   1083      *     a2 = op2 address
   1084      *
   1085      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1086      *
   1087      */
   1088     move rOBJ, a0                       # save a0 (target vreg address)
   1089 #ifdef  SOFT_FLOAT
   1090     LOAD(a0, a1)                        # a0<- vBB
   1091     LOAD(a1, a2)                        # a1<- vCC
   1092     .if 0                               # disabled "chkzero" template option
   1093     beqz    a1, common_errDivideByZero  # is second operand zero?
   1094     .endif
   1095                                # optional op
   1096     JAL(__mulsf3)                              # v0 = result (libgcc soft-float multiply)
   1097     STORE(v0, rOBJ)                     # vAA <- v0
   1098 #else
   1099     LOAD_F(fa0, a1)                     # fa0<- vBB
   1100     LOAD_F(fa1, a2)                     # fa1<- vCC
   1101     .if 0                               # disabled "chkzero" template option
   1102     # is second operand zero?
   1103     li.s        ft0, 0
   1104     c.eq.s      fcc0, ft0, fa1          # condition bit and comparison with 0
   1105     bc1t        fcc0, common_errDivideByZero
   1106     .endif
   1107                                # optional op
   1108     mul.s fv0, fa0, fa1                            # fv0 = result
   1109     STORE_F(fv0, rOBJ)                  # vAA <- fv0
   1110 #endif
   1111     RETURN
   1112 
   1113 
   1114 /* ------------------------------ */
   1115     .balign 4
   1116     .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
   1117 dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
   1118 /* File: mips/TEMPLATE_DIV_FLOAT_VFP.S */
   1119 /* File: mips/fbinop.S */
   1120     /*
   1121      * Generic 32-bit binary float operation. a0 = a1 op a2.
   1122      *
   1123      * For: add-fp, sub-fp, mul-fp, div-fp
   1124      *
   1125      * On entry:
   1126      *     a0 = target dalvik register address
   1127      *     a1 = op1 address
   1128      *     a2 = op2 address
   1129      *
   1130      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1131      *
   1132      */
   1133     move rOBJ, a0                       # save a0 (target vreg address)
   1134 #ifdef  SOFT_FLOAT
   1135     LOAD(a0, a1)                        # a0<- vBB
   1136     LOAD(a1, a2)                        # a1<- vCC
   1137     .if 0                               # disabled "chkzero" template option (fp div never traps)
   1138     beqz    a1, common_errDivideByZero  # is second operand zero?
   1139     .endif
   1140                                # optional op
   1141     JAL(__divsf3)                              # v0 = result (libgcc soft-float divide)
   1142     STORE(v0, rOBJ)                     # vAA <- v0
   1143 #else
   1144     LOAD_F(fa0, a1)                     # fa0<- vBB
   1145     LOAD_F(fa1, a2)                     # fa1<- vCC
   1146     .if 0                               # disabled "chkzero" template option (fp div never traps)
   1147     # is second operand zero?
   1148     li.s        ft0, 0
   1149     c.eq.s      fcc0, ft0, fa1          # condition bit and comparison with 0
   1150     bc1t        fcc0, common_errDivideByZero
   1151     .endif
   1152                                # optional op
   1153     div.s fv0, fa0, fa1                            # fv0 = result
   1154     STORE_F(fv0, rOBJ)                  # vAA <- fv0
   1155 #endif
   1156     RETURN
   1157 
   1158 
   1159 /* ------------------------------ */
   1160     .balign 4
   1161     .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
   1162 dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
   1163 /* File: mips/TEMPLATE_ADD_DOUBLE_VFP.S */
   1164 /* File: mips/fbinopWide.S */
   1165     /*
   1166      * Generic 64-bit binary operation.  Provide an "instr" line that
   1167      * specifies an instruction that performs "result = a0-a1 op a2-a3".
   1168      * This could be an MIPS instruction or a function call.
   1169      * If "chkzero" is set to 1, we perform a divide-by-zero check on
   1170      * vCC (a1).  Useful for integer division and modulus.
   1171      *
   1172      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
   1173      *      xor-long, add-double, sub-double, mul-double, div-double,
   1174      *      rem-double
   1175      *
   1176      * On entry:
   1177      *     a0 = target dalvik register address
   1178      *     a1 = op1 address
   1179      *     a2 = op2 address
   1180      *
   1181      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1182      */
   1183     move rOBJ, a0                       # save a0 (target vreg address)
   1184 #ifdef  SOFT_FLOAT
   1185     move t0, a1                         # save a1
   1186     move t1, a2                         # save a2
   1187     LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
   1188     LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
   1189     .if 0                               # disabled "chkzero" template option
   1190     or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
   1191     beqz        t0, common_errDivideByZero
   1192     .endif
   1193                                # optional op
   1194     JAL(__adddf3)                              # result<- op, a0-a3 changed (libgcc soft-float add)
   1195     STORE64(rRESULT0, rRESULT1, rOBJ)   # vAA/vAA+1 <- result pair
   1196 #else
   1197     LOAD64_F(fa0, fa0f, a1)             # fa0<- vBB (double)
   1198     LOAD64_F(fa1, fa1f, a2)             # fa1<- vCC (double)
   1199     .if 0                               # disabled "chkzero" template option
   1200     li.d        ft0, 0
   1201     c.eq.d      fcc0, fa1, ft0
   1202     bc1t        fcc0, common_errDivideByZero
   1203     .endif
   1204                                # optional op
   1205     add.d fv0, fa0, fa1                 # fv0 = vBB + vCC
   1206     STORE64_F(fv0, fv0f, rOBJ)          # vAA/vAA+1 <- fv0
   1207 #endif
   1208     RETURN
   1209 
   1210 
   1211 /* ------------------------------ */
   1212     .balign 4
   1213     .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
   1214 dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
   1215 /* File: mips/TEMPLATE_SUB_DOUBLE_VFP.S */
   1216 /* File: mips/fbinopWide.S */
   1217     /*
   1218      * Generic 64-bit binary operation.  Provide an "instr" line that
   1219      * specifies an instruction that performs "result = a0-a1 op a2-a3".
   1220      * This could be an MIPS instruction or a function call.
   1221      * If "chkzero" is set to 1, we perform a divide-by-zero check on
   1222      * vCC (a1).  Useful for integer division and modulus.
   1223      *
   1224      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
   1225      *      xor-long, add-double, sub-double, mul-double, div-double,
   1226      *      rem-double
   1227      *
   1228      * On entry:
   1229      *     a0 = target dalvik register address
   1230      *     a1 = op1 address
   1231      *     a2 = op2 address
   1232      *
   1233      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1234      */
   1235     move rOBJ, a0                       # save a0 (target vreg address)
   1236 #ifdef  SOFT_FLOAT
   1237     move t0, a1                         # save a1
   1238     move t1, a2                         # save a2
   1239     LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
   1240     LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
   1241     .if 0                               # disabled "chkzero" template option
   1242     or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
   1243     beqz        t0, common_errDivideByZero
   1244     .endif
   1245                                # optional op
   1246     JAL(__subdf3)                              # result<- op, a0-a3 changed (libgcc soft-float subtract)
   1247     STORE64(rRESULT0, rRESULT1, rOBJ)   # vAA/vAA+1 <- result pair
   1248 #else
   1249     LOAD64_F(fa0, fa0f, a1)             # fa0<- vBB (double)
   1250     LOAD64_F(fa1, fa1f, a2)             # fa1<- vCC (double)
   1251     .if 0                               # disabled "chkzero" template option
   1252     li.d        ft0, 0
   1253     c.eq.d      fcc0, fa1, ft0
   1254     bc1t        fcc0, common_errDivideByZero
   1255     .endif
   1256                                # optional op
   1257     sub.d fv0, fa0, fa1                 # fv0 = vBB - vCC
   1258     STORE64_F(fv0, fv0f, rOBJ)          # vAA/vAA+1 <- fv0
   1259 #endif
   1260     RETURN
   1261 
   1262 
   1263 /* ------------------------------ */
   1264     .balign 4
   1265     .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
   1266 dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
   1267 /* File: mips/TEMPLATE_MUL_DOUBLE_VFP.S */
   1268 /* File: mips/fbinopWide.S */
   1269     /*
   1270      * Generic 64-bit binary operation.  Provide an "instr" line that
   1271      * specifies an instruction that performs "result = a0-a1 op a2-a3".
   1272      * This could be an MIPS instruction or a function call.
   1273      * If "chkzero" is set to 1, we perform a divide-by-zero check on
   1274      * vCC (a1).  Useful for integer division and modulus.
   1275      *
   1276      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
   1277      *      xor-long, add-double, sub-double, mul-double, div-double,
   1278      *      rem-double
   1279      *
   1280      * On entry:
   1281      *     a0 = target dalvik register address
   1282      *     a1 = op1 address
   1283      *     a2 = op2 address
   1284      *
   1285      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1286      */
   1287     move rOBJ, a0                       # save a0 (target vreg address)
   1288 #ifdef  SOFT_FLOAT
   1289     move t0, a1                         # save a1
   1290     move t1, a2                         # save a2
   1291     LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
   1292     LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
   1293     .if 0                               # disabled "chkzero" template option
   1294     or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
   1295     beqz        t0, common_errDivideByZero
   1296     .endif
   1297                                # optional op
   1298     JAL(__muldf3)                              # result<- op, a0-a3 changed (libgcc soft-float multiply)
   1299     STORE64(rRESULT0, rRESULT1, rOBJ)   # vAA/vAA+1 <- result pair
   1300 #else
   1301     LOAD64_F(fa0, fa0f, a1)             # fa0<- vBB (double)
   1302     LOAD64_F(fa1, fa1f, a2)             # fa1<- vCC (double)
   1303     .if 0                               # disabled "chkzero" template option
   1304     li.d        ft0, 0
   1305     c.eq.d      fcc0, fa1, ft0
   1306     bc1t        fcc0, common_errDivideByZero
   1307     .endif
   1308                                # optional op
   1309     mul.d fv0, fa0, fa1                 # fv0 = vBB * vCC
   1310     STORE64_F(fv0, fv0f, rOBJ)          # vAA/vAA+1 <- fv0
   1311 #endif
   1312     RETURN
   1313 
   1314 
   1315 /* ------------------------------ */
   1316     .balign 4
   1317     .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
   1318 dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
   1319 /* File: mips/TEMPLATE_DIV_DOUBLE_VFP.S */
   1320 /* File: mips/fbinopWide.S */
   1321     /*
   1322      * Generic 64-bit binary operation.  Provide an "instr" line that
   1323      * specifies an instruction that performs "result = a0-a1 op a2-a3".
   1324      * This could be an MIPS instruction or a function call.
   1325      * If "chkzero" is set to 1, we perform a divide-by-zero check on
   1326      * vCC (a1).  Useful for integer division and modulus.
   1327      *
   1328      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
   1329      *      xor-long, add-double, sub-double, mul-double, div-double,
   1330      *      rem-double
   1331      *
   1332      * On entry:
   1333      *     a0 = target dalvik register address
   1334      *     a1 = op1 address
   1335      *     a2 = op2 address
   1336      *
   1337      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1338      */
   1339     move rOBJ, a0                       # save a0 (target vreg address)
   1340 #ifdef  SOFT_FLOAT
   1341     move t0, a1                         # save a1
   1342     move t1, a2                         # save a2
   1343     LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
   1344     LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
   1345     .if 0                               # disabled "chkzero" template option (fp div never traps)
   1346     or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
   1347     beqz        t0, common_errDivideByZero
   1348     .endif
   1349                                # optional op
   1350     JAL(__divdf3)                              # result<- op, a0-a3 changed (libgcc soft-float divide)
   1351     STORE64(rRESULT0, rRESULT1, rOBJ)   # vAA/vAA+1 <- result pair
   1352 #else
   1353     LOAD64_F(fa0, fa0f, a1)             # fa0<- vBB (double)
   1354     LOAD64_F(fa1, fa1f, a2)             # fa1<- vCC (double)
   1355     .if 0                               # disabled "chkzero" template option (fp div never traps)
   1356     li.d        ft0, 0
   1357     c.eq.d      fcc0, fa1, ft0
   1358     bc1t        fcc0, common_errDivideByZero
   1359     .endif
   1360                                # optional op
   1361     div.d fv0, fa0, fa1                 # fv0 = vBB / vCC
   1362     STORE64_F(fv0, fv0f, rOBJ)          # vAA/vAA+1 <- fv0
   1363 #endif
   1364     RETURN
   1365 
   1366 
   1367 /* ------------------------------ */
   1368     .balign 4
   1369     .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
   1370 dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
   1371 /* File: mips/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
   1372 /* File: mips/funopNarrower.S */
   1373     /*
   1374      * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
   1375      * that specifies an instruction that performs "result = op a0/a1", where
   1376      * "result" is a 32-bit quantity in a0.
   1377      *
   1378      * For: long-to-float, double-to-int, double-to-float
   1379      * If hard floating point support is available, use fa0 as the parameter, except for
   1380      * long-to-float opcode.
   1381      * (This would work for long-to-int, but that instruction is actually
   1382      * an exact match for OP_MOVE.)
   1383      *
   1384      * On entry:
   1385      *     a0 = target dalvik register address
   1386      *     a1 = src dalvik register address
   1387      *
   1388      */
   1389     move rINST, a0                      # save a0 (target vreg address)
   1390 #ifdef  SOFT_FLOAT
   1391     move t0, a1                         # save a1
   1392     LOAD64(rARG0, rARG1, t0)            # a0/a1<- vB/vB+1
   1393                                # optional op
   1394     JAL(__truncdfsf2)                              # v0<- op, a0-a3 changed (libgcc double->float)
   1395 .LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg:
   1396     STORE(v0, rINST)                    # vA<- v0
   1397 #else
   1398     LOAD64_F(fa0, fa0f, a1)             # fa0<- vB (double)
   1399                                # optional op
   1400     cvt.s.d  fv0,fa0                            # fv0 = result (narrow double to float)
   1401 .LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg_f:
   1402     STORE_F(fv0, rINST)                 # vA<- fv0
   1403 #endif
   1404     RETURN
   1405 
   1406 
   1407 /* ------------------------------ */
   1408     .balign 4
   1409     .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
   1410 dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
   1411 /* File: mips/TEMPLATE_DOUBLE_TO_INT_VFP.S */
   1412 /* File: mips/funopNarrower.S */
   1413     /*
   1414      * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
   1415      * that specifies an instruction that performs "result = op a0/a1", where
   1416      * "result" is a 32-bit quantity in a0.
   1417      *
   1418      * For: long-to-float, double-to-int, double-to-float
   1419      * If hard floating point support is available, use fa0 as the parameter, except for
   1420      * long-to-float opcode.
   1421      * (This would work for long-to-int, but that instruction is actually
   1422      * an exact match for OP_MOVE.)
   1423      *
   1424      * On entry:
   1425      *     a0 = target dalvik register address
   1426      *     a1 = src dalvik register address
   1427      *
   1428      */
   1429     move rINST, a0                      # save a0 (target vreg address)
   1430 #ifdef  SOFT_FLOAT
   1431     move t0, a1                         # save a1
   1432     LOAD64(rARG0, rARG1, t0)            # a0/a1<- vB/vB+1
   1433                                # optional op
        # Convert with min/max/NaN clamping in the d2i_doconv helper below; it
        # branches back to the set_vreg label to store the clamped result.
        # NOTE(review): if assembled with .set noreorder, the STORE below would sit in
        # this b's delay slot -- confirm the reorder mode set in header.S.
   1434     b    d2i_doconv                              # v0<- op, a0-a3 changed
   1435 .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg:
   1436     STORE(v0, rINST)                    # vA<- v0
   1437 #else
   1438     LOAD64_F(fa0, fa0f, a1)             # fa0<- vB (double)
   1439                                # optional op
   1440     b    d2i_doconv                            # fv0 = result (via clamping helper)
   1441 .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f:
   1442     STORE_F(fv0, rINST)                 # vA<- fv0
   1443 #endif
   1444     RETURN
   1445 
   1446 
   1447 /*
   1448  * Convert the double in a0/a1 to an int in a0.
   1449  *
   1450  * We have to clip values to int min/max per the specification.  The
   1451  * expected common case is a "reasonable" value that converts directly
   1452  * to modest integer.  The EABI convert function isn't doing this for us.
   1453  * Use rBIX / rOBJ as global to hold arguments (they are not bound to a global var)
   1454  */
   1455 
   1456 d2i_doconv:
   1457 #ifdef SOFT_FLOAT
   1458     la          t0, .LDOUBLE_TO_INT_max
   1459     LOAD64(rARG2, rARG3, t0)
   1460     move        rBIX, rARG0                       # save a0
   1461     move        rOBJ, rARG1                       #  and a1
   1462     JAL(__gedf2)                               # is arg >= maxint?
   1463 
   1464     move        t0, v0
   1465     li          v0, ~0x80000000                # return maxint (7fffffff)
   1466     bgez        t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg       # nonzero == yes
   1467 
   1468     move        rARG0, rBIX                       # recover arg
   1469     move        rARG1, rOBJ
   1470     la          t0, .LDOUBLE_TO_INT_min
   1471     LOAD64(rARG2, rARG3, t0)
   1472     JAL(__ledf2)                               # is arg <= minint?
   1473 
   1474     move        t0, v0
   1475     li          v0, 0x80000000                 # return minint (80000000)
   1476     blez        t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg       # nonzero == yes
   1477 
   1478     move        rARG0, rBIX                  # recover arg
   1479     move        rARG1, rOBJ
   1480     move        rARG2, rBIX                  # compare against self
   1481     move        rARG3, rOBJ
   1482     JAL(__nedf2)                        # is arg == self?
   1483 
   1484     move        t0, v0                  # zero == no
   1485     li          v0, 0
   1486     bnez        t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg        # return zero for NaN
   1487 
   1488     move        rARG0, rBIX                  # recover arg
   1489     move        rARG1, rOBJ
   1490     JAL(__fixdfsi)                      # convert double to int
   1491     b           .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg
   1492 #else
   1493     la          t0, .LDOUBLE_TO_INT_max
   1494     LOAD64_F(fa1, fa1f, t0)
   1495     c.ole.d     fcc0, fa1, fa0
   1496     l.s         fv0, .LDOUBLE_TO_INT_maxret
   1497     bc1t        .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
   1498 
   1499     la          t0, .LDOUBLE_TO_INT_min
   1500     LOAD64_F(fa1, fa1f, t0)
   1501     c.ole.d     fcc0, fa0, fa1
   1502     l.s         fv0, .LDOUBLE_TO_INT_minret
   1503     bc1t        .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
   1504 
   1505     mov.d       fa1, fa0
   1506     c.un.d      fcc0, fa0, fa1
   1507     li.s        fv0, 0
   1508     bc1t        .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
   1509 
   1510     trunc.w.d   fv0, fa0
   1511     b           .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
   1512 #endif
   1513 
   1514 
   1515 .LDOUBLE_TO_INT_max:
   1516     .dword   0x41dfffffffc00000
   1517 .LDOUBLE_TO_INT_min:
   1518     .dword   0xc1e0000000000000                  # minint, as a double (high word)
   1519 .LDOUBLE_TO_INT_maxret:
   1520     .word   0x7fffffff
   1521 .LDOUBLE_TO_INT_minret:
   1522     .word   0x80000000
   1523 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
/* File: mips/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
/* File: mips/funopWider.S */
    /*
     * Generic 32bit-to-64bit floating point unary operation.  Provide an
     * "instr" line that specifies an instruction that performs "d0 = op s0".
     *
     * For: int-to-double, float-to-double
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = src dalvik register address
     */
    /* unop vA, vB */
    move rOBJ, a0                       # save a0 (target vreg address) across the call
#ifdef  SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vB (source float bits)
                               # optional op
    JAL(__extendsfdf2)                              # result<- op, a0-a3 changed
                                        # libgcc float->double widening

.LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg:
    STORE64(rRESULT0, rRESULT1, rOBJ)   # vA/vA+1<- v0/v1
#else
    LOAD_F(fa0, a1)                     # fa0<- vB
                               # optional op
    cvt.d.s fv0, fa0                    # widen single -> double in hardware

.LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg:
    STORE64_F(fv0, fv0f, rOBJ)                          # vA/vA+1<- fv0/fv0f
#endif
    RETURN
   1558 
   1559 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
/* File: mips/TEMPLATE_FLOAT_TO_INT_VFP.S */
/* File: mips/funop.S */
    /*
     * Generic 32-bit unary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = op a0".
     * This could be a MIPS instruction or a function call.
     *
     * for: int-to-float, float-to-int
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = src dalvik register address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     */
    move rOBJ, a0                       # save a0 (target vreg address) across the conversion
#ifdef SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vBB (source float bits)
                               # optional op
    b    f2i_doconv                              # v0<- op, a0-a3 changed
.LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg:
    STORE(v0, rOBJ)                     # vAA<- v0 (converted/clamped int)
#else
    LOAD_F(fa0, a1)                     # fa0<- vBB
                               # optional op
    b        f2i_doconv                            # fv0 = result
.LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f:
    STORE_F(fv0, rOBJ)                  # vAA <- fv0
#endif
    RETURN


/*
 * Not an entry point as it is used only once !!
 * Convert the float in a0 to an int in v0 (soft) / fv0 (hard),
 * clamping to int min/max and mapping NaN to zero per the spec.
 */
f2i_doconv:
#ifdef SOFT_FLOAT
        li      a1, 0x4f000000  # (float)maxint == 2^31 as float bits
        move    rBIX, a0        # save arg across the calls
        JAL(__gesf2)            # is arg >= maxint?
        move    t0, v0
        li      v0, ~0x80000000 # return maxint (7fffffff)
        bgez    t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg    # __gesf2 >= 0: clamp high

        move    a0, rBIX                # recover arg
        li      a1, 0xcf000000  # (float)minint == -2^31 as float bits
        JAL(__lesf2)            # is arg <= minint?

        move    t0, v0
        li      v0, 0x80000000  # return minint (80000000)
        blez    t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg    # __lesf2 <= 0: clamp low
        move    a0, rBIX
        move    a1, rBIX        # compare against self; unequal only for NaN
        JAL(__nesf2)

        move    t0, v0
        li      v0, 0           # return zero for NaN
        bnez    t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg

        move    a0, rBIX        # in-range: do the real conversion
        JAL(__fixsfsi)
        b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
#else
        l.s             fa1, .LFLOAT_TO_INT_max
        c.ole.s         fcc0, fa1, fa0          # maxint <= arg ?
        l.s             fv0, .LFLOAT_TO_INT_ret_max
        bc1t            .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f

        l.s             fa1, .LFLOAT_TO_INT_min
        c.ole.s         fcc0, fa0, fa1          # arg <= minint ?
        l.s             fv0, .LFLOAT_TO_INT_ret_min
        bc1t            .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f

        mov.s           fa1, fa0
        c.un.s          fcc0, fa0, fa1          # unordered with itself iff NaN
        li.s            fv0, 0
        bc1t            .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f

        trunc.w.s       fv0, fa0                # in-range: truncate toward zero
        b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f
#endif

.LFLOAT_TO_INT_max:
        .word   0x4f000000      # 2^31 as a float (first value >= maxint)
.LFLOAT_TO_INT_min:
        .word   0xcf000000      # -2^31 as a float (minint)
.LFLOAT_TO_INT_ret_max:
        .word   0x7fffffff      # clamped return value for arg >= maxint
.LFLOAT_TO_INT_ret_min:
        .word   0x80000000      # clamped return value for arg <= minint
   1655 
   1656 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
/* File: mips/TEMPLATE_INT_TO_DOUBLE_VFP.S */
/* File: mips/funopWider.S */
    /*
     * Generic 32bit-to-64bit floating point unary operation.  Provide an
     * "instr" line that specifies an instruction that performs "d0 = op s0".
     *
     * For: int-to-double, float-to-double
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = src dalvik register address
     */
    /* unop vA, vB */
    move rOBJ, a0                       # save a0 (target vreg address) across the call
#ifdef  SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vB (source int)
                               # optional op
    JAL(__floatsidf)                              # result<- op, a0-a3 changed
                                        # libgcc signed int -> double

.LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg:
    STORE64(rRESULT0, rRESULT1, rOBJ)   # vA/vA+1<- v0/v1
#else
    LOAD_F(fa0, a1)                     # fa0<- vB (int bits in FP reg)
                               # optional op
    cvt.d.w    fv0, fa0                 # convert word -> double in hardware

.LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg:
    STORE64_F(fv0, fv0f, rOBJ)                          # vA/vA+1<- fv0/fv0f
#endif
    RETURN
   1691 
   1692 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
/* File: mips/TEMPLATE_INT_TO_FLOAT_VFP.S */
/* File: mips/funop.S */
    /*
     * Generic 32-bit unary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = op a0".
     * This could be a MIPS instruction or a function call.
     *
     * for: int-to-float, float-to-int
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = src dalvik register address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     */
    move rOBJ, a0                       # save a0 (target vreg address) across the call
#ifdef SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vBB (source int)
                               # optional op
    JAL(__floatsisf)                              # v0<- op, a0-a3 changed
                                        # libgcc signed int -> float
.LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg:
    STORE(v0, rOBJ)                     # vAA<- v0
#else
    LOAD_F(fa0, a1)                     # fa0<- vBB (int bits in FP reg)
                               # optional op
    cvt.s.w fv0, fa0                            # fv0 = result (word -> float)
.LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg_f:
    STORE_F(fv0, rOBJ)                  # vAA <- fv0
#endif
    RETURN
   1728 
   1729 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
/* File: mips/TEMPLATE_CMPG_DOUBLE_VFP.S */
/* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */
    /*
     * Compare two double precision floating-point values.  Puts 0, 1, or -1 into the
     * destination register based on the results of the comparison.
     *
     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
     * on what value we'd like to return when one of the operands is NaN.
     *
     * The operation we're implementing is:
     *   if (x == y)
     *     return 0;
     *   else if (x < y)
     *     return -1;
     *   else if (x > y)
     *     return 1;
     *   else
     *     return {-1,1};  // one or both operands was NaN
     *
     * On entry:
     *    a0 = &op1 [vBB]
     *    a1 = &op2 [vCC]
     *
     * for: cmpl-double, cmpg-double
     */
    /* op vAA, vBB, vCC */

    /* "classic" form */
#ifdef  SOFT_FLOAT
    move rOBJ, a0                       # save a0
    move rBIX, a1                       # save a1
    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
    JAL(__eqdf2)                        # v0<- (vBB == vCC)
    li       rTEMP, 0                   # vAA<- 0
    beqz     v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1 (call clobbered args)
    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
    JAL(__ltdf2)                        # v0<- (vBB < vCC)
    li       rTEMP, -1                  # vAA<- -1
    bltz     v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
    JAL(__gtdf2)                        # v0<- (vBB > vCC)
    li      rTEMP, 1                    # vAA<- 1
    bgtz    v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
#else
    LOAD64_F(fs0, fs0f, a0)             # fs0<- vBB
    LOAD64_F(fs1, fs1f, a1)             # fs1<- vCC
    c.olt.d     fcc0, fs0, fs1          # Is fs0 < fs1 (ordered: false on NaN)
    li          rTEMP, -1
    bc1t        fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
    c.olt.d     fcc0, fs1, fs0
    li          rTEMP, 1
    bc1t        fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
    c.eq.d      fcc0, fs0, fs1
    li          rTEMP, 0
    bc1t        fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
#endif

    li            rTEMP, 1              # unordered (NaN): cmpg returns 1

TEMPLATE_CMPG_DOUBLE_VFP_finish:
    move     v0, rTEMP                  # v0<- vAA
    RETURN
   1799 
   1800 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
/* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */
    /*
     * Compare two double precision floating-point values.  Puts 0, 1, or -1 into the
     * destination register based on the results of the comparison.
     *
     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
     * on what value we'd like to return when one of the operands is NaN.
     *
     * The operation we're implementing is:
     *   if (x == y)
     *     return 0;
     *   else if (x < y)
     *     return -1;
     *   else if (x > y)
     *     return 1;
     *   else
     *     return {-1,1};  // one or both operands was NaN
     *
     * On entry:
     *    a0 = &op1 [vBB]
     *    a1 = &op2 [vCC]
     *
     * for: cmpl-double, cmpg-double
     */
    /* op vAA, vBB, vCC */

    /* "classic" form */
#ifdef  SOFT_FLOAT
    move rOBJ, a0                       # save a0
    move rBIX, a1                       # save a1
    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
    JAL(__eqdf2)                        # v0<- (vBB == vCC)
    li       rTEMP, 0                   # vAA<- 0
    beqz     v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1 (call clobbered args)
    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
    JAL(__ltdf2)                        # v0<- (vBB < vCC)
    li       rTEMP, -1                  # vAA<- -1
    bltz     v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
    JAL(__gtdf2)                        # v0<- (vBB > vCC)
    li      rTEMP, 1                    # vAA<- 1
    bgtz    v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
#else
    LOAD64_F(fs0, fs0f, a0)             # fs0<- vBB
    LOAD64_F(fs1, fs1f, a1)             # fs1<- vCC
    c.olt.d     fcc0, fs0, fs1          # Is fs0 < fs1 (ordered: false on NaN)
    li          rTEMP, -1
    bc1t        fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
    c.olt.d     fcc0, fs1, fs0
    li          rTEMP, 1
    bc1t        fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
    c.eq.d      fcc0, fs0, fs1
    li          rTEMP, 0
    bc1t        fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
#endif

    li     rTEMP, -1                    # unordered (NaN): cmpl returns -1

TEMPLATE_CMPL_DOUBLE_VFP_finish:
    move     v0, rTEMP                  # v0<- vAA
    RETURN
   1869 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
/* File: mips/TEMPLATE_CMPG_FLOAT_VFP.S */
/* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */
    /*
     * Compare two floating-point values.  Puts 0, 1, or -1 into the
     * destination register based on the results of the comparison.
     *
     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
     * on what value we'd like to return when one of the operands is NaN.
     *
     * The operation we're implementing is:
     *   if (x == y)
     *     return 0;
     *   else if (x < y)
     *     return -1;
     *   else if (x > y)
     *     return 1;
     *   else
     *     return {-1,1};  // one or both operands was NaN
     *
     * On entry:
     *    a0 = &op1 [vBB]
     *    a1 = &op2 [vCC]
     *
     * for: cmpl-float, cmpg-float
     */
    /* op vAA, vBB, vCC */

    /* "classic" form */
#ifdef  SOFT_FLOAT
    LOAD(rOBJ, a0)                      # rOBJ<- vBB
    LOAD(rBIX, a1)                      # rBIX<- vCC
    move     a0, rOBJ                   # a0<- vBB
    move     a1, rBIX                   # a1<- vCC
    JAL(__eqsf2)                        # v0<- (vBB == vCC)
    li       rTEMP, 0                   # vAA<- 0
    beqz     v0, TEMPLATE_CMPG_FLOAT_VFP_finish
    move     a0, rOBJ                   # a0<- vBB (call clobbered args)
    move     a1, rBIX                   # a1<- vCC
    JAL(__ltsf2)                        # v0<- (vBB < vCC)
    li       rTEMP, -1                  # vAA<- -1
    bltz     v0, TEMPLATE_CMPG_FLOAT_VFP_finish
    move     a0, rOBJ                   # a0<- vBB
    move     a1, rBIX                   # a1<- vCC
    JAL(__gtsf2)                        # v0<- (vBB > vCC)
    li      rTEMP, 1                    # vAA<- 1
    bgtz    v0, TEMPLATE_CMPG_FLOAT_VFP_finish
#else
    LOAD_F(fs0, a0)                     # fs0<- vBB
    LOAD_F(fs1, a1)                     # fs1<- vCC
    c.olt.s     fcc0, fs0, fs1          # Is fs0 < fs1 (ordered: false on NaN)
    li          rTEMP, -1
    bc1t        fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
    c.olt.s     fcc0, fs1, fs0
    li          rTEMP, 1
    bc1t        fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
    c.eq.s      fcc0, fs0, fs1
    li          rTEMP, 0
    bc1t        fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
#endif

    li     rTEMP, 1                     # unordered (NaN): cmpg returns 1

TEMPLATE_CMPG_FLOAT_VFP_finish:
    move     v0, rTEMP                  # v0<- vAA
    RETURN
   1939 
   1940 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
/* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */
    /*
     * Compare two floating-point values.  Puts 0, 1, or -1 into the
     * destination register based on the results of the comparison.
     *
     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
     * on what value we'd like to return when one of the operands is NaN.
     *
     * The operation we're implementing is:
     *   if (x == y)
     *     return 0;
     *   else if (x < y)
     *     return -1;
     *   else if (x > y)
     *     return 1;
     *   else
     *     return {-1,1};  // one or both operands was NaN
     *
     * On entry:
     *    a0 = &op1 [vBB]
     *    a1 = &op2 [vCC]
     *
     * for: cmpl-float, cmpg-float
     */
    /* op vAA, vBB, vCC */

    /* "classic" form */
#ifdef  SOFT_FLOAT
    LOAD(rOBJ, a0)                      # rOBJ<- vBB
    LOAD(rBIX, a1)                      # rBIX<- vCC
    move     a0, rOBJ                   # a0<- vBB
    move     a1, rBIX                   # a1<- vCC
    JAL(__eqsf2)                        # v0<- (vBB == vCC)
    li       rTEMP, 0                   # vAA<- 0
    beqz     v0, TEMPLATE_CMPL_FLOAT_VFP_finish
    move     a0, rOBJ                   # a0<- vBB (call clobbered args)
    move     a1, rBIX                   # a1<- vCC
    JAL(__ltsf2)                        # v0<- (vBB < vCC)
    li       rTEMP, -1                  # vAA<- -1
    bltz     v0, TEMPLATE_CMPL_FLOAT_VFP_finish
    move     a0, rOBJ                   # a0<- vBB
    move     a1, rBIX                   # a1<- vCC
    JAL(__gtsf2)                        # v0<- (vBB > vCC)
    li      rTEMP, 1                    # vAA<- 1
    bgtz    v0, TEMPLATE_CMPL_FLOAT_VFP_finish
#else
    LOAD_F(fs0, a0)                     # fs0<- vBB
    LOAD_F(fs1, a1)                     # fs1<- vCC
    c.olt.s     fcc0, fs0, fs1          # Is fs0 < fs1 (ordered: false on NaN)
    li          rTEMP, -1
    bc1t        fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
    c.olt.s     fcc0, fs1, fs0
    li          rTEMP, 1
    bc1t        fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
    c.eq.s      fcc0, fs0, fs1
    li          rTEMP, 0
    bc1t        fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
#endif

    li     rTEMP, -1                    # unordered (NaN): cmpl returns -1

TEMPLATE_CMPL_FLOAT_VFP_finish:
    move     v0, rTEMP                  # v0<- vAA
    RETURN
   2009 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
/* File: mips/TEMPLATE_SQRT_DOUBLE_VFP.S */

    /*
     * 64-bit floating point sqrt operation.
     * If the result is a NaN, bail out to library code to do
     * the right thing.
     *
     * On entry:
     *     a2 src addr of op1
     * On exit:
     *     v0,v1/fv0 = res
     */
#ifdef  SOFT_FLOAT
    LOAD64(rARG0, rARG1, a2)        # a0/a1<- vBB/vBB+1
#else
    LOAD64_F(fa0, fa0f, a2)         # fa0/fa0f<- vBB/vBB+1
    sqrt.d	fv0, fa0
    c.eq.d	fv0, fv0                # result equals itself iff it is not NaN
    bc1t	1f                      # not NaN: skip the libm fallback
#endif
    JAL(sqrt)                       # soft-float always, hard-float only on NaN result
1:
    RETURN
   2037 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
/* File: mips/TEMPLATE_THROW_EXCEPTION_COMMON.S */
    /*
     * Throw an exception from JIT'ed code.
     * On entry:
     *    a0    Dalvik PC that raises the exception
     */
    j      .LhandleException           # tail-jump to the common handler (defined in footer.S)
   2049 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MEM_OP_DECODE
dvmCompiler_TEMPLATE_MEM_OP_DECODE:
/* File: mips/TEMPLATE_MEM_OP_DECODE.S */
#if defined(WITH_SELF_VERIFICATION)
    /*
     * This handler encapsulates heap memory ops for selfVerification mode.
     *
     * The call to the handler is inserted prior to a heap memory operation.
     * This handler then calls a function to decode the memory op, and process
     * it accordingly. Afterwards, the handler changes the return address to
     * skip the memory op so it never gets executed.
     *
     * Registers are saved at negative offsets below the incoming sp (one
     * 4-byte slot per register, indexed by the rX_*/frN constants), and sp
     * is only adjusted afterwards -- the stores and the later sp adjustment
     * must stay consistent.
     *
     * NOTE(review): lw/sw are used here with FP register names (f0..f31);
     * presumably the toolchain accepts this for single-word FP moves --
     * confirm against the build's assembler before touching these.
     */
#ifdef HARD_FLOAT
    /* push f0-f31 onto stack */
    sw      f0, fr0*-4(sp)              # push f0
    sw      f1, fr1*-4(sp)              # push f1
    sw      f2, fr2*-4(sp)              # push f2
    sw      f3, fr3*-4(sp)              # push f3
    sw      f4, fr4*-4(sp)              # push f4
    sw      f5, fr5*-4(sp)              # push f5
    sw      f6, fr6*-4(sp)              # push f6
    sw      f7, fr7*-4(sp)              # push f7
    sw      f8, fr8*-4(sp)              # push f8
    sw      f9, fr9*-4(sp)              # push f9
    sw      f10, fr10*-4(sp)            # push f10
    sw      f11, fr11*-4(sp)            # push f11
    sw      f12, fr12*-4(sp)            # push f12
    sw      f13, fr13*-4(sp)            # push f13
    sw      f14, fr14*-4(sp)            # push f14
    sw      f15, fr15*-4(sp)            # push f15
    sw      f16, fr16*-4(sp)            # push f16
    sw      f17, fr17*-4(sp)            # push f17
    sw      f18, fr18*-4(sp)            # push f18
    sw      f19, fr19*-4(sp)            # push f19
    sw      f20, fr20*-4(sp)            # push f20
    sw      f21, fr21*-4(sp)            # push f21
    sw      f22, fr22*-4(sp)            # push f22
    sw      f23, fr23*-4(sp)            # push f23
    sw      f24, fr24*-4(sp)            # push f24
    sw      f25, fr25*-4(sp)            # push f25
    sw      f26, fr26*-4(sp)            # push f26
    sw      f27, fr27*-4(sp)            # push f27
    sw      f28, fr28*-4(sp)            # push f28
    sw      f29, fr29*-4(sp)            # push f29
    sw      f30, fr30*-4(sp)            # push f30
    sw      f31, fr31*-4(sp)            # push f31

    sub     sp, (32-0)*4                # adjust stack pointer past the FP save area
#endif

    /* push gp registers (except zero, gp, sp, and fp) */
    .set noat
    sw      AT, r_AT*-4(sp)             # push at
    .set at
    sw      v0, r_V0*-4(sp)             # push v0
    sw      v1, r_V1*-4(sp)             # push v1
    sw      a0, r_A0*-4(sp)             # push a0
    sw      a1, r_A1*-4(sp)             # push a1
    sw      a2, r_A2*-4(sp)             # push a2
    sw      a3, r_A3*-4(sp)             # push a3
    sw      t0, r_T0*-4(sp)             # push t0
    sw      t1, r_T1*-4(sp)             # push t1
    sw      t2, r_T2*-4(sp)             # push t2
    sw      t3, r_T3*-4(sp)             # push t3
    sw      t4, r_T4*-4(sp)             # push t4
    sw      t5, r_T5*-4(sp)             # push t5
    sw      t6, r_T6*-4(sp)             # push t6
    sw      t7, r_T7*-4(sp)             # push t7
    sw      s0, r_S0*-4(sp)             # push s0
    sw      s1, r_S1*-4(sp)             # push s1
    sw      s2, r_S2*-4(sp)             # push s2
    sw      s3, r_S3*-4(sp)             # push s3
    sw      s4, r_S4*-4(sp)             # push s4
    sw      s5, r_S5*-4(sp)             # push s5
    sw      s6, r_S6*-4(sp)             # push s6
    sw      s7, r_S7*-4(sp)             # push s7
    sw      t8, r_T8*-4(sp)             # push t8
    sw      t9, r_T9*-4(sp)             # push t9
    sw      k0, r_K0*-4(sp)             # push k0
    sw      k1, r_K1*-4(sp)             # push k1
    sw      ra, r_RA*-4(sp)             # push RA

    # Note: even if we don't save all 32 registers, we still need to
    #       adjust SP by 32 registers due to the way we are storing
    #       the registers on the stack.
    sub     sp, (32-0)*4                # adjust stack pointer

    la     a2, .LdvmSelfVerificationMemOpDecode  # defined in footer.S
    lw     a2, (a2)                     # a2<- function pointer stored at that literal
    move   a0, ra                       # a0<- link register
    move   a1, sp                       # a1<- stack pointer (base of saved-register frame)
    JALR(a2)                            # call dvmSelfVerificationMemOpDecode(ra, sp)

    /* pop gp registers (except zero, gp, sp, and fp) */
    # Note: even if we don't save all 32 registers, we still need to
    #       adjust SP by 32 registers due to the way we are storing
    #       the registers on the stack.
    add     sp, (32-0)*4                # adjust stack pointer back to the GP save base
    .set noat
    lw      AT, r_AT*-4(sp)             # pop at
    .set at
    lw      v0, r_V0*-4(sp)             # pop v0
    lw      v1, r_V1*-4(sp)             # pop v1
    lw      a0, r_A0*-4(sp)             # pop a0
    lw      a1, r_A1*-4(sp)             # pop a1
    lw      a2, r_A2*-4(sp)             # pop a2
    lw      a3, r_A3*-4(sp)             # pop a3
    lw      t0, r_T0*-4(sp)             # pop t0
    lw      t1, r_T1*-4(sp)             # pop t1
    lw      t2, r_T2*-4(sp)             # pop t2
    lw      t3, r_T3*-4(sp)             # pop t3
    lw      t4, r_T4*-4(sp)             # pop t4
    lw      t5, r_T5*-4(sp)             # pop t5
    lw      t6, r_T6*-4(sp)             # pop t6
    lw      t7, r_T7*-4(sp)             # pop t7
    lw      s0, r_S0*-4(sp)             # pop s0
    lw      s1, r_S1*-4(sp)             # pop s1
    lw      s2, r_S2*-4(sp)             # pop s2
    lw      s3, r_S3*-4(sp)             # pop s3
    lw      s4, r_S4*-4(sp)             # pop s4
    lw      s5, r_S5*-4(sp)             # pop s5
    lw      s6, r_S6*-4(sp)             # pop s6
    lw      s7, r_S7*-4(sp)             # pop s7
    lw      t8, r_T8*-4(sp)             # pop t8
    lw      t9, r_T9*-4(sp)             # pop t9
    lw      k0, r_K0*-4(sp)             # pop k0
    lw      k1, r_K1*-4(sp)             # pop k1
    lw      ra, r_RA*-4(sp)             # pop RA

#ifdef HARD_FLOAT
    /* pop f0-f31 from stack */
    add     sp, (32-0)*4                # adjust stack pointer back to the FP save base
    lw      f0, fr0*-4(sp)              # pop f0
    lw      f1, fr1*-4(sp)              # pop f1
    lw      f2, fr2*-4(sp)              # pop f2
    lw      f3, fr3*-4(sp)              # pop f3
    lw      f4, fr4*-4(sp)              # pop f4
    lw      f5, fr5*-4(sp)              # pop f5
    lw      f6, fr6*-4(sp)              # pop f6
    lw      f7, fr7*-4(sp)              # pop f7
    lw      f8, fr8*-4(sp)              # pop f8
    lw      f9, fr9*-4(sp)              # pop f9
    lw      f10, fr10*-4(sp)            # pop f10
    lw      f11, fr11*-4(sp)            # pop f11
    lw      f12, fr12*-4(sp)            # pop f12
    lw      f13, fr13*-4(sp)            # pop f13
    lw      f14, fr14*-4(sp)            # pop f14
    lw      f15, fr15*-4(sp)            # pop f15
    lw      f16, fr16*-4(sp)            # pop f16
    lw      f17, fr17*-4(sp)            # pop f17
    lw      f18, fr18*-4(sp)            # pop f18
    lw      f19, fr19*-4(sp)            # pop f19
    lw      f20, fr20*-4(sp)            # pop f20
    lw      f21, fr21*-4(sp)            # pop f21
    lw      f22, fr22*-4(sp)            # pop f22
    lw      f23, fr23*-4(sp)            # pop f23
    lw      f24, fr24*-4(sp)            # pop f24
    lw      f25, fr25*-4(sp)            # pop f25
    lw      f26, fr26*-4(sp)            # pop f26
    lw      f27, fr27*-4(sp)            # pop f27
    lw      f28, fr28*-4(sp)            # pop f28
    lw      f29, fr29*-4(sp)            # pop f29
    lw      f30, fr30*-4(sp)            # pop f30
    lw      f31, fr31*-4(sp)            # pop f31
#endif

    RETURN
#endif
   2220 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_STRING_COMPARETO
dvmCompiler_TEMPLATE_STRING_COMPARETO:
/* File: mips/TEMPLATE_STRING_COMPARETO.S */
    /*
     * String's compareTo.
     *
     * Requires a0/a1 to have been previously checked for null.  Will
     * return negative if this's string is < comp, 0 if they are the
     * same and positive if >.
     *
     * IMPORTANT NOTE:
     *
     * This code relies on hard-coded offsets for string objects, and must be
     * kept in sync with definitions in UtfString.h.  See asm-constants.h
     *
     * On entry:
     *    a0:   this object pointer
     *    a1:   comp object pointer
     *
     */

     subu  v0, a0, a1                # Same object?  (pointer equality)
     bnez  v0, 1f
     RETURN                          # identical references: v0 == 0 already
1:
     lw    t0, STRING_FIELDOFF_OFFSET(a0)   # t0<- this.offset
     lw    t1, STRING_FIELDOFF_OFFSET(a1)   # t1<- comp.offset
     lw    t2, STRING_FIELDOFF_COUNT(a0)    # t2<- this.count
     lw    a2, STRING_FIELDOFF_COUNT(a1)    # a2<- comp.count
     lw    a0, STRING_FIELDOFF_VALUE(a0)    # a0<- this.value (char array)
     lw    a1, STRING_FIELDOFF_VALUE(a1)    # a1<- comp.value (char array)

    /*
     * At this point, we have this/comp:
     *    offset: t0/t1
     *    count:  t2/a2
     *    value:  a0/a1
     * We're going to compute
     *    a3 <- countDiff
     *    a2 <- minCount
     */
     subu  a3, t2, a2                # a3<- countDiff
     sleu  t7, t2, a2                # t7<- (this.count <= comp.count)
     movn  a2, t2, t7                # a2<- minCount = min(t2, a2)

     /*
      * Note: data pointers point to first element.
      */
     addu  a0, 16                    # point to contents[0]
     addu  a1, 16                    # point to contents[0]

     /* Now, build pointers to the string data */
     sll   t7, t0, 1                 # multiply offset by 2 (chars are 16 bits)
     addu  a0, a0, t7                # a0<- &this chars[offset]
     sll   t7, t1, 1                 # multiply offset by 2
     addu  a1, a1, t7                # a1<- &comp chars[offset]

     /*
      * At this point we have:
      *   a0: *this string data
      *   a1: *comp string data
      *   a2: iteration count for comparison
      *   a3: value to return if the first part of the string is equal
      *   v0: reserved for result
      *   t0-t5 available for loading string data
      */

     subu  a2, 2                     # pre-bias for the 2 unrolled checks below
     bltz  a2, do_remainder2         # fewer than 2 chars total

     /*
      * Unroll the first two checks so we can quickly catch early mismatch
      * on long strings (but preserve incoming alignment)
      */
     lhu   t0, 0(a0)
     lhu   t1, 0(a1)
     subu  v0, t0, t1                # v0<- char difference at [0]
     beqz  v0, 1f
     RETURN                          # early mismatch: return the difference
1:
     lhu   t2, 2(a0)
     lhu   t3, 2(a1)
     subu  v0, t2, t3                # v0<- char difference at [1]
     beqz  v0, 2f
     RETURN
2:
     addu  a0, 4                     # offset to contents[2]
     addu  a1, 4                     # offset to contents[2]
     li    t7, 28
     bgt   a2, t7, do_memcmp16      # more than 28 chars left: hand off to __memcmp16
     subu  a2, 3                     # pre-bias for the 3-at-a-time loop
     bltz  a2, do_remainder          # fewer than 3 chars left

loopback_triple:                     # compare three chars per iteration
     lhu   t0, 0(a0)
     lhu   t1, 0(a1)
     subu  v0, t0, t1
     beqz  v0, 1f
     RETURN
1:
     lhu   t2, 2(a0)
     lhu   t3, 2(a1)
     subu  v0, t2, t3
     beqz  v0, 2f
     RETURN
2:
     lhu   t4, 4(a0)
     lhu   t5, 4(a1)
     subu  v0, t4, t5
     beqz  v0, 3f
     RETURN
3:
     addu  a0, 6                     # offset to contents[i+3]
     addu  a1, 6                     # offset to contents[i+3]
     subu  a2, 3
     bgez  a2, loopback_triple

do_remainder:
     addu  a2, 3                     # undo the loop bias; a2<- chars left (0..2)
     beqz  a2, returnDiff            # all equal so far: return countDiff

loopback_single:                     # compare one char per iteration
     lhu   t0, 0(a0)
     lhu   t1, 0(a1)
     subu  v0, t0, t1
     bnez  v0, 1f                    # mismatch: return the difference
     addu  a0, 2                     # offset to contents[i+1]
     addu  a1, 2                     # offset to contents[i+1]
     subu  a2, 1
     bnez  a2, loopback_single

returnDiff:
     move  v0, a3                    # common prefix equal: result is countDiff
1:
     RETURN

do_remainder2:                       # fewer than 2 chars total
     addu  a2, 2                     # undo the unroll bias
     bnez  a2, loopback_single
     move  v0, a3                    # zero chars to compare: result is countDiff
     RETURN

    /* Long string case */
do_memcmp16:
     move  rOBJ, a3                  # save return value if strings are equal
     JAL(__memcmp16)                 # v0<- __memcmp16(a0, a1, a2)
     seq   t0, v0, zero              # t0<- (chars all equal)
     movn  v0, rOBJ, t0              # overwrite return value if strings are equal
     RETURN
   2372 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_STRING_INDEXOF
dvmCompiler_TEMPLATE_STRING_INDEXOF:
/* File: mips/TEMPLATE_STRING_INDEXOF.S */
    /*
     * String's indexOf.
     *
     * Requires a0 to have been previously checked for null.  Will
     * return index of match of a1 in v0.
     *
     * IMPORTANT NOTE:
     *
     * This code relies on hard-coded offsets for string objects, and must be
     * kept in sync with definitions in UtfString.h.  See asm-constants.h
     *
     * On entry:
     *    a0:   string object pointer
     *    a1:   char to match
     *    a2:   Starting offset in string data
     */

     lw    t0, STRING_FIELDOFF_OFFSET(a0)   # t0<- string.offset
     lw    t1, STRING_FIELDOFF_COUNT(a0)    # t1<- string.count (length)
     lw    v0, STRING_FIELDOFF_VALUE(a0)    # v0<- string.value (char array)

    /*
     * At this point, we have:
     *    v0: object pointer
     *    a1: char to match
     *    a2: starting offset
     *    t0: offset
     *    t1: string length
     */

    /* Point to first element */
     addu  v0, 16                    # point to contents[0]

    /* Build pointer to start of string data */
     sll   t7, t0, 1                 # multiply offset by 2 (chars are 16 bits)
     addu  v0, v0, t7

    /* Save a copy of starting data in v1 */
     move  v1, v0

    /* Clamp start to [0..count] */
     slt   t7, a2, zero              # t7<- (start < 0)
     movn  a2, zero, t7              # if so, start = 0
     sgt   t7, a2, t1                # t7<- (start > count)
     movn  a2, t1, t7                # if so, start = count

    /* Build pointer to start of data to compare */
     sll   t7, a2, 1                # multiply offset by 2
     addu  v0, v0, t7

    /* Compute iteration count */
     subu  a3, t1, a2                # a3<- count - start

    /*
     * At this point we have:
     *   v0: start of data to test
     *   a1: char to compare
     *   a3: iteration count
     *   v1: original start of string
     *   t0-t7 available for loading string data
     */
     subu  a3, 4                     # pre-bias for the 4-at-a-time loop
     bltz  a3, indexof_remainder     # fewer than 4 chars to scan

indexof_loop4:                       # scan four chars per iteration
     lhu   t0, 0(v0)
     beq   t0, a1, match_0
     lhu   t0, 2(v0)
     beq   t0, a1, match_1
     lhu   t0, 4(v0)
     beq   t0, a1, match_2
     lhu   t0, 6(v0)
     beq   t0, a1, match_3
     addu  v0, 8                     # offset to contents[i+4]
     subu  a3, 4
     bgez  a3, indexof_loop4

indexof_remainder:
     addu  a3, 4                     # undo the loop bias; a3<- chars left (0..3)
     beqz  a3, indexof_nomatch

indexof_loop1:                       # scan one char per iteration
     lhu   t0, 0(v0)
     beq   t0, a1, match_0
     addu  v0, 2                     # offset to contents[i+1]
     subu  a3, 1
     bnez  a3, indexof_loop1

indexof_nomatch:
     li    v0, -1                    # not found
     RETURN

match_0:                             # hit at current pointer
     subu  v0, v1                    # byte distance from start of string
     sra   v0, v0, 1                 # divide by 2 to get char index
     RETURN
match_1:                             # hit at current pointer + 1 char
     addu  v0, 2
     subu  v0, v1
     sra   v0, v0, 1                 # divide by 2
     RETURN
match_2:                             # hit at current pointer + 2 chars
     addu  v0, 4
     subu  v0, v1
     sra   v0, v0, 1                 # divide by 2
     RETURN
match_3:                             # hit at current pointer + 3 chars
     addu  v0, 6
     subu  v0, v1
     sra   v0, v0, 1                 # divide by 2
     RETURN
   2489 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INTERPRET
dvmCompiler_TEMPLATE_INTERPRET:
/* File: mips/TEMPLATE_INTERPRET.S */
    /*
     * This handler transfers control to the interpreter without performing
     * any lookups.  It may be called either as part of a normal chaining
     * operation, or from the transition code in header.S.  We distinguish
     * the two cases by looking at the link register.  If called from a
     * translation chain, it will point to the chaining Dalvik PC.
     * On entry:
     *    ra - if NULL:
     *        a1 - the Dalvik PC to begin interpretation.
     *    else
     *        [ra] contains Dalvik PC to begin interpretation
     *    rSELF - pointer to thread
     *    rFP - Dalvik frame pointer
     */
    la      t0, dvmJitToInterpPunt      # t0<- interpreter punt entry point
    move    a0, a1                      # assume ra == NULL case: a0<- dPC from a1
    beq     ra, zero, 1f
    lw      a0, 0(ra)                   # chained case: a0<- dPC stored at [ra]
1:
    jr      t0                          # tail-jump to dvmJitToInterpPunt(dPC)
    # doesn't return
   2516 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MONITOR_ENTER
dvmCompiler_TEMPLATE_MONITOR_ENTER:
/* File: mips/TEMPLATE_MONITOR_ENTER.S */
    /*
     * Call out to the runtime to lock an object.  Because this thread
     * may have been suspended in THREAD_MONITOR state and the Jit's
     * translation cache subsequently cleared, we cannot return directly.
     * Instead, unconditionally transition to the interpreter to resume.
     *
     * On entry:
     *    a0 - self pointer
     *    a1 - the object (which has already been null-checked by the caller)
     *    rPC - the Dalvik PC of the following instruction.
     */
    la     a2, .LdvmLockObject
    lw     t9, (a2)                             # t9<- &dvmLockObject
    sw     zero, offThread_inJitCodeCache(a0)   # record that we're not returning
    JALR(t9)                                    # dvmLockObject(self, obj)
    lw     gp, STACK_OFFSET_GP(sp)              # restore gp clobbered by the call

    la     a2, .LdvmJitToInterpNoChain
    lw     a2, (a2)                             # a2<- no-chain interpreter entry

    # Bail to interpreter - no chain [note - rPC still contains dPC]
#if defined(WITH_JIT_TUNING)
    li      a0, kHeavyweightMonitor             # reason code for the tuning stats
#endif
    jr      a2                                  # doesn't return
   2547 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
/* File: mips/TEMPLATE_MONITOR_ENTER_DEBUG.S */
    /*
     * To support deadlock prediction, this version of MONITOR_ENTER
     * will always call the heavyweight dvmLockObject, check for an
     * exception and then bail out to the interpreter.
     *
     * On entry:
     *    a0 - self pointer
     *    a1 - the object (which has already been null-checked by the caller)
     *    rPC - the Dalvik PC of the following instruction.
     *
     */
    la     a2, .LdvmLockObject
    lw     t9, (a2)                             # t9<- &dvmLockObject
    sw     zero, offThread_inJitCodeCache(a0)   # record that we're not returning
    JALR(t9)                                    # dvmLockObject(self, obj)
    lw     gp, STACK_OFFSET_GP(sp)              # restore gp clobbered by the call

    # test for exception
    lw     a1, offThread_exception(rSELF)       # a1<- self->exception
    beqz   a1, 1f                               # none pending: bail normally
    sub    a0, rPC, 2                           # roll dPC back to this monitor instruction
    j      .LhandleException                    # doesn't return
1:
    # Bail to interpreter - no chain [note - rPC still contains dPC]
#if defined(WITH_JIT_TUNING)
    li     a0, kHeavyweightMonitor              # reason code for the tuning stats
#endif
    la     a2, .LdvmJitToInterpNoChain
    lw     a2, (a2)                             # a2<- no-chain interpreter entry
    jr     a2                                   # doesn't return
   2583 
   2584 /* ------------------------------ */
   2585     .balign 4
   2586     .global dvmCompiler_TEMPLATE_RESTORE_STATE
   2587 dvmCompiler_TEMPLATE_RESTORE_STATE:
   2588 /* File: mips/TEMPLATE_RESTORE_STATE.S */
   2589     /*
   2590      * This handler restores state following a selfVerification memory access.
   2591      * On entry:
   2592      *    a0 - offset from rSELF to the 1st element of the coreRegs save array.
   2593      * Note: the following registers are not restored
   2594      *       zero, AT, gp, sp, fp, ra
   2595      */
   2596 
   2597     add     a0, a0, rSELF               # pointer to heapArgSpace.coreRegs[0]
   2598 #if 0
   2599     lw      zero, r_ZERO*4(a0)          # restore zero
   2600 #endif
   2601     .set noat
   2602     lw      AT, r_AT*4(a0)              # restore at
   2603     .set at
   2604     lw      v0, r_V0*4(a0)              # restore v0
   2605     lw      v1, r_V1*4(a0)              # restore v1
   2606 
   2607     lw      a1, r_A1*4(a0)              # restore a1
   2608     lw      a2, r_A2*4(a0)              # restore a2
   2609     lw      a3, r_A3*4(a0)              # restore a3
   2610 
   2611     lw      t0, r_T0*4(a0)              # restore t0
   2612     lw      t1, r_T1*4(a0)              # restore t1
   2613     lw      t2, r_T2*4(a0)              # restore t2
   2614     lw      t3, r_T3*4(a0)              # restore t3
   2615     lw      t4, r_T4*4(a0)              # restore t4
   2616     lw      t5, r_T5*4(a0)              # restore t5
   2617     lw      t6, r_T6*4(a0)              # restore t6
   2618     lw      t7, r_T7*4(a0)              # restore t7
   2619 
   2620     lw      s0, r_S0*4(a0)              # restore s0
   2621     lw      s1, r_S1*4(a0)              # restore s1
   2622     lw      s2, r_S2*4(a0)              # restore s2
   2623     lw      s3, r_S3*4(a0)              # restore s3
   2624     lw      s4, r_S4*4(a0)              # restore s4
   2625     lw      s5, r_S5*4(a0)              # restore s5
   2626     lw      s6, r_S6*4(a0)              # restore s6
   2627     lw      s7, r_S7*4(a0)              # restore s7
   2628 
   2629     lw      t8, r_T8*4(a0)              # restore t8
   2630     lw      t9, r_T9*4(a0)              # restore t9
   2631 
   2632     lw      k0, r_K0*4(a0)              # restore k0
   2633     lw      k1, r_K1*4(a0)              # restore k1
   2634 
   2635 #if 0
   2636     lw      gp, r_GP*4(a0)              # restore gp
   2637     lw      sp, r_SP*4(a0)              # restore sp
   2638     lw      fp, r_FP*4(a0)              # restore fp
   2639     lw      ra, r_RA*4(a0)              # restore ra
   2640 #endif
   2641 
   2642 /* #ifdef HARD_FLOAT */
   2643 #if 0
   2644     lw      f0, fr0*4(a0)               # restore f0
   2645     lw      f1, fr1*4(a0)               # restore f1
   2646     lw      f2, fr2*4(a0)               # restore f2
   2647     lw      f3, fr3*4(a0)               # restore f3
   2648     lw      f4, fr4*4(a0)               # restore f4
   2649     lw      f5, fr5*4(a0)               # restore f5
   2650     lw      f6, fr6*4(a0)               # restore f6
   2651     lw      f7, fr7*4(a0)               # restore f7
   2652     lw      f8, fr8*4(a0)               # restore f8
   2653     lw      f9, fr9*4(a0)               # restore f9
   2654     lw      f10, fr10*4(a0)             # restore f10
   2655     lw      f11, fr11*4(a0)             # restore f11
   2656     lw      f12, fr12*4(a0)             # restore f12
   2657     lw      f13, fr13*4(a0)             # restore f13
   2658     lw      f14, fr14*4(a0)             # restore f14
   2659     lw      f15, fr15*4(a0)             # restore f15
   2660     lw      f16, fr16*4(a0)             # restore f16
   2661     lw      f17, fr17*4(a0)             # restore f17
   2662     lw      f18, fr18*4(a0)             # restore f18
   2663     lw      f19, fr19*4(a0)             # restore f19
   2664     lw      f20, fr20*4(a0)             # restore f20
   2665     lw      f21, fr21*4(a0)             # restore f21
   2666     lw      f22, fr22*4(a0)             # restore f22
   2667     lw      f23, fr23*4(a0)             # restore f23
   2668     lw      f24, fr24*4(a0)             # restore f24
   2669     lw      f25, fr25*4(a0)             # restore f25
   2670     lw      f26, fr26*4(a0)             # restore f26
   2671     lw      f27, fr27*4(a0)             # restore f27
   2672     lw      f28, fr28*4(a0)             # restore f28
   2673     lw      f29, fr29*4(a0)             # restore f29
   2674     lw      f30, fr30*4(a0)             # restore f30
   2675     lw      f31, fr31*4(a0)             # restore f31
   2676 #endif
   2677 
   2678     lw      a0, r_A1*4(a0)              # restore a0
   2679     RETURN
   2680 
   2681 /* ------------------------------ */
   2682     .balign 4
   2683     .global dvmCompiler_TEMPLATE_SAVE_STATE
   2684 dvmCompiler_TEMPLATE_SAVE_STATE:
   2685 /* File: mips/TEMPLATE_SAVE_STATE.S */
   2686     /*
   2687      * This handler performs a register save for selfVerification mode.
   2688      * On entry:
   2689      *    Top of stack + 4: a1 value to save
   2690      *    Top of stack + 0: a0 value to save
   2691      *    a0 - offset from rSELF to the beginning of the heapArgSpace record
   2692      *    a1 - the value of regMap
   2693      *
   2694      * The handler must save regMap, r0-r31, f0-f31 if FPU, and then return with
   2695      * r0-r31 with their original values (note that this means a0 and a1 must take
   2696      * the values on the stack - not the ones in those registers on entry.
   2697      * Finally, the two registers previously pushed must be popped.
   2698      * Note: the following registers are not saved
   2699      *       zero, AT, gp, sp, fp, ra
   2700      */
   2701     add     a0, a0, rSELF               # pointer to heapArgSpace
   2702     sw      a1, 0(a0)                   # save regMap
   2703     add     a0, a0, 4                   # pointer to coreRegs
   2704 #if 0
   2705     sw      zero, r_ZERO*4(a0)          # save zero
   2706 #endif
   2707     .set noat
   2708     sw      AT, r_AT*4(a0)              # save at
   2709     .set at
   2710     sw      v0, r_V0*4(a0)              # save v0
   2711     sw      v1, r_V1*4(a0)              # save v1
   2712 
   2713     lw      a1, 0(sp)                   # recover a0 value
   2714     sw      a1, r_A0*4(a0)              # save a0
   2715     lw      a1, 4(sp)                   # recover a1 value
   2716     sw      a1, r_A1*4(a0)              # save a1
   2717     sw      a2, r_A2*4(a0)              # save a2
   2718     sw      a3, r_A3*4(a0)              # save a3
   2719 
   2720     sw      t0, r_T0*4(a0)              # save t0
   2721     sw      t1, r_T1*4(a0)              # save t1
   2722     sw      t2, r_T2*4(a0)              # save t2
   2723     sw      t3, r_T3*4(a0)              # save t3
   2724     sw      t4, r_T4*4(a0)              # save t4
   2725     sw      t5, r_T5*4(a0)              # save t5
   2726     sw      t6, r_T6*4(a0)              # save t6
   2727     sw      t7, r_T7*4(a0)              # save t7
   2728 
   2729     sw      s0, r_S0*4(a0)              # save s0
   2730     sw      s1, r_S1*4(a0)              # save s1
   2731     sw      s2, r_S2*4(a0)              # save s2
   2732     sw      s3, r_S3*4(a0)              # save s3
   2733     sw      s4, r_S4*4(a0)              # save s4
   2734     sw      s5, r_S5*4(a0)              # save s5
   2735     sw      s6, r_S6*4(a0)              # save s6
   2736     sw      s7, r_S7*4(a0)              # save s7
   2737 
   2738     sw      t8, r_T8*4(a0)              # save t8
   2739     sw      t9, r_T9*4(a0)              # save t9
   2740 
   2741     sw      k0, r_K0*4(a0)              # save k0
   2742     sw      k1, r_K1*4(a0)              # save k1
   2743 
   2744 #if 0
   2745     sw      gp, r_GP*4(a0)              # save gp
   2746     sw      sp, r_SP*4(a0)              # save sp (need to adjust??? )
   2747     sw      fp, r_FP*4(a0)              # save fp
   2748     sw      ra, r_RA*4(a0)              # save ra
   2749 #endif
   2750 
   2751 /* #ifdef HARD_FLOAT */
   2752 #if 0
   2753     sw      f0, fr0*4(a0)               # save f0
   2754     sw      f1, fr1*4(a0)               # save f1
   2755     sw      f2, fr2*4(a0)               # save f2
   2756     sw      f3, fr3*4(a0)               # save f3
   2757     sw      f4, fr4*4(a0)               # save f4
   2758     sw      f5, fr5*4(a0)               # save f5
   2759     sw      f6, fr6*4(a0)               # save f6
   2760     sw      f7, fr7*4(a0)               # save f7
   2761     sw      f8, fr8*4(a0)               # save f8
   2762     sw      f9, fr9*4(a0)               # save f9
   2763     sw      f10, fr10*4(a0)             # save f10
   2764     sw      f11, fr11*4(a0)             # save f11
   2765     sw      f12, fr12*4(a0)             # save f12
   2766     sw      f13, fr13*4(a0)             # save f13
   2767     sw      f14, fr14*4(a0)             # save f14
   2768     sw      f15, fr15*4(a0)             # save f15
   2769     sw      f16, fr16*4(a0)             # save f16
   2770     sw      f17, fr17*4(a0)             # save f17
   2771     sw      f18, fr18*4(a0)             # save f18
   2772     sw      f19, fr19*4(a0)             # save f19
   2773     sw      f20, fr20*4(a0)             # save f20
   2774     sw      f21, fr21*4(a0)             # save f21
   2775     sw      f22, fr22*4(a0)             # save f22
   2776     sw      f23, fr23*4(a0)             # save f23
   2777     sw      f24, fr24*4(a0)             # save f24
   2778     sw      f25, fr25*4(a0)             # save f25
   2779     sw      f26, fr26*4(a0)             # save f26
   2780     sw      f27, fr27*4(a0)             # save f27
   2781     sw      f28, fr28*4(a0)             # save f28
   2782     sw      f29, fr29*4(a0)             # save f29
   2783     sw      f30, fr30*4(a0)             # save f30
   2784     sw      f31, fr31*4(a0)             # save f31
   2785 #endif
   2786 
   2787     lw      a1, 0(sp)                   # recover a0 value
   2788     lw      a1, 4(sp)                   # recover a1 value
   2789     sub     sp, sp, 8                   # adjust stack ptr
   2790     RETURN
   2791 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
/* File: mips/TEMPLATE_PERIODIC_PROFILING.S */
    /*
     * Increment profile counter for this trace, and decrement
     * sample counter.  If sample counter goes below zero, turn
     * off profiling.
     *
     * On entry
     * (ra-16) is address of pointer to counter.  Note: the counter
     *    actually exists 16 bytes before the return target for mips.
     *     - 4 bytes for prof count addr.
     *     - 4 bytes for chain cell offset (2bytes 32 bit aligned).
     *     - 4 bytes for call TEMPLATE_PERIODIC_PROFILING.
     *     - 4 bytes for call delay slot.
     */
     lw     a0, -16(ra)                 # a0<- address of this trace's profile counter
     lw     a1, offThread_pProfileCountdown(rSELF)  # a1<- &profile countdown
     lw     a2, 0(a0)                   # get counter
     lw     a3, 0(a1)                   # get countdown timer
     addu   a2, 1                       # bump the trace's profile count
     sub    a3, 1                       # FIXME - bug in ARM code???
     bltz   a3, .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
     sw     a2, 0(a0)                   # store updated count
     sw     a3, 0(a1)                   # store updated countdown
     RETURN
.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
     move   rTEMP, ra                   # preserve ra across the call below
     la     a0, dvmJitTraceProfilingOff
     JALR(a0)                           # dvmJitTraceProfilingOff()
     jr     rTEMP                       # return via the saved ra
   2825 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_RETURN_PROF
dvmCompiler_TEMPLATE_RETURN_PROF:
/* File: mips/TEMPLATE_RETURN_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_RETURN.S */
    /*
     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
     * If the stored value in returnAddr
     * is non-zero, the caller is compiled by the JIT thus return to the
     * address in the code cache following the invoke instruction. Otherwise
     * return to the special dvmJitToInterpNoChain entry point.
     */
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a2 and ra
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(ra, 12)

    # a0=rSELF
    move    a0, rSELF
    la      t9, dvmFastMethodTraceExit
    JALR(t9)                            # dvmFastMethodTraceExit(self)
    lw      gp, STACK_OFFSET_GP(sp)     # restore gp clobbered by the call

    # restore a0-a2 and ra
    SCRATCH_LOAD(ra, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif
    SAVEAREA_FROM_FP(a0, rFP)           # a0<- saveArea (old)
    lw      t0, offStackSaveArea_prevFrame(a0)     # t0<- saveArea->prevFrame
    lbu     t1, offThread_breakFlags(rSELF)        # t1<- breakFlags
    lw      rPC, offStackSaveArea_savedPc(a0)      # rPC<- saveArea->savedPc
#if !defined(WITH_SELF_VERIFICATION)
    lw      t2,  offStackSaveArea_returnAddr(a0)   # t2<- chaining cell ret
#else
    move    t2, zero                               # disable chaining
#endif
    lw      a2, offStackSaveArea_method - sizeofStackSaveArea(t0)
                                                   # a2<- method we're returning to
#if !defined(WITH_SELF_VERIFICATION)
    beq     a2, zero, 1f                           # a2 == NULL: bail to interpreter
#else
    bne     a2, zero, 2f
    JALR(ra)                                       # punt to interpreter and compare state
    # DOUG: assume this does not return ???
2:
#endif
    la      t4, .LdvmJitToInterpNoChainNoProfile   # defined in footer.S
    lw      a1, (t4)                               # a1<- no-chain, no-profile entry
    move    rFP, t0                                # publish new FP
    beq     a2, zero, 4f                           # no callee method: skip clazz load
    lw      t0, offMethod_clazz(a2)                # t0<- method->clazz
4:

    sw      a2, offThread_method(rSELF)            # self->method = newSave->method
    lw      a0, offClassObject_pDvmDex(t0)         # a0<- method->clazz->pDvmDex
    sw      rFP, offThread_curFrame(rSELF)         # self->curFrame = fp
    add     rPC, rPC, 3*2                          # publish new rPC (step past the invoke)
    sw      a0, offThread_methodClassDex(rSELF)
    movn    t2, zero, t1                           # check the breakFlags and
                                                   # clear the chaining cell address
    sw      t2, offThread_inJitCodeCache(rSELF)    # in code cache or not
    beq     t2, zero, 3f                           # chaining cell exists?
    JALR(t2)                                       # jump to the chaining cell
    # DOUG: assume this does not return ???
3:
#if defined(WITH_JIT_TUNING)
    li      a0, kCallsiteInterpreted               # reason code for the tuning stats
#endif
    j       a1                                     # callsite is interpreted
1:
    # returning to a NULL method: exit the interpreter activation entirely
    sw      zero, offThread_inJitCodeCache(rSELF)  # reset inJitCodeCache
    SAVE_PC_TO_SELF()                              # SAVE_PC_FP_TO_SELF()
    SAVE_FP_TO_SELF()
    la      t4, .LdvmMterpStdBail                  # defined in footer.S
    lw      a2, (t4)
    move    a0, rSELF                              # Expecting rSELF in a0
    JALR(a2)                                       # exit the interpreter
    # DOUG: assume this does not return ???

#undef TEMPLATE_INLINE_PROFILING
   2912 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
    /*
     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
     * runtime-resolved callee.
     *
     * Profiling variant: identical to TEMPLATE_INVOKE_METHOD_NO_OPT except
     * TEMPLATE_INLINE_PROFILING is defined above, which compiles in the
     * dvmFastMethodTraceEnter call before control leaves this template.
     */
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
    # Stack-overflow check: if the new frame bottom would dip below
    # interpStackEnd, return to the caller so it can raise the exception.
    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
    RETURN                                        # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
    lw     t0, offMethod_accessFlags(a0)          # t0<- methodToCall->accessFlags
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    # If any break flags are set, let the interpreter handle the call.
    beqz   t8, 2f                                 # breakFlags != 0
    RETURN                                        # bail to the interpreter

2:
    and    t6, t0, ACC_NATIVE
    beqz   t6, 3f
#if !defined(WITH_SELF_VERIFICATION)
    j      .LinvokeNative
#else
    RETURN                                        # bail to the interpreter
#endif

3:
    # continue executing the next instruction through the interpreter
    la     t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
    lw     rTEMP, (t0)                            # rTEMP<- &dvmJitToInterpTraceSelectNoChain
    lw     a3, offClassObject_pDvmDex(t9)         # a3<- method->clazz->pDvmDex

    # Update "thread" values for the new method
    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
    move   rFP, a1                                # fp = newFp
    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a3 (the scratch slots survive the trace-enter call)
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(a3, 12)

    # a0=methodToCall, a1=rSELF
    move   a1, rSELF
    la     t9, dvmFastMethodTraceEnter
    JALR(t9)
    lw     gp, STACK_OFFSET_GP(sp)                # restore gp clobbered by the call

    # restore a0-a3
    SCRATCH_LOAD(a3, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif

    # Start executing the callee
#if defined(WITH_JIT_TUNING)
    li     a0, kInlineCacheMiss
#endif
    jr     rTEMP                                  # dvmJitToInterpTraceSelectNoChain
   3002 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */
    /*
     * For monomorphic callsite, setup the Dalvik frame and return to the
     * Thumb code through the link register to transfer control to the callee
     * method through a dedicated chaining cell.
     * (Comment inherited from the ARM template; on MIPS the "link register"
     * is ra and the return is performed by RETURN at the end.)
     */
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    # methodToCall is guaranteed to be non-native
.LinvokeChainProf:
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    # t2 holds the callsite's punt-to-interpreter address: the caller lays
    # out ra -> chaining-cell branch, ra+8 -> interpreter punt.
    add    t2, ra, 8                              # setup the punt-to-interp address
                                                  # 8 bytes skips branch and delay slot
    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
    # Stack-overflow check: punt to the interpreter to raise the exception.
    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
    jr     t2                                     # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    # If any break flags are set, punt to the interpreter.
    beqz   t8, 2f                                 # breakFlags != 0
    jr     t2                                     # bail to the interpreter

2:
    lw     a3, offClassObject_pDvmDex(t9)         # a3<- methodToCall->clazz->pDvmDex

    # Update "thread" values for the new method
    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
    move   rFP, a1                                # fp = newFp
    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a2 and ra (ra must survive: RETURN below jumps through it)
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(ra, 12)

    move   a1, rSELF
    # a0=methodToCall, a1=rSELF
    la     t9, dvmFastMethodTraceEnter
    # NOTE(review): bare jalr here vs. the JALR() macro used by the sibling
    # templates — presumably equivalent on this build; confirm against the
    # macro definition in mips-defines.h.
    jalr   t9
    lw     gp, STACK_OFFSET_GP(sp)                # restore gp clobbered by the call

    # restore a0-a2 and ra
    SCRATCH_LOAD(ra, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif
    RETURN                                        # return to the callee-chaining cell
   3078 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
    /*
     * For polymorphic callsite, check whether the cached class pointer matches
     * the current one. If so setup the Dalvik frame and return to the
     * Thumb code through the link register to transfer control to the callee
     * method through a dedicated chaining cell.
     *
     * The predicted chaining cell is declared in ArmLIR.h with the
     * following layout:
     *
     *  typedef struct PredictedChainingCell {
     *      u4 branch;
     *      u4 delay_slot;
     *      const ClassObject *clazz;
     *      const Method *method;
     *      u4 counter;
     *  } PredictedChainingCell;
     *
     * Upon returning to the callsite:
     *    - lr   : to branch to the chaining cell
     *    - lr+8 : to punt to the interpreter
     *    - lr+16: to fully resolve the callee and may rechain.
     *             a3 <- class
     * (Comment inherited from the ARM template; "lr" corresponds to ra here.)
     */
    # a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite
    lw      a3, offObject_clazz(a0)     # a3 <- this->class
    lw      rIBASE, 8(a2)                   # rIBASE <- predictedChainCell->clazz
    lw      a0, 12(a2)                  # a0 <- predictedChainCell->method
    lw      t1, offThread_icRechainCount(rSELF)    # t1 <- shared rechainCount

#if defined(WITH_JIT_TUNING)
    # Bump the inline-cache hit counter only when the prediction matches.
    la      rINST, .LdvmICHitCount
    #add     t2, t2, 1
    bne    a3, rIBASE, 1f
    nop
    lw      t2, 0(rINST)
    add     t2, t2, 1
    sw      t2, 0(rINST)
1:
    #add     t2, t2, 1
#endif
    beq     a3, rIBASE, .LinvokeChainProf       # branch if predicted chain is valid
    lw      rINST, offClassObject_vtable(a3)     # rINST <- this->class->vtable
    # Prediction missed: decrement the rechain count unless the cell's clazz
    # is still null (cell not yet initialized), in which case pass count 0.
    beqz    rIBASE, 2f                      # initialized class or not
    sub     a1, t1, 1                   # count--
    sw      a1, offThread_icRechainCount(rSELF)   # write back to InterpState
    b       3f
2:
    move    a1, zero
3:
    add     ra, ra, 16                  # return to fully-resolve landing pad
    /*
     * a1 <- count
     * a2 <- &predictedChainCell
     * a3 <- this->class
     * rPC <- dPC
     * rINST <- this->class->vtable
     */
    RETURN
   3146 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */
    /*
     * Invoke a native (JNI) method directly from JIT'ed code: build the
     * Dalvik frame, call method->nativeFunc, then return either through the
     * chaining cell or via dvmJitToInterpTraceSelectNoChain.  With
     * TEMPLATE_INLINE_PROFILING defined, trace enter/exit hooks bracket the
     * native call.  No outsSize adjustment is made here — native methods
     * have no outs.
     */
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    # Stack-overflow check: bail to caller to raise the exception.
    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
    RETURN                                        # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    lw     rTEMP, offMethod_nativeFunc(a0)        # rTEMP<- method->nativeFunc
#if !defined(WITH_SELF_VERIFICATION)
    beqz   t8, 2f                                 # breakFlags != 0
    RETURN                                        # bail to the interpreter
2:
#else
    # Under self-verification, always bail; the rest of this template is
    # unreachable in that configuration.
    RETURN                                        # bail to the interpreter unconditionally
#endif

    # go ahead and transfer control to the native code
    lw     t6, offThread_jniLocal_topCookie(rSELF)  # t6<- thread->localRef->...
    sw     a1, offThread_curFrame(rSELF)          # self->curFrame = newFp
    sw     zero, offThread_inJitCodeCache(rSELF)  # not in the jit code cache
    sw     t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
                                                  # newFp->localRefCookie=top
    SAVEAREA_FROM_FP(rBIX, a1)                    # rBIX<- new stack save area
    # Marshal JNI bridge arguments: (newFp, &retval, methodToCall, self)
    move   a2, a0                                 # a2<- methodToCall
    move   a0, a1                                 # a0<- newFp
    add    a1, rSELF, offThread_retval            # a1<- &retval
    move   a3, rSELF                              # a3<- self
#if defined(TEMPLATE_INLINE_PROFILING)
    # a2: methodToCall
    # preserve a0-a3
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(a3, 12)

    move   a0, a2
    move   a1, rSELF
    # a0=JNIMethod, a1=rSELF
    la      t9, dvmFastMethodTraceEnter
    JALR(t9)                                      # method-trace-enter hook
    lw     gp, STACK_OFFSET_GP(sp)                # restore gp clobbered by the call

    # restore a0-a3
    SCRATCH_LOAD(a3, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)

    move   rOBJ, a2                               # save a2 (methodToCall) for trace exit
#endif

    JALR(rTEMP)                                   # off to the native code
    lw     gp, STACK_OFFSET_GP(sp)                # restore gp clobbered by the call

#if defined(TEMPLATE_INLINE_PROFILING)
    move   a0, rOBJ
    move   a1, rSELF
    # a0=JNIMethod, a1=rSELF
    la      t9, dvmFastNativeMethodTraceExit
    JALR(t9)
    lw     gp, STACK_OFFSET_GP(sp)
#endif

    # native return; rBIX=newSaveArea
    # equivalent to dvmPopJniLocals
    lw     a2, offStackSaveArea_returnAddr(rBIX)     # a2 = chaining cell ret addr
    lw     a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
    lw     a1, offThread_exception(rSELF)            # check for exception
    sw     rFP, offThread_curFrame(rSELF)            # self->curFrame = fp
    sw     a0, offThread_jniLocal_topCookie(rSELF)   # new top <- old top
    lw     a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)

    # a0 = dalvikCallsitePC
    bnez   a1, .LhandleException                     # handle exception if any

    sw     a2, offThread_inJitCodeCache(rSELF)       # set the mode properly
    beqz   a2, 3f
    jr     a2                                        # go if return chaining cell still exist

3:
    # continue executing the next instruction through the interpreter
    la     a1, .LdvmJitToInterpTraceSelectNoChain    # defined in footer.S
    lw     a1, (a1)
    add    rPC, a0, 3*2                              # reconstruct new rPC (advance 3 dalvik instr)

#if defined(WITH_JIT_TUNING)
    li     a0, kCallsiteInterpreted
#endif
    jr     a1
   3260 
   3261     .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
/* File: mips/footer.S */
/*
 * ===========================================================================
 *  Common subroutines and data
 * ===========================================================================
 */

    # NOTE(review): executable code (.LinvokeNative, .LhandleException)
    # follows this directive but .data.rel.ro is a data section — looks like
    # it is intended only for the literal pool below; confirm against the
    # generating template and the linker script.
    .section .data.rel.ro
    .align  4
.LinvokeNative:
    # Prep for the native call
    # a1 = newFP, a0 = methodToCall
    lw     t9, offThread_jniLocal_topCookie(rSELF)  # t9<- thread->localRef->...
    sw     zero, offThread_inJitCodeCache(rSELF)    # not in jit code cache
    sw     a1, offThread_curFrame(rSELF)            # self->curFrame = newFp
    sw     t9, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
                                                 # newFp->localRefCookie=top
    lhu     ra, offThread_subMode(rSELF)         # ra<- self->subMode (ra used as scratch)
    SAVEAREA_FROM_FP(rBIX, a1)                   # rBIX<- new stack save area

    # Marshal JNI bridge arguments: (newFp, &retval, methodToCall, self)
    move    a2, a0                               # a2<- methodToCall
    move    a0, a1                               # a0<- newFp
    add     a1, rSELF, offThread_retval          # a1<- &retval
    move    a3, rSELF                            # a3<- self
    # Two-operand andi macro form: ra = ra & kSubModeMethodTrace
    andi    ra, kSubModeMethodTrace
    beqz    ra, 121f                             # skip trace hooks if not tracing
    # a2: methodToCall
    # preserve a0-a3
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(a3, 12)
    move    rTEMP, a2                            # preserve a2 (methodToCall) across calls

    move    a0, rTEMP
    move    a1, rSELF
    la      t9, dvmFastMethodTraceEnter
    JALR(t9)
    lw      gp, STACK_OFFSET_GP(sp)              # restore gp clobbered by the call

    # restore a0-a3
    SCRATCH_LOAD(a3, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)

    lw      t9, offMethod_nativeFunc(a2)
    JALR(t9)                                      # call methodToCall->nativeFunc
    lw      gp, STACK_OFFSET_GP(sp)

    move    a0, rTEMP
    move    a1, rSELF
    la      t9, dvmFastNativeMethodTraceExit
    JALR(t9)
    lw      gp, STACK_OFFSET_GP(sp)
    b       212f

121:
    # Non-tracing path: call the native bridge directly.
    lw      t9, offMethod_nativeFunc(a2)
    JALR(t9)                                     # call methodToCall->nativeFunc
    lw      gp, STACK_OFFSET_GP(sp)

212:
    # native return; rBIX=newSaveArea
    # equivalent to dvmPopJniLocals
    lw     a2, offStackSaveArea_returnAddr(rBIX)     # a2 = chaining cell ret addr
    lw     a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
    lw     a1, offThread_exception(rSELF)            # check for exception
    sw     rFP, offThread_curFrame(rSELF)            # self->curFrame = fp
    sw     a0, offThread_jniLocal_topCookie(rSELF)   # new top <- old top
    lw     a0, offStackSaveArea_savedPc(rBIX)        # reload rPC

    # a0 = dalvikCallsitePC
    bnez   a1, .LhandleException                     # handle exception if any

    sw     a2, offThread_inJitCodeCache(rSELF)       # set the mode properly
    beqz   a2, 3f
    jr     a2                                        # go if return chaining cell still exist

3:
    # continue executing the next instruction through the interpreter
    la     a1, .LdvmJitToInterpTraceSelectNoChain    # defined in footer.S
    lw     a1, (a1)
    add    rPC, a0, 3*2                              # reconstruct new rPC

#if defined(WITH_JIT_TUNING)
    li     a0, kCallsiteInterpreted
#endif
    jr     a1


/*
 * Common exception-landing path for JIT'ed code.
 * On entry:
 * a0  Faulting Dalvik PC
 */
.LhandleException:
#if defined(WITH_SELF_VERIFICATION)
    la     t0, .LdeadFood
    lw     t0, (t0)                  # should not see this under self-verification mode
    jr     t0
.LdeadFood:
    .word   0xdeadf00d
#endif
    sw     zero, offThread_inJitCodeCache(rSELF)  # in interpreter land
    la     a1, .LdvmMterpCommonExceptionThrown  # PIC way of getting &func
    lw     a1, (a1)
    la     rIBASE, .LdvmAsmInstructionStart     # PIC way of getting &func
    lw     rIBASE, (rIBASE)
    move   rPC, a0                              # reload the faulting Dalvik address
    jr     a1                                   # branch to dvmMterpCommonExceptionThrown

    # Literal pool: PIC-friendly addresses of interpreter entry points.
    .align  4
.LdvmAsmInstructionStart:
    .word   dvmAsmInstructionStart
.LdvmJitToInterpNoChainNoProfile:
    .word   dvmJitToInterpNoChainNoProfile
.LdvmJitToInterpTraceSelectNoChain:
    .word   dvmJitToInterpTraceSelectNoChain
.LdvmJitToInterpNoChain:
    .word   dvmJitToInterpNoChain
.LdvmMterpStdBail:
    .word   dvmMterpStdBail
.LdvmMterpCommonExceptionThrown:
    .word   dvmMterpCommonExceptionThrown
.LdvmLockObject:
    .word   dvmLockObject
#if defined(WITH_JIT_TUNING)
.LdvmICHitCount:
    .word   gDvmICHitCount
#endif
#if defined(WITH_SELF_VERIFICATION)
.LdvmSelfVerificationMemOpDecode:
    .word   dvmSelfVerificationMemOpDecode
#endif

    # NOTE(review): symbol name transposes "dvm" to "dmv"; it is exported
    # as-is, so any consumer presumably references the same spelling —
    # do not rename without checking callers.
    .global dmvCompilerTemplateEnd
dmvCompilerTemplateEnd:

#endif /* WITH_JIT */
   3401 
   3402