/* (source-listing navigation residue) Home | History | Annotate | Download | only in out */
      1 /*
      2  * This file was generated automatically by gen-template.py for 'mips'.
      3  *
      4  * --> DO NOT EDIT <--
      5  */
      6 
      7 /* File: mips/header.S */
      8 /*
      9  * Copyright (C) 2008 The Android Open Source Project
     10  *
     11  * Licensed under the Apache License, Version 2.0 (the "License");
     12  * you may not use this file except in compliance with the License.
     13  * You may obtain a copy of the License at
     14  *
     15  *      http://www.apache.org/licenses/LICENSE-2.0
     16  *
     17  * Unless required by applicable law or agreed to in writing, software
     18  * distributed under the License is distributed on an "AS IS" BASIS,
     19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     20  * See the License for the specific language governing permissions and
     21  * limitations under the License.
     22  */
     23 
     24 #if defined(WITH_JIT)
     25 
     26 /*
     27  * This is a #include, not a %include, because we want the C pre-processor
     28  * to expand the macros into assembler assignment statements.
     29  */
     30 #include "../../../mterp/common/asm-constants.h"
     31 #include "../../../mterp/common/mips-defines.h"
     32 #include "../../../mterp/common/jit-config.h"
     33 #include <asm/regdef.h>
     34 #include <asm/fpregdef.h>
     35 
     36 #ifdef	__mips_hard_float
     37 #define		HARD_FLOAT
     38 #else
     39 #define		SOFT_FLOAT
     40 #endif
     41 
     42 /* MIPS definitions and declarations
     43 
     44    reg	nick		purpose
     45    s0	rPC		interpreted program counter, used for fetching instructions
     46    s1	rFP		interpreted frame pointer, used for accessing locals and args
     47    s2	rSELF		pointer to thread
     48    s3	rIBASE		interpreted instruction base pointer, used for computed goto
     49    s4	rINST		first 16-bit code unit of current instruction
     50 */
     51 
     52 /* register offsets */
/* integer registers 0-31 (hardware encoding numbers) */
#define r_ZERO      0
#define r_AT        1
#define r_V0        2
#define r_V1        3
#define r_A0        4
#define r_A1        5
#define r_A2        6
#define r_A3        7
#define r_T0        8
#define r_T1        9
#define r_T2        10
#define r_T3        11
#define r_T4        12
#define r_T5        13
#define r_T6        14
#define r_T7        15
#define r_S0        16
#define r_S1        17
#define r_S2        18
#define r_S3        19
#define r_S4        20
#define r_S5        21
#define r_S6        22
#define r_S7        23
#define r_T8        24
#define r_T9        25
#define r_K0        26
#define r_K1        27
#define r_GP        28
#define r_SP        29
#define r_FP        30
#define r_RA        31
/* floating-point registers f0-f31 occupy slots 32-63 in this numbering */
#define r_F0        32
#define r_F1        33
#define r_F2        34
#define r_F3        35
#define r_F4        36
#define r_F5        37
#define r_F6        38
#define r_F7        39
#define r_F8        40
#define r_F9        41
#define r_F10       42
#define r_F11       43
#define r_F12       44
#define r_F13       45
#define r_F14       46
#define r_F15       47
#define r_F16       48
#define r_F17       49
#define r_F18       50
#define r_F19       51
#define r_F20       52
#define r_F21       53
#define r_F22       54
#define r_F23       55
#define r_F24       56
#define r_F25       57
#define r_F26       58
#define r_F27       59
#define r_F28       60
#define r_F29       61
#define r_F30       62
#define r_F31       63

/* single-purpose registers, given names for clarity (all callee-saved s-regs,
 * so they survive calls out to C helpers) */
#define rPC	s0
#define rFP	s1
#define rSELF	s2
#define rIBASE	s3
#define rINST	s4
#define rOBJ	s5
#define rBIX	s6
#define rTEMP	s7
    127 
/* Long (64-bit) arguments to function calls are passed in a register pair.
 * The pair ordering depends on endianness: a long passed as a0(LSW), a1(MSW)
 * in little-endian mode must be passed as a1(LSW), a0(MSW) in big-endian
 * mode.  rARG0/rRESULT0 always name the low word, rARG1/rRESULT1 the high. */

#ifdef HAVE_LITTLE_ENDIAN
#define rARG0     a0
#define rARG1     a1
#define rARG2     a2
#define rARG3     a3
#define rRESULT0  v0
#define rRESULT1  v1
#else
#define rARG0     a1
#define rARG1     a0
#define rARG2     a3
#define rARG3     a2
#define rRESULT0  v1
#define rRESULT1  v0
#endif
    148 
    149 
/* save/restore the PC and/or FP from the thread struct */
#define LOAD_PC_FROM_SELF()	lw	rPC, offThread_pc(rSELF)
#define SAVE_PC_TO_SELF()	sw	rPC, offThread_pc(rSELF)
#define LOAD_FP_FROM_SELF()	lw	rFP, offThread_curFrame(rSELF)
#define SAVE_FP_TO_SELF()	sw	rFP, offThread_curFrame(rSELF)

/* publish rPC into the current frame's StackSaveArea (which sits just below rFP) */
#define EXPORT_PC() \
	sw	rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)

/* rd <- address of the StackSaveArea for the frame _fpreg points at */
#define SAVEAREA_FROM_FP(rd, _fpreg) \
	subu	rd, _fpreg, sizeofStackSaveArea

/* fetch the 16-bit code unit at rPC into rINST */
#define FETCH_INST()			lhu	rINST, (rPC)

/* fetch the code unit _count units ahead, then advance rPC past it */
#define FETCH_ADVANCE_INST(_count)	lhu     rINST, (_count*2)(rPC); \
					addu	rPC, rPC, (_count * 2)

/* advance rPC by a byte offset held in rd, then fetch */
#define FETCH_ADVANCE_INST_RB(rd)	addu	rPC, rPC, rd;	\
					lhu     rINST, (rPC)

#define FETCH(rd, _count)		lhu	rd, (_count * 2)(rPC)
#define FETCH_S(rd, _count)		lh	rd, (_count * 2)(rPC)

#ifdef HAVE_LITTLE_ENDIAN

/* byte-granular fetch: B = low byte of the code unit, C = high byte */
#define FETCH_B(rd, _count)            lbu     rd, (_count * 2)(rPC)
#define FETCH_C(rd, _count)            lbu     rd, (_count * 2 + 1)(rPC)

#else

/* big-endian: byte positions within the halfword are swapped */
#define FETCH_B(rd, _count)            lbu     rd, (_count * 2 + 1)(rPC)
#define FETCH_C(rd, _count)            lbu     rd, (_count * 2)(rPC)

#endif

#define GET_INST_OPCODE(rd)		and	rd, rINST, 0xFF

/* NOTE(review): the -1000 shift count is the gen-template.py handler-size
 * placeholder; these dispatch macros appear unused by the compiler templates
 * below -- confirm before using them. */
#define GOTO_OPCODE(rd)			sll  rd, rd, -1000;	\
					addu rd, rIBASE, rd;	\
					jr  rd


#define LOAD(rd, rbase)			lw  rd, 0(rbase)
#define LOAD_F(rd, rbase)		l.s rd, (rbase)
#define STORE(rd, rbase)		sw  rd, 0(rbase)
#define STORE_F(rd, rbase)		s.s rd, (rbase)

/* Dalvik virtual register access: vreg rix lives at rFP + rix*4 */
#define GET_VREG(rd, rix)		LOAD_eas2(rd,rFP,rix)

#define GET_VREG_F(rd, rix)		EAS2(AT, rFP, rix);		\
					.set noat;  l.s rd, (AT); .set at

#define SET_VREG(rd, rix)		STORE_eas2(rd, rFP, rix)

/* store a vreg and dispatch; the sw executes in the jr delay slot
 * (also uses the -1000 placeholder shift -- see GOTO_OPCODE note) */
#define SET_VREG_GOTO(rd, rix, dst)	.set noreorder;		\
					sll  dst, dst, -1000;	\
					addu dst, rIBASE, dst;			\
					sll  t8, rix, 2;	\
					addu t8, t8, rFP;	\
					jr  dst;		\
					sw  rd, 0(t8);		\
					.set reorder

#define SET_VREG_F(rd, rix)		EAS2(AT, rFP, rix);		\
					.set noat;  s.s	rd, (AT); .set at
    215 
    216 
    217 #define GET_OPA(rd)			srl     rd, rINST, 8
    218 #ifndef		MIPS32R2
    219 #define GET_OPA4(rd)			GET_OPA(rd);  and  rd, 0xf
    220 #else
    221 #define GET_OPA4(rd)			ext	rd, rd, 8, 4
    222 #endif
    223 #define GET_OPB(rd)			srl     rd, rINST, 12
    224 
/* load a Thread-struct field: rd <- rSELF->off (offsets from asm-constants.h) */
#define LOAD_rSELF_OFF(rd,off)		lw    rd, offThread_##off##(rSELF)

#define LOAD_rSELF_method(rd)		LOAD_rSELF_OFF(rd, method)
#define LOAD_rSELF_methodClassDex(rd)	LOAD_rSELF_OFF(rd, methodClassDex)
#define LOAD_rSELF_interpStackEnd(rd)	LOAD_rSELF_OFF(rd, interpStackEnd)
#define LOAD_rSELF_retval(rd)		LOAD_rSELF_OFF(rd, retval)
#define LOAD_rSELF_pActiveProfilers(rd)	LOAD_rSELF_OFF(rd, pActiveProfilers)
#define LOAD_rSELF_bailPtr(rd)		LOAD_rSELF_OFF(rd, bailPtr)

#define GET_JIT_PROF_TABLE(rd)		LOAD_rSELF_OFF(rd,pJitProfTable)
#define GET_JIT_THRESHOLD(rd)		LOAD_rSELF_OFF(rd,jitThreshold)
    236 
/*
 * Form an Effective Address rd = rbase + roff<<n;
 * Uses reg AT (so callers must not hold a live value in AT)
 */
#define EASN(rd,rbase,roff,rshift)	.set noat;		\
					sll  AT, roff, rshift;	\
					addu rd, rbase, AT;	\
					.set at

#define EAS1(rd,rbase,roff)		EASN(rd,rbase,roff,1)
#define EAS2(rd,rbase,roff)		EASN(rd,rbase,roff,2)
#define EAS3(rd,rbase,roff)		EASN(rd,rbase,roff,3)
#define EAS4(rd,rbase,roff)		EASN(rd,rbase,roff,4)

/*
 * Form an Effective Shift Right rd = rbase + roff>>n;
 * Uses reg AT
 */
#define ESRN(rd,rbase,roff,rshift)	.set noat;		\
					srl  AT, roff, rshift;	\
					addu rd, rbase, AT;	\
					.set at

/* word load/store at rbase + roff*4 (clobbers AT) */
#define LOAD_eas2(rd,rbase,roff)	EAS2(AT, rbase, roff);  \
					.set noat;  lw  rd, 0(AT); .set at

#define STORE_eas2(rd,rbase,roff)	EAS2(AT, rbase, roff);  \
					.set noat;  sw  rd, 0(AT); .set at

/* plain constant-offset loads/stores */
#define LOAD_RB_OFF(rd,rbase,off)	lw	rd, off(rbase)
#define LOADu2_RB_OFF(rd,rbase,off)	lhu	rd, off(rbase)
#define STORE_RB_OFF(rd,rbase,off)	sw	rd, off(rbase)
    268 #define STORE_RB_OFF(rd,rbase,off)	sw	rd, off(rbase)
    269 
#ifdef HAVE_LITTLE_ENDIAN

/* 64-bit values as two 32-bit words; little-endian keeps lo at off, hi at off+4 */
#define STORE64_off(rlo,rhi,rbase,off)	        sw	rlo, off(rbase);	\
					        sw	rhi, (off+4)(rbase)
#define LOAD64_off(rlo,rhi,rbase,off)	        lw	rlo, off(rbase);	\
					        lw	rhi, (off+4)(rbase)

#define STORE64_off_F(rlo,rhi,rbase,off)	s.s	rlo, off(rbase);	\
						s.s	rhi, (off+4)(rbase)
#define LOAD64_off_F(rlo,rhi,rbase,off)		l.s	rlo, off(rbase);	\
						l.s	rhi, (off+4)(rbase)
#else

/* big-endian: the two words of the 64-bit pair swap positions in memory */
#define STORE64_off(rlo,rhi,rbase,off)	        sw	rlo, (off+4)(rbase);	\
					        sw	rhi, (off)(rbase)
#define LOAD64_off(rlo,rhi,rbase,off)	        lw	rlo, (off+4)(rbase);	\
					        lw	rhi, (off)(rbase)
#define STORE64_off_F(rlo,rhi,rbase,off)	s.s	rlo, (off+4)(rbase);	\
						s.s	rhi, (off)(rbase)
#define LOAD64_off_F(rlo,rhi,rbase,off)		l.s	rlo, (off+4)(rbase);	\
						l.s	rhi, (off)(rbase)
#endif

#define STORE64(rlo,rhi,rbase)		STORE64_off(rlo,rhi,rbase,0)
#define LOAD64(rlo,rhi,rbase)		LOAD64_off(rlo,rhi,rbase,0)

#define STORE64_F(rlo,rhi,rbase)	STORE64_off_F(rlo,rhi,rbase,0)
#define LOAD64_F(rlo,rhi,rbase)		LOAD64_off_F(rlo,rhi,rbase,0)

/* word-at-a-time halves of a 64-bit store (offsets assume little-endian layout) */
#define STORE64_lo(rd,rbase)		sw	rd, 0(rbase)
#define STORE64_hi(rd,rbase)		sw	rd, 4(rbase)
    301 
    302 
/* convenience wrappers that name the VM structure field being accessed */
#define LOAD_offThread_exception(rd,rbase)		LOAD_RB_OFF(rd,rbase,offThread_exception)
#define LOAD_base_offArrayObject_length(rd,rbase)	LOAD_RB_OFF(rd,rbase,offArrayObject_length)
#define LOAD_base_offClassObject_accessFlags(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_accessFlags)
#define LOAD_base_offClassObject_descriptor(rd,rbase)   LOAD_RB_OFF(rd,rbase,offClassObject_descriptor)
#define LOAD_base_offClassObject_super(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_super)

#define LOAD_base_offClassObject_vtable(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_vtable)
#define LOAD_base_offClassObject_vtableCount(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_vtableCount)
#define LOAD_base_offDvmDex_pResClasses(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResClasses)
#define LOAD_base_offDvmDex_pResFields(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResFields)

#define LOAD_base_offDvmDex_pResMethods(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResMethods)
#define LOAD_base_offDvmDex_pResStrings(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResStrings)
#define LOAD_base_offInstField_byteOffset(rd,rbase)	LOAD_RB_OFF(rd,rbase,offInstField_byteOffset)
#define LOAD_base_offStaticField_value(rd,rbase)	LOAD_RB_OFF(rd,rbase,offStaticField_value)
#define LOAD_base_offMethod_clazz(rd,rbase)		LOAD_RB_OFF(rd,rbase,offMethod_clazz)

#define LOAD_base_offMethod_name(rd,rbase)		LOAD_RB_OFF(rd,rbase,offMethod_name)
#define LOAD_base_offObject_clazz(rd,rbase)		LOAD_RB_OFF(rd,rbase,offObject_clazz)

#define LOADu2_offMethod_methodIndex(rd,rbase)		LOADu2_RB_OFF(rd,rbase,offMethod_methodIndex)


#define STORE_offThread_exception(rd,rbase)		STORE_RB_OFF(rd,rbase,offThread_exception)
    327 
    328 
/* raw native-stack accesses relative to sp */
#define	STACK_STORE(rd,off)	sw   rd, off(sp)
#define	STACK_LOAD(rd,off)	lw   rd, off(sp)
#define CREATE_STACK(n)	 	subu sp, sp, n
#define DELETE_STACK(n)	 	addu sp, sp, n

#define SAVE_RA(offset)	 	STACK_STORE(ra, offset)
#define LOAD_RA(offset)	 	STACK_LOAD(ra, offset)

#define LOAD_ADDR(dest,addr)	la   dest, addr
#define LOAD_IMM(dest, imm)	li   dest, imm
#define MOVE_REG(dest,src)	move dest, src
#define	RETURN			jr   ra
#define	STACK_SIZE		128

/* fixed offsets within the STACK_SIZE frame */
#define STACK_OFFSET_ARG04	16
#define STACK_OFFSET_GP		84
#define STACK_OFFSET_rFP	112

/* This directive will make sure all subsequent jal restore gp at a known offset */
        .cprestore STACK_OFFSET_GP

/* call helpers that preserve ra across the call by parking it in rTEMP (s7);
 * note they therefore clobber rTEMP */
#define JAL(func)		move rTEMP, ra;				\
				jal  func;				\
				move ra, rTEMP

#define JALR(reg)		move rTEMP, ra;				\
				jalr ra, reg;				\
				move ra, rTEMP

#define BAL(n)			bal  n
    359 
/* push a STACK_SIZE frame and spill gp/ra (and optionally s0/s1) at fixed offsets;
 * the matching STACK_LOAD_* macros restore in reverse order and pop the frame */
#define	STACK_STORE_RA()  	CREATE_STACK(STACK_SIZE);		\
				STACK_STORE(gp, STACK_OFFSET_GP);	\
				STACK_STORE(ra, 124)

#define	STACK_STORE_S0()  	STACK_STORE_RA();			\
				STACK_STORE(s0, 116)

#define	STACK_STORE_S0S1()  	STACK_STORE_S0();			\
				STACK_STORE(s1, STACK_OFFSET_rFP)

#define	STACK_LOAD_RA()		STACK_LOAD(ra, 124);			\
				STACK_LOAD(gp, STACK_OFFSET_GP);	\
				DELETE_STACK(STACK_SIZE)

#define	STACK_LOAD_S0()  	STACK_LOAD(s0, 116);			\
				STACK_LOAD_RA()

#define	STACK_LOAD_S0S1()  	STACK_LOAD(s1, STACK_OFFSET_rFP);	\
				STACK_LOAD_S0()

/* full spill/restore of ra, fp and all callee-saved s0-s7 */
#define STACK_STORE_FULL()	CREATE_STACK(STACK_SIZE);	\
				STACK_STORE(ra, 124);		\
				STACK_STORE(fp, 120);		\
				STACK_STORE(s0, 116);		\
				STACK_STORE(s1, STACK_OFFSET_rFP);	\
				STACK_STORE(s2, 108);		\
				STACK_STORE(s3, 104);		\
				STACK_STORE(s4, 100);		\
				STACK_STORE(s5, 96);		\
				STACK_STORE(s6, 92);		\
				STACK_STORE(s7, 88);

#define STACK_LOAD_FULL()	STACK_LOAD(gp, STACK_OFFSET_GP);	\
				STACK_LOAD(s7, 88);	\
				STACK_LOAD(s6, 92);	\
				STACK_LOAD(s5, 96);	\
				STACK_LOAD(s4, 100);	\
				STACK_LOAD(s3, 104);	\
				STACK_LOAD(s2, 108);	\
				STACK_LOAD(s1, STACK_OFFSET_rFP);	\
				STACK_LOAD(s0, 116);	\
				STACK_LOAD(fp, 120);	\
				STACK_LOAD(ra, 124);	\
				DELETE_STACK(STACK_SIZE)

/*
 * first 8 words are reserved for function calls
 * Maximum offset is STACK_OFFSET_SCRMX-STACK_OFFSET_SCR
 */
#define STACK_OFFSET_SCR   32
#define SCRATCH_STORE(r,off) \
    STACK_STORE(r, STACK_OFFSET_SCR+off);
#define SCRATCH_LOAD(r,off) \
    STACK_LOAD(r, STACK_OFFSET_SCR+off);
    414 
    415 /* File: mips/platform.S */
    416 /*
    417  * ===========================================================================
    418  *  CPU-version-specific defines and utility
    419  * ===========================================================================
    420  */
    421 
    422 
    423 
    .global dvmCompilerTemplateStart
    .type   dvmCompilerTemplateStart, %function
    /* NOTE(review): templates are emitted into .data.rel.ro, not .text --
     * presumably they are copied into the JIT code cache rather than
     * executed in place; confirm against the compiler's codegen. */
    .section .data.rel.ro

dvmCompilerTemplateStart:
    429 
    430 /* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMP_LONG
dvmCompiler_TEMPLATE_CMP_LONG:
/* File: mips/TEMPLATE_CMP_LONG.S */
    /*
     * Compare two 64-bit values
     *    x = y     return  0
     *    x < y     return -1
     *    x > y     return  1
     *
     * I think I can improve on the ARM code by the following observation
     *    slt   t0,  x.hi, y.hi;        # (x.hi < y.hi) ? 1:0
     *    sgt   t1,  x.hi, y.hi;        # (x.hi > y.hi) ? 1:0
     *    subu  v0, t1, t0              # v0= -1:1:0 for [ < > = ]
     *
     * This code assumes the register pair ordering will depend on endianess (a1:a0 or a0:a1).
     *    a1:a0 => vBB
     *    a3:a2 => vCC
     */
    /* cmp-long vAA, vBB, vCC */
    slt    t0, rARG1, rARG3             # compare hi: t0 = (x.hi < y.hi), signed
    sgt    t1, rARG1, rARG3             # t1 = (x.hi > y.hi)
    subu   v0, t1, t0                   # v0<- (-1,1,0)
    bnez   v0, .LTEMPLATE_CMP_LONG_finish
                                        # at this point x.hi==y.hi
    sltu   t0, rARG0, rARG2             # compare lo (unsigned: low words have no sign)
    sgtu   t1, rARG0, rARG2
    subu   v0, t1, t0                   # v0<- (-1,1,0) for [< > =]
.LTEMPLATE_CMP_LONG_finish:
    RETURN
    461 
    462 /* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_RETURN
dvmCompiler_TEMPLATE_RETURN:
/* File: mips/TEMPLATE_RETURN.S */
    /*
     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
     * If the stored value in returnAddr
     * is non-zero, the caller is compiled by the JIT thus return to the
     * address in the code cache following the invoke instruction. Otherwise
     * return to the special dvmJitToInterpNoChain entry point.
     */
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a2 and ra in the scratch area before calling out to C
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(ra, 12)

    # a0=rSELF
    move    a0, rSELF
    la      t9, dvmFastMethodTraceExit
    JALR(t9)
    lw      gp, STACK_OFFSET_GP(sp)                # reload gp clobbered by the call

    # restore a0-a2 and ra
    SCRATCH_LOAD(ra, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif
    SAVEAREA_FROM_FP(a0, rFP)           # a0<- saveArea (old)
    lw      t0, offStackSaveArea_prevFrame(a0)     # t0<- saveArea->prevFrame
    lbu     t1, offThread_breakFlags(rSELF)        # t1<- breakFlags
    lw      rPC, offStackSaveArea_savedPc(a0)      # rPC<- saveArea->savedPc
#if !defined(WITH_SELF_VERIFICATION)
    lw      t2,  offStackSaveArea_returnAddr(a0)   # t2<- chaining cell ret
#else
    move    t2, zero                               # disable chaining
#endif
    lw      a2, offStackSaveArea_method - sizeofStackSaveArea(t0)
                                                   # a2<- method we're returning to
#if !defined(WITH_SELF_VERIFICATION)
    beq     a2, zero, 1f                           # bail to interpreter
#else
    bne     a2, zero, 2f
    JALR(ra)                                       # punt to interpreter and compare state
    # DOUG: assume this does not return ???
2:
#endif
    la      t4, .LdvmJitToInterpNoChainNoProfile   # defined in footer.S
    lw      a1, (t4)
    move    rFP, t0                                # publish new FP
    beq     a2, zero, 4f                           # skip clazz load if no method
    lw      t0, offMethod_clazz(a2)                # t0<- method->clazz
4:

    sw      a2, offThread_method(rSELF)            # self->method = newSave->method
    lw      a0, offClassObject_pDvmDex(t0)         # a0<- method->clazz->pDvmDex
    sw      rFP, offThread_curFrame(rSELF)         # self->curFrame = fp
    add     rPC, rPC, 3*2                          # publish new rPC (step past the 3-unit invoke)
    sw      a0, offThread_methodClassDex(rSELF)
    movn    t2, zero, t1                           # check the breakFlags and
                                                   # clear the chaining cell address
    sw      t2, offThread_inJitCodeCache(rSELF)    # in code cache or not
    beq     t2, zero, 3f                           # chaining cell exists?
    JALR(t2)                                       # jump to the chaining cell
    # DOUG: assume this does not return ???
3:
#if defined(WITH_JIT_TUNING)
    li      a0, kCallsiteInterpreted
#endif
    j       a1                                     # callsite is interpreted
1:
    sw      zero, offThread_inJitCodeCache(rSELF)  # reset inJitCodeCache
    SAVE_PC_TO_SELF()                              # SAVE_PC_FP_TO_SELF()
    SAVE_FP_TO_SELF()
    la      t4, .LdvmMterpStdBail                  # defined in footer.S
    lw      a2, (t4)
    move    a0, rSELF                              # Expecting rSELF in a0
    JALR(a2)                                       # exit the interpreter
    # DOUG: assume this does not return ???
    544 
    545 /* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
    /*
     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
     * runtime-resolved callee.
     */
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
    RETURN                                        # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
    lw     t0, offMethod_accessFlags(a0)          # t0<- methodToCall->accessFlags
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    beqz   t8, 2f                                 # breakFlags != 0
    RETURN                                        # bail to the interpreter

2:
    and    t6, t0, ACC_NATIVE                     # native method?
    beqz   t6, 3f
#if !defined(WITH_SELF_VERIFICATION)
    j      .LinvokeNative
#else
    RETURN                                        # bail to the interpreter
#endif

3:
    # continue executing the next instruction through the interpreter
    la     t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
    lw     rTEMP, (t0)
    lw     a3, offClassObject_pDvmDex(t9)         # a3<- method->clazz->pDvmDex

    # Update "thread" values for the new method
    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
    move   rFP, a1                                # fp = newFp
    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve rTEMP,a1-a3 across the trace-enter call
    SCRATCH_STORE(rTEMP, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(a3, 12)

    # a0=methodToCall, a1=rSELF
    move   a1, rSELF
    la     t9, dvmFastMethodTraceEnter
    JALR(t9)
    lw     gp, STACK_OFFSET_GP(sp)                # reload gp clobbered by the call

    # restore rTEMP,a1-a3
    SCRATCH_LOAD(a3, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(rTEMP, 0)
#endif

    # Start executing the callee
#if defined(WITH_JIT_TUNING)
    li     a0, kInlineCacheMiss
#endif
    jr     rTEMP                                  # dvmJitToInterpTraceSelectNoChain
    630 
    631 /* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */
    /*
     * For monomorphic callsite, setup the Dalvik frame and return to the
     * Thumb code through the link register to transfer control to the callee
     * method through a dedicated chaining cell.
     */
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    # methodToCall is guaranteed to be non-native
.LinvokeChain:
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    add    t2, ra, 8                              # setup the punt-to-interp address
                                                  # 8 bytes skips branch and delay slot
    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
    jr     t2                                     # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    beqz   t8, 2f                                 # breakFlags != 0
    jr     t2                                     # bail to the interpreter

2:
    lw     a3, offClassObject_pDvmDex(t9)         # a3<- methodToCall->clazz->pDvmDex

    # Update "thread" values for the new method
    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
    move   rFP, a1                                # fp = newFp
    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a2 and ra in the scratch area before calling out to C
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(ra, 12)

    move   a1, rSELF
    # a0=methodToCall, a1=rSELF
    la     t9, dvmFastMethodTraceEnter
    jalr   t9                                     # clobbers ra; reloaded from scratch below
    lw     gp, STACK_OFFSET_GP(sp)                # reload gp clobbered by the call

    # restore a0-a2 and ra
    SCRATCH_LOAD(ra, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif
    RETURN                                        # return to the callee-chaining cell
    702 
    703 /* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
    /*
     * For polymorphic callsite, check whether the cached class pointer matches
     * the current one. If so setup the Dalvik frame and return to the
     * Thumb code through the link register to transfer control to the callee
     * method through a dedicated chaining cell.
     *
     * The predicted chaining cell is declared in ArmLIR.h with the
     * following layout:
     *
     *  typedef struct PredictedChainingCell {
     *      u4 branch;
     *      u4 delay_slot;
     *      const ClassObject *clazz;
     *      const Method *method;
     *      u4 counter;
     *  } PredictedChainingCell;
     *
     * Upon returning to the callsite:
     *    - lr   : to branch to the chaining cell
     *    - lr+8 : to punt to the interpreter
     *    - lr+16: to fully resolve the callee and may rechain.
     *             a3 <- class
     */
    # a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite
    lw      a3, offObject_clazz(a0)     # a3 <- this->class
    lw      rIBASE, 8(a2)                   # rIBASE <- predictedChainCell->clazz
    lw      a0, 12(a2)                  # a0 <- predictedChainCell->method
    lw      t1, offThread_icRechainCount(rSELF)    # t1 <- shared rechainCount

#if defined(WITH_JIT_TUNING)
    # bump the inline-cache hit counter when the prediction matches
    la      rINST, .LdvmICHitCount
    #add     t2, t2, 1
    bne    a3, rIBASE, 1f
    nop
    lw      t2, 0(rINST)
    add     t2, t2, 1
    sw      t2, 0(rINST)
1:
    #add     t2, t2, 1
#endif
    beq     a3, rIBASE, .LinvokeChain       # branch if predicted chain is valid
    lw      rINST, offClassObject_vtable(a3)     # rINST <- this->class->vtable
    beqz    rIBASE, 2f                      # initialized class or not
    sub     a1, t1, 1                   # count--
    sw      a1, offThread_icRechainCount(rSELF)   # write back to InterpState
    b       3f
2:
    move    a1, zero
3:
    add     ra, ra, 16                  # return to fully-resolve landing pad
    /*
     * a1 <- count
     * a2 <- &predictedChainCell
     * a3 <- this->class
     * rPC <- dPC
     * rINST <- this->class->vtable
     */
    RETURN
    766 
    767 /* ------------------------------ */
    768     .balign 4
    769     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
    770 dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
    771 /* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */
        # Invoke a native (JNI) method from JIT'ed code: check for stack overflow,
        # build the callee frame, call method->nativeFunc directly, then either
        # jump back through the chaining cell or hand off to the interpreter.
    772     # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    773     lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    774     lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    775     lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    776     move   a3, a1                                 # a3<- returnCell
    777     SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    778     sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    779     sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    780     SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
        # Stack overflow check (unsigned compare; the Dalvik stack grows down):
        # proceed only when the new save area is still above interpStackEnd.
    781     bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
    782     RETURN                                        # return to raise stack overflow excep.
    783 
    784 1:
    785     # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    786     sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    787     sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    788     lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns
    789 
    790     # set up newSaveArea
    791     sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    792     sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    793     sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    794     lw     rTEMP, offMethod_nativeFunc(a0)        # rTEMP<- method->nativeFunc
    795 #if !defined(WITH_SELF_VERIFICATION)
    796     beqz   t8, 2f                                 # breakFlags != 0
    797     RETURN                                        # bail to the interpreter
    798 2:
    799 #else
        # Self-verification builds bail to the interpreter unconditionally, so the
        # native-dispatch code below is intentionally unreachable in that config.
    800     RETURN                                        # bail to the interpreter unconditionally
    801 #endif
    802 
    803     # go ahead and transfer control to the native code
    804     lw     t6, offThread_jniLocal_topCookie(rSELF)  # t6<- thread->localRef->...
    805     sw     a1, offThread_curFrame(rSELF)          # self->curFrame = newFp
    806     sw     zero, offThread_inJitCodeCache(rSELF)  # not in the jit code cache
    807     sw     t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
    808                                                   # newFp->localRefCookie=top
    809     SAVEAREA_FROM_FP(rBIX, a1)                    # rBIX<- new stack save area
    810     move   a2, a0                                 # a2<- methodToCall
    811     move   a0, a1                                 # a0<- newFp
    812     add    a1, rSELF, offThread_retval            # a1<- &retval
    813     move   a3, rSELF                              # a3<- self
    814 #if defined(TEMPLATE_INLINE_PROFILING)
    815     # a2: methodToCall
    816     # preserve rTEMP,a0-a3
    817     SCRATCH_STORE(a0, 0)
    818     SCRATCH_STORE(a1, 4)
    819     SCRATCH_STORE(a2, 8)
    820     SCRATCH_STORE(a3, 12)
    821     SCRATCH_STORE(rTEMP, 16)
    822 
    823     move   a0, a2
    824     move   a1, rSELF
    825     # a0=JNIMethod, a1=rSELF
    826     la      t9, dvmFastMethodTraceEnter
    827     JALR(t9)                                      # off to the native code
    828     lw     gp, STACK_OFFSET_GP(sp)                # reload gp after the call
    829 
    830     # restore rTEMP,a0-a3
    831     SCRATCH_LOAD(rTEMP, 16)
    832     SCRATCH_LOAD(a3, 12)
    833     SCRATCH_LOAD(a2, 8)
    834     SCRATCH_LOAD(a1, 4)
    835     SCRATCH_LOAD(a0, 0)
    836 
    837     move   rOBJ, a2                               # save a2
    838 #endif
        # MIPS PIC convention: the callee address must be in t9 for the jalr.
    839     move   t9, rTEMP
    840     JALR(t9)                                   # off to the native code
    841     lw     gp, STACK_OFFSET_GP(sp)                # reload gp after the call
    842 
    843 #if defined(TEMPLATE_INLINE_PROFILING)
    844     move   a0, rOBJ
    845     move   a1, rSELF
    846     # a0=JNIMethod, a1=rSELF
    847     la      t9, dvmFastNativeMethodTraceExit
    848     JALR(t9)
    849     lw     gp, STACK_OFFSET_GP(sp)
    850 #endif
    851 
    852     # native return; rBIX=newSaveArea
    853     # equivalent to dvmPopJniLocals
    854     lw     a2, offStackSaveArea_returnAddr(rBIX)     # a2 = chaining cell ret addr
    855     lw     a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
    856     lw     a1, offThread_exception(rSELF)            # check for exception
    857     sw     rFP, offThread_curFrame(rSELF)            # self->curFrame = fp
    858     sw     a0, offThread_jniLocal_topCookie(rSELF)   # new top <- old top
    859     lw     a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    860 
    861     # a0 = dalvikCallsitePC
    862     bnez   a1, .LhandleException                     # handle exception if any
    863 
    864     sw     a2, offThread_inJitCodeCache(rSELF)       # set the mode properly
    865     beqz   a2, 3f
    866     jr     a2                                        # go if return chaining cell still exist
    867 
    868 3:
    869     # continue executing the next instruction through the interpreter
    870     la     a1, .LdvmJitToInterpTraceSelectNoChain    # defined in footer.S
    871     lw     a1, (a1)
    872     add    rPC, a0, 3*2                              # reconstruct new rPC (skip 3 16-bit code units)
    873 
    874 #if defined(WITH_JIT_TUNING)
    875     li     a0, kCallsiteInterpreted
    876 #endif
    877     jr     a1
    879 /* ------------------------------ */
    880     .balign 4
    881     .global dvmCompiler_TEMPLATE_MUL_LONG
    882 dvmCompiler_TEMPLATE_MUL_LONG:
    883 /* File: mips/TEMPLATE_MUL_LONG.S */
    884     /*
    885      * Signed 64-bit integer multiply.
    886      *
    887      * For JIT: op1 in a0/a1, op2 in a2/a3, return in v0/v1
    888      *
    889      * Consider WXxYZ (a1a0 x a3a2) with a long multiply:
    890      *
    891      *         a1   a0
    892      *   x     a3   a2
    893      *   -------------
    894      *       a2a1 a2a0
    895      *       a3a0
    896      *  a3a1 (<= unused)
    897      *  ---------------
    898      *         v1   v0
    899      *
    900      */
    901     /* mul-long vAA, vBB, vCC */
    902     mul     rRESULT1,rARG3,rARG0              #  v1= a3a0
    903     multu   rARG2,rARG0                       #  hi/lo = full 64-bit a2*a0
    904     mfhi    t1                                #  t1= hi32(a2a0)
    905     mflo    rRESULT0                          #  v0= a2a0
    906     mul     t0,rARG2,rARG1                    #  t0= a2a1
    907     addu    rRESULT1,rRESULT1,t1              #  v1= a3a0 + hi(a2a0)
    908     addu    rRESULT1,rRESULT1,t0              #  v1= a3a0 + hi(a2a0) + a2a1;
    909     RETURN
    910 
    911 /* ------------------------------ */
    912     .balign 4
    913     .global dvmCompiler_TEMPLATE_SHL_LONG
    914 dvmCompiler_TEMPLATE_SHL_LONG:
    915 /* File: mips/TEMPLATE_SHL_LONG.S */
    916     /*
    917      * Long integer shift.  This is different from the generic 32/64-bit
    918      * binary operations because vAA/vBB are 64-bit but vCC (the shift
    919      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
    920      * 6 bits.
    921      */
    922     /* shl-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
    923     sll     rRESULT0, rARG0, a2		#  rlo<- alo << (shift&31)
    924     not     rRESULT1, a2		#  rhi<- 31-shift  (shift is 5b)
        # Two-step right shift computes alo >> (32-(shift&31)) without needing a
        # shift amount of 32 (srl honors only the low 5 bits of its operand).
    925     srl     rARG0, 1
    926     srl     rARG0, rRESULT1		#  alo<- alo >> (32-(shift&31))
    927     sll     rRESULT1, rARG1, a2		#  rhi<- ahi << (shift&31)
    928     or      rRESULT1, rARG0		#  rhi<- rhi | alo
    929     andi    a2, 0x20			#  shift<- shift & 0x20
        # Bit 5 set means a shift of 32..63: the low word becomes 0 and the high
        # word takes the shifted low word (conditional moves, no branch).
    930     movn    rRESULT1, rRESULT0, a2	#  rhi<- rlo (if shift&0x20)
    931     movn    rRESULT0, zero, a2		#  rlo<- 0  (if shift&0x20)
    932     RETURN
    933 
    934 /* ------------------------------ */
    935     .balign 4
    936     .global dvmCompiler_TEMPLATE_SHR_LONG
    937 dvmCompiler_TEMPLATE_SHR_LONG:
    938 /* File: mips/TEMPLATE_SHR_LONG.S */
    939     /*
    940      * Long integer shift.  This is different from the generic 32/64-bit
    941      * binary operations because vAA/vBB are 64-bit but vCC (the shift
    942      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
    943      * 6 bits.
    944      */
    945     /* shr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
    946     sra     rRESULT1, rARG1, a2		#  rhi<- ahi >> (shift&31)
    947     srl     rRESULT0, rARG0, a2		#  rlo<- alo >> (shift&31)
    948     sra     a3, rARG1, 31		#  a3<- sign(ah)
    949     not     rARG0, a2			#  alo<- 31-shift (shift is 5b)
        # Two-step left shift computes ahi << (32-(shift&31)) without needing a
        # shift amount of 32 (sll honors only the low 5 bits of its operand).
    950     sll     rARG1, 1
    951     sll     rARG1, rARG0		#  ahi<- ahi << (32-(shift&31))
    952     or      rRESULT0, rARG1		#  rlo<- rlo | ahi
    953     andi    a2, 0x20			#  shift & 0x20
        # Bit 5 set means a shift of 32..63: low word takes the shifted high
        # word and the high word fills with the sign (conditional moves).
    954     movn    rRESULT0, rRESULT1, a2	#  rlo<- rhi (if shift&0x20)
    955     movn    rRESULT1, a3, a2		#  rhi<- sign(ahi) (if shift&0x20)
    956     RETURN
    957 
    958 /* ------------------------------ */
    959     .balign 4
    960     .global dvmCompiler_TEMPLATE_USHR_LONG
    961 dvmCompiler_TEMPLATE_USHR_LONG:
    962 /* File: mips/TEMPLATE_USHR_LONG.S */
    963     /*
    964      * Long integer shift.  This is different from the generic 32/64-bit
    965      * binary operations because vAA/vBB are 64-bit but vCC (the shift
    966      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
    967      * 6 bits.
    968      */
    969     /* ushr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
    970     srl     rRESULT1, rARG1, a2		#  rhi<- ahi >> (shift&31)
    971     srl     rRESULT0, rARG0, a2		#  rlo<- alo >> (shift&31)
    972     not     rARG0, a2			#  alo<- 31-n  (shift is 5b)
        # Two-step left shift computes ahi << (32-(shift&31)) without needing a
        # shift amount of 32 (sll honors only the low 5 bits of its operand).
    973     sll     rARG1, 1
    974     sll     rARG1, rARG0		#  ahi<- ahi << (32-(shift&31))
    975     or      rRESULT0, rARG1		#  rlo<- rlo | ahi
    976     andi    a2, 0x20			#  shift & 0x20
        # Bit 5 set means a shift of 32..63: low word takes the shifted high
        # word and the high word becomes 0 (conditional moves, no branch).
    977     movn    rRESULT0, rRESULT1, a2	#  rlo<- rhi (if shift&0x20)
    978     movn    rRESULT1, zero, a2		#  rhi<- 0 (if shift&0x20)
    979     RETURN
    980 
    981 /* ------------------------------ */
    982     .balign 4
    983     .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
    984 dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
    985 /* File: mips/TEMPLATE_ADD_FLOAT_VFP.S */
    986 /* File: mips/fbinop.S */
    987     /*
    988      * Generic 32-bit binary float operation. a0 = a1 op a2.
    989      *
    990      * For: add-fp, sub-fp, mul-fp, div-fp
    991      *
    992      * On entry:
    993      *     a0 = target dalvik register address
    994      *     a1 = op1 address
    995      *     a2 = op2 address
    996      *
    997      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
    998      *
    999      */
   1000     move rOBJ, a0                       # save a0
   1001 #ifdef  SOFT_FLOAT
   1002     LOAD(a0, a1)                        # a0<- vBB
   1003     LOAD(a1, a2)                        # a1<- vCC
        # Template generated with chkzero=0: the divide-by-zero check is disabled.
   1004     .if 0
   1005     beqz    a1, common_errDivideByZero  # is second operand zero?
   1006     .endif
   1007                                # optional op
   1008     JAL(__addsf3)                              # v0 = result (libgcc soft-float add)
   1009     STORE(v0, rOBJ)                     # vAA <- v0
   1010 #else
   1011     LOAD_F(fa0, a1)                     # fa0<- vBB
   1012     LOAD_F(fa1, a2)                     # fa1<- vCC
   1013     .if 0
   1014     # is second operand zero?
   1015     li.s        ft0, 0
   1016     c.eq.s      fcc0, ft0, fa1          # condition bit and comparison with 0
   1017     bc1t        fcc0, common_errDivideByZero
   1018     .endif
   1019                                # optional op
   1020     add.s fv0, fa0, fa1                            # fv0 = result
   1021     STORE_F(fv0, rOBJ)                  # vAA <- fv0
   1022 #endif
   1023     RETURN
   1024 
   1025 
   1026 /* ------------------------------ */
   1027     .balign 4
   1028     .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
   1029 dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
   1030 /* File: mips/TEMPLATE_SUB_FLOAT_VFP.S */
   1031 /* File: mips/fbinop.S */
   1032     /*
   1033      * Generic 32-bit binary float operation. a0 = a1 op a2.
   1034      *
   1035      * For: add-fp, sub-fp, mul-fp, div-fp
   1036      *
   1037      * On entry:
   1038      *     a0 = target dalvik register address
   1039      *     a1 = op1 address
   1040      *     a2 = op2 address
   1041      *
   1042      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1043      *
   1044      */
   1045     move rOBJ, a0                       # save a0
   1046 #ifdef  SOFT_FLOAT
   1047     LOAD(a0, a1)                        # a0<- vBB
   1048     LOAD(a1, a2)                        # a1<- vCC
        # Template generated with chkzero=0: the divide-by-zero check is disabled.
   1049     .if 0
   1050     beqz    a1, common_errDivideByZero  # is second operand zero?
   1051     .endif
   1052                                # optional op
   1053     JAL(__subsf3)                              # v0 = result (libgcc soft-float subtract)
   1054     STORE(v0, rOBJ)                     # vAA <- v0
   1055 #else
   1056     LOAD_F(fa0, a1)                     # fa0<- vBB
   1057     LOAD_F(fa1, a2)                     # fa1<- vCC
   1058     .if 0
   1059     # is second operand zero?
   1060     li.s        ft0, 0
   1061     c.eq.s      fcc0, ft0, fa1          # condition bit and comparison with 0
   1062     bc1t        fcc0, common_errDivideByZero
   1063     .endif
   1064                                # optional op
   1065     sub.s fv0, fa0, fa1                            # fv0 = result
   1066     STORE_F(fv0, rOBJ)                  # vAA <- fv0
   1067 #endif
   1068     RETURN
   1069 
   1070 
   1071 /* ------------------------------ */
   1072     .balign 4
   1073     .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
   1074 dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
   1075 /* File: mips/TEMPLATE_MUL_FLOAT_VFP.S */
   1076 /* File: mips/fbinop.S */
   1077     /*
   1078      * Generic 32-bit binary float operation. a0 = a1 op a2.
   1079      *
   1080      * For: add-fp, sub-fp, mul-fp, div-fp
   1081      *
   1082      * On entry:
   1083      *     a0 = target dalvik register address
   1084      *     a1 = op1 address
   1085      *     a2 = op2 address
   1086      *
   1087      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1088      *
   1089      */
   1090     move rOBJ, a0                       # save a0
   1091 #ifdef  SOFT_FLOAT
   1092     LOAD(a0, a1)                        # a0<- vBB
   1093     LOAD(a1, a2)                        # a1<- vCC
        # Template generated with chkzero=0: the divide-by-zero check is disabled.
   1094     .if 0
   1095     beqz    a1, common_errDivideByZero  # is second operand zero?
   1096     .endif
   1097                                # optional op
   1098     JAL(__mulsf3)                              # v0 = result (libgcc soft-float multiply)
   1099     STORE(v0, rOBJ)                     # vAA <- v0
   1100 #else
   1101     LOAD_F(fa0, a1)                     # fa0<- vBB
   1102     LOAD_F(fa1, a2)                     # fa1<- vCC
   1103     .if 0
   1104     # is second operand zero?
   1105     li.s        ft0, 0
   1106     c.eq.s      fcc0, ft0, fa1          # condition bit and comparison with 0
   1107     bc1t        fcc0, common_errDivideByZero
   1108     .endif
   1109                                # optional op
   1110     mul.s fv0, fa0, fa1                            # fv0 = result
   1111     STORE_F(fv0, rOBJ)                  # vAA <- fv0
   1112 #endif
   1113     RETURN
   1114 
   1115 
   1116 /* ------------------------------ */
   1117     .balign 4
   1118     .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
   1119 dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
   1120 /* File: mips/TEMPLATE_DIV_FLOAT_VFP.S */
   1121 /* File: mips/fbinop.S */
   1122     /*
   1123      * Generic 32-bit binary float operation. a0 = a1 op a2.
   1124      *
   1125      * For: add-fp, sub-fp, mul-fp, div-fp
   1126      *
   1127      * On entry:
   1128      *     a0 = target dalvik register address
   1129      *     a1 = op1 address
   1130      *     a2 = op2 address
   1131      *
   1132      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1133      *
   1134      */
   1135     move rOBJ, a0                       # save a0
   1136 #ifdef  SOFT_FLOAT
   1137     LOAD(a0, a1)                        # a0<- vBB
   1138     LOAD(a1, a2)                        # a1<- vCC
        # Template generated with chkzero=0: the divide-by-zero check is disabled
        # (float division by zero yields inf/NaN per IEEE-754, not an exception).
   1139     .if 0
   1140     beqz    a1, common_errDivideByZero  # is second operand zero?
   1141     .endif
   1142                                # optional op
   1143     JAL(__divsf3)                              # v0 = result (libgcc soft-float divide)
   1144     STORE(v0, rOBJ)                     # vAA <- v0
   1145 #else
   1146     LOAD_F(fa0, a1)                     # fa0<- vBB
   1147     LOAD_F(fa1, a2)                     # fa1<- vCC
   1148     .if 0
   1149     # is second operand zero?
   1150     li.s        ft0, 0
   1151     c.eq.s      fcc0, ft0, fa1          # condition bit and comparison with 0
   1152     bc1t        fcc0, common_errDivideByZero
   1153     .endif
   1154                                # optional op
   1155     div.s fv0, fa0, fa1                            # fv0 = result
   1156     STORE_F(fv0, rOBJ)                  # vAA <- fv0
   1157 #endif
   1158     RETURN
   1159 
   1160 
   1161 /* ------------------------------ */
   1162     .balign 4
   1163     .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
   1164 dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
   1165 /* File: mips/TEMPLATE_ADD_DOUBLE_VFP.S */
   1166 /* File: mips/fbinopWide.S */
   1167     /*
   1168      * Generic 64-bit binary operation.  Provide an "instr" line that
   1169      * specifies an instruction that performs "result = a0-a1 op a2-a3".
   1170      * This could be an MIPS instruction or a function call.
   1171      * If "chkzero" is set to 1, we perform a divide-by-zero check on
   1172      * vCC (a1).  Useful for integer division and modulus.
   1173      *
   1174      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
   1175      *      xor-long, add-double, sub-double, mul-double, div-double,
   1176      *      rem-double
   1177      *
   1178      * On entry:
   1179      *     a0 = target dalvik register address
   1180      *     a1 = op1 address
   1181      *     a2 = op2 address
   1182      *
   1183      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1184      */
   1185     move rOBJ, a0                       # save a0
   1186 #ifdef  SOFT_FLOAT
   1187     move t0, a1                         # save a1
   1188     move t1, a2                         # save a2
   1189     LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
   1190     LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
        # Template generated with chkzero=0: the divide-by-zero check is disabled.
   1191     .if 0
   1192     or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
   1193     beqz        t0, common_errDivideByZero
   1194     .endif
   1195                                # optional op
   1196     JAL(__adddf3)                              # result<- op, a0-a3 changed (libgcc soft-float add)
   1197     STORE64(rRESULT0, rRESULT1, rOBJ)
   1198 #else
   1199     LOAD64_F(fa0, fa0f, a1)
   1200     LOAD64_F(fa1, fa1f, a2)
   1201     .if 0
   1202     li.d        ft0, 0
   1203     c.eq.d      fcc0, fa1, ft0
   1204     bc1t        fcc0, common_errDivideByZero
   1205     .endif
   1206                                # optional op
   1207     add.d fv0, fa0, fa1
   1208     STORE64_F(fv0, fv0f, rOBJ)
   1209 #endif
   1210     RETURN
   1211 
   1212 
   1213 /* ------------------------------ */
   1214     .balign 4
   1215     .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
   1216 dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
   1217 /* File: mips/TEMPLATE_SUB_DOUBLE_VFP.S */
   1218 /* File: mips/fbinopWide.S */
   1219     /*
   1220      * Generic 64-bit binary operation.  Provide an "instr" line that
   1221      * specifies an instruction that performs "result = a0-a1 op a2-a3".
   1222      * This could be an MIPS instruction or a function call.
   1223      * If "chkzero" is set to 1, we perform a divide-by-zero check on
   1224      * vCC (a1).  Useful for integer division and modulus.
   1225      *
   1226      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
   1227      *      xor-long, add-double, sub-double, mul-double, div-double,
   1228      *      rem-double
   1229      *
   1230      * On entry:
   1231      *     a0 = target dalvik register address
   1232      *     a1 = op1 address
   1233      *     a2 = op2 address
   1234      *
   1235      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1236      */
   1237     move rOBJ, a0                       # save a0
   1238 #ifdef  SOFT_FLOAT
   1239     move t0, a1                         # save a1
   1240     move t1, a2                         # save a2
   1241     LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
   1242     LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
        # Template generated with chkzero=0: the divide-by-zero check is disabled.
   1243     .if 0
   1244     or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
   1245     beqz        t0, common_errDivideByZero
   1246     .endif
   1247                                # optional op
   1248     JAL(__subdf3)                              # result<- op, a0-a3 changed (libgcc soft-float subtract)
   1249     STORE64(rRESULT0, rRESULT1, rOBJ)
   1250 #else
   1251     LOAD64_F(fa0, fa0f, a1)
   1252     LOAD64_F(fa1, fa1f, a2)
   1253     .if 0
   1254     li.d        ft0, 0
   1255     c.eq.d      fcc0, fa1, ft0
   1256     bc1t        fcc0, common_errDivideByZero
   1257     .endif
   1258                                # optional op
   1259     sub.d fv0, fa0, fa1
   1260     STORE64_F(fv0, fv0f, rOBJ)
   1261 #endif
   1262     RETURN
   1263 
   1264 
   1265 /* ------------------------------ */
   1266     .balign 4
   1267     .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
   1268 dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
   1269 /* File: mips/TEMPLATE_MUL_DOUBLE_VFP.S */
   1270 /* File: mips/fbinopWide.S */
   1271     /*
   1272      * Generic 64-bit binary operation.  Provide an "instr" line that
   1273      * specifies an instruction that performs "result = a0-a1 op a2-a3".
   1274      * This could be an MIPS instruction or a function call.
   1275      * If "chkzero" is set to 1, we perform a divide-by-zero check on
   1276      * vCC (a1).  Useful for integer division and modulus.
   1277      *
   1278      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
   1279      *      xor-long, add-double, sub-double, mul-double, div-double,
   1280      *      rem-double
   1281      *
   1282      * On entry:
   1283      *     a0 = target dalvik register address
   1284      *     a1 = op1 address
   1285      *     a2 = op2 address
   1286      *
   1287      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1288      */
   1289     move rOBJ, a0                       # save a0
   1290 #ifdef  SOFT_FLOAT
   1291     move t0, a1                         # save a1
   1292     move t1, a2                         # save a2
   1293     LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
   1294     LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
        # Template generated with chkzero=0: the divide-by-zero check is disabled.
   1295     .if 0
   1296     or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
   1297     beqz        t0, common_errDivideByZero
   1298     .endif
   1299                                # optional op
   1300     JAL(__muldf3)                              # result<- op, a0-a3 changed (libgcc soft-float multiply)
   1301     STORE64(rRESULT0, rRESULT1, rOBJ)
   1302 #else
   1303     LOAD64_F(fa0, fa0f, a1)
   1304     LOAD64_F(fa1, fa1f, a2)
   1305     .if 0
   1306     li.d        ft0, 0
   1307     c.eq.d      fcc0, fa1, ft0
   1308     bc1t        fcc0, common_errDivideByZero
   1309     .endif
   1310                                # optional op
   1311     mul.d fv0, fa0, fa1
   1312     STORE64_F(fv0, fv0f, rOBJ)
   1313 #endif
   1314     RETURN
   1315 
   1316 
   1317 /* ------------------------------ */
   1318     .balign 4
   1319     .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
   1320 dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
   1321 /* File: mips/TEMPLATE_DIV_DOUBLE_VFP.S */
   1322 /* File: mips/fbinopWide.S */
   1323     /*
   1324      * Generic 64-bit binary operation.  Provide an "instr" line that
   1325      * specifies an instruction that performs "result = a0-a1 op a2-a3".
   1326      * This could be an MIPS instruction or a function call.
   1327      * If "chkzero" is set to 1, we perform a divide-by-zero check on
   1328      * vCC (a1).  Useful for integer division and modulus.
   1329      *
   1330      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
   1331      *      xor-long, add-double, sub-double, mul-double, div-double,
   1332      *      rem-double
   1333      *
   1334      * On entry:
   1335      *     a0 = target dalvik register address
   1336      *     a1 = op1 address
   1337      *     a2 = op2 address
   1338      *
   1339      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
   1340      */
   1341     move rOBJ, a0                       # save a0
   1342 #ifdef  SOFT_FLOAT
   1343     move t0, a1                         # save a1
   1344     move t1, a2                         # save a2
   1345     LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
   1346     LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
        # Template generated with chkzero=0: the divide-by-zero check is disabled
        # (double division by zero yields inf/NaN per IEEE-754, not an exception).
   1347     .if 0
   1348     or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
   1349     beqz        t0, common_errDivideByZero
   1350     .endif
   1351                                # optional op
   1352     JAL(__divdf3)                              # result<- op, a0-a3 changed (libgcc soft-float divide)
   1353     STORE64(rRESULT0, rRESULT1, rOBJ)
   1354 #else
   1355     LOAD64_F(fa0, fa0f, a1)
   1356     LOAD64_F(fa1, fa1f, a2)
   1357     .if 0
   1358     li.d        ft0, 0
   1359     c.eq.d      fcc0, fa1, ft0
   1360     bc1t        fcc0, common_errDivideByZero
   1361     .endif
   1362                                # optional op
   1363     div.d fv0, fa0, fa1
   1364     STORE64_F(fv0, fv0f, rOBJ)
   1365 #endif
   1366     RETURN
   1367 
   1368 
   1369 /* ------------------------------ */
   1370     .balign 4
   1371     .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
   1372 dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
   1373 /* File: mips/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
   1374 /* File: mips/funopNarrower.S */
   1375     /*
   1376      * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
   1377      * that specifies an instruction that performs "result = op a0/a1", where
   1378      * "result" is a 32-bit quantity in a0.
   1379      *
   1380      * For: long-to-float, double-to-int, double-to-float
   1381      * If hard floating point support is available, use fa0 as the parameter, except for
   1382      * long-to-float opcode.
   1383      * (This would work for long-to-int, but that instruction is actually
   1384      * an exact match for OP_MOVE.)
   1385      *
   1386      * On entry:
   1387      *     a0 = target dalvik register address
   1388      *     a1 = src dalvik register address
   1389      *
   1390      */
   1391     move rINST, a0                      # save a0
   1392 #ifdef  SOFT_FLOAT
   1393     move t0, a1                         # save a1
   1394     LOAD64(rARG0, rARG1, t0)            # a0/a1<- vB/vB+1
   1395                                # optional op
   1396     JAL(__truncdfsf2)                              # v0<- op, a0-a3 changed (libgcc double->float)
   1397 .LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg:
   1398     STORE(v0, rINST)                    # vA<- v0
   1399 #else
   1400     LOAD64_F(fa0, fa0f, a1)
   1401                                # optional op
   1402     cvt.s.d  fv0,fa0                            # fv0 = result
   1403 .LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg_f:
   1404     STORE_F(fv0, rINST)                 # vA<- fv0
   1405 #endif
   1406     RETURN
   1407 
   1408 
   1409 /* ------------------------------ */
   1410     .balign 4
   1411     .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
   1412 dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
   1413 /* File: mips/TEMPLATE_DOUBLE_TO_INT_VFP.S */
   1414 /* File: mips/funopNarrower.S */
   1415     /*
   1416      * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
   1417      * that specifies an instruction that performs "result = op a0/a1", where
   1418      * "result" is a 32-bit quantity in a0.
   1419      *
   1420      * For: long-to-float, double-to-int, double-to-float
   1421      * If hard floating point support is available, use fa0 as the parameter, except for
   1422      * long-to-float opcode.
   1423      * (This would work for long-to-int, but that instruction is actually
   1424      * an exact match for OP_MOVE.)
   1425      *
   1426      * On entry:
   1427      *     a0 = target dalvik register address
   1428      *     a1 = src dalvik register address
   1429      *
   1430      */
   1431     move rINST, a0                      # save a0
   1432 #ifdef  SOFT_FLOAT
   1433     move t0, a1                         # save a1
   1434     LOAD64(rARG0, rARG1, t0)            # a0/a1<- vB/vB+1
   1435                                # optional op
        # Unconditional hand-off to the clamp/convert helper (d2i_doconv below);
        # it branches back to .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg with the
        # min/max/NaN-clamped result in v0, then the RETURN below executes.
   1436     b    d2i_doconv                              # v0<- op, a0-a3 changed
   1437 .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg:
   1438     STORE(v0, rINST)                    # vA<- v0
   1439 #else
   1440     LOAD64_F(fa0, fa0f, a1)
   1441                                # optional op
        # NOTE(review): the hard-float path also defers to d2i_doconv; its
        # hard-float variant is outside this view -- confirm it returns via
        # .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f with the result in fv0.
   1442     b    d2i_doconv                            # fv0 = result
   1443 .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f:
   1444     STORE_F(fv0, rINST)                 # vA<- fv0
   1445 #endif
   1446     RETURN
   1447 
   1448 
    1449 /*
    1450  * Convert the double in a0/a1 to an int in a0.
    1451  *
    1452  * We have to clip values to int min/max per the specification.  The
    1453  * expected common case is a "reasonable" value that converts directly
    1454  * to modest integer.  The EABI convert function isn't doing this for us.
    1455  * Use rBIX / rOBJ as global to hold arguments (they are not bound to a global var)
    1456  */
    1457 
    1458 d2i_doconv:
    1459 #ifdef SOFT_FLOAT
# Soft-float path: use libgcc comparison helpers.  Per libgcc convention,
# __gedf2 returns >= 0 when a >= b, __ledf2 returns <= 0 when a <= b, and
# __nedf2 returns nonzero when a != b (which, comparing a value against
# itself, is true only for NaN).
    1460     la          t0, .LDOUBLE_TO_INT_max
    1461     LOAD64(rARG2, rARG3, t0)
    1462     move        rBIX, rARG0                       # save a0
    1463     move        rOBJ, rARG1                       #  and a1
    1464     JAL(__gedf2)                               # is arg >= maxint?
    1465 
    1466     move        t0, v0
    1467     li          v0, ~0x80000000                # return maxint (7fffffff)
    1468     bgez        t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg       # nonzero == yes
    1469 
    1470     move        rARG0, rBIX                       # recover arg
    1471     move        rARG1, rOBJ
    1472     la          t0, .LDOUBLE_TO_INT_min
    1473     LOAD64(rARG2, rARG3, t0)
    1474     JAL(__ledf2)                               # is arg <= minint?
    1475 
    1476     move        t0, v0
    1477     li          v0, 0x80000000                 # return minint (80000000)
    1478     blez        t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg       # nonzero == yes
    1479 
    1480     move        rARG0, rBIX                  # recover arg
    1481     move        rARG1, rOBJ
    1482     move        rARG2, rBIX                  # compare against self
    1483     move        rARG3, rOBJ
    1484     JAL(__nedf2)                        # is arg == self?
    1485 
    1486     move        t0, v0                  # zero == no
    1487     li          v0, 0
    1488     bnez        t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg        # return zero for NaN
    1489 
# In-range, non-NaN value: let the EABI helper do the actual conversion.
    1490     move        rARG0, rBIX                  # recover arg
    1491     move        rARG1, rOBJ
    1492     JAL(__fixdfsi)                      # convert double to int
    1493     b           .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg
    1494 #else
# Hard-float path: same clamping logic using FPU compares.  c.ole.d sets fcc0
# only for ordered comparisons, so NaN falls through the first two tests.
    1495     la          t0, .LDOUBLE_TO_INT_max
    1496     LOAD64_F(fa1, fa1f, t0)
    1497     c.ole.d     fcc0, fa1, fa0                  # maxint <= arg?
    1498     l.s         fv0, .LDOUBLE_TO_INT_maxret
    1499     bc1t        .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
    1500 
    1501     la          t0, .LDOUBLE_TO_INT_min
    1502     LOAD64_F(fa1, fa1f, t0)
    1503     c.ole.d     fcc0, fa0, fa1                  # arg <= minint?
    1504     l.s         fv0, .LDOUBLE_TO_INT_minret
    1505     bc1t        .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
    1506 
    1507     mov.d       fa1, fa0
    1508     c.un.d      fcc0, fa0, fa1                  # unordered vs. self == NaN
    1509     li.s        fv0, 0                          # NaN converts to 0
    1510     bc1t        .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
    1511 
    1512     trunc.w.d   fv0, fa0                        # round-toward-zero conversion
    1513     b           .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
    1514 #endif
    1515 
    1516 
    1517 .LDOUBLE_TO_INT_max:
    1518     .dword   0x41dfffffffc00000                  # maxint (2^31 - 1), as a double
    1519 .LDOUBLE_TO_INT_min:
    1520     .dword   0xc1e0000000000000                  # minint (-2^31), as a double
    1521 .LDOUBLE_TO_INT_maxret:
    1522     .word   0x7fffffff
    1523 .LDOUBLE_TO_INT_minret:
    1524     .word   0x80000000
   1525 
    1526 /* ------------------------------ */
    1527     .balign 4
    1528     .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
    1529 dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
    1530 /* File: mips/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
    1531 /* File: mips/funopWider.S */
    1532     /*
    1533      * Generic 32bit-to-64bit floating point unary operation.  Provide an
    1534      * "instr" line that specifies an instruction that performs "d0 = op s0".
    1535      *
    1536      * For: int-to-double, float-to-double
    1537      *
    1538      * On entry:
    1539      *     a0 = target dalvik register address
    1540      *     a1 = src dalvik register address
    1541      */
    1542     /* unop vA, vB */
    1543     move rOBJ, a0                       # save a0
    1544 #ifdef  SOFT_FLOAT
    1545     LOAD(a0, a1)                        # a0<- vB
    1546                                # optional op
    1547     JAL(__extendsfdf2)                              # result<- op, a0-a3 changed
    1548 
# Note: this label also appears in the #else arm below; only one arm is
# ever assembled, so there is no duplicate-symbol conflict.
    1549 .LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg:
    1550     STORE64(rRESULT0, rRESULT1, rOBJ)   # vA/vA+1<- v0/v1
    1551 #else
    1552     LOAD_F(fa0, a1)                     # fa0<- vB
    1553                                # optional op
    1554     cvt.d.s fv0, fa0                    # single -> double
    1555 
    1556 .LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg:
    1557     STORE64_F(fv0, fv0f, rOBJ)                          # vA/vA+1<- fv0/fv0f
    1558 #endif
    1559     RETURN
   1560 
   1561 
    1562 /* ------------------------------ */
    1563     .balign 4
    1564     .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
    1565 dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
    1566 /* File: mips/TEMPLATE_FLOAT_TO_INT_VFP.S */
    1567 /* File: mips/funop.S */
    1568     /*
    1569      * Generic 32-bit unary operation.  Provide an "instr" line that
    1570      * specifies an instruction that performs "result = op a0".
    1571      * This could be a MIPS instruction or a function call.
    1572      *
    1573      * for: int-to-float, float-to-int
    1574      *
    1575      * On entry:
    1576      *     a0 = target dalvik register address
    1577      *     a1 = src dalvik register address
    1578      *
    1579      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
    1580      *
    1581      */
    1582     move rOBJ, a0                       # save a0
    1583 #ifdef SOFT_FLOAT
    1584     LOAD(a0, a1)                        # a0<- vBB
    1585                                # optional op
    1586     b    f2i_doconv                              # v0<- op, a0-a3 changed
# f2i_doconv (defined below) performs the clamped float-to-int conversion and
# branches back to this label with the result in v0.
    1587 .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg:
    1588     STORE(v0, rOBJ)                     # vAA<- v0
    1589 #else
    1590     LOAD_F(fa0, a1)                     # fa0<- vBB
    1591                                # optional op
    1592     b        f2i_doconv                            # fv0 = result
# Hard-float return point: f2i_doconv branches back here with the result bits in fv0.
    1593 .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f:
    1594     STORE_F(fv0, rOBJ)                  # vAA <- fv0
    1595 #endif
    1596     RETURN
   1597 
   1598 
    1599 /*
    1600  * Not an entry point as it is used only once !!
    1601  * Convert the float in a0/fa0 to an int, clamping to int min/max and
    1602  * mapping NaN to 0, then branch back to the caller's set_vreg label.
    1603  */
    1604 f2i_doconv:
    1605 #ifdef SOFT_FLOAT
# Soft-float path: libgcc helpers.  __gesf2 returns >= 0 when a >= b,
# __lesf2 returns <= 0 when a <= b, __nesf2 returns nonzero when a != b
# (true for a self-compare only when the value is NaN).
        li      a1, 0x4f000000  # (float)maxint (2^31 as float; maxint itself is not representable)
        move    rBIX, a0
        JAL(__gesf2)            # is arg >= maxint?
        move    t0, v0
        li      v0, ~0x80000000 # return maxint (7fffffff)
        bgez    t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg

        move    a0, rBIX                # recover arg
        li      a1, 0xcf000000  # (float)minint
        JAL(__lesf2)

        move    t0, v0
        li      v0, 0x80000000  # return minint (80000000)
        blez    t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
        move    a0, rBIX
        move    a1, rBIX
        JAL(__nesf2)            # arg != arg, i.e. NaN?

        move    t0, v0
        li      v0, 0           # return zero for NaN
        bnez    t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg

# In-range, non-NaN value: let the EABI helper do the actual conversion.
        move    a0, rBIX
        JAL(__fixsfsi)
        b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
#else
# Hard-float path: c.ole.s sets fcc0 only for ordered comparisons, so NaN
# falls through the first two tests and is caught by c.un.s below.
        l.s             fa1, .LFLOAT_TO_INT_max
        c.ole.s         fcc0, fa1, fa0          # maxint <= arg?
        l.s             fv0, .LFLOAT_TO_INT_ret_max
        bc1t            .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f

        l.s             fa1, .LFLOAT_TO_INT_min
        c.ole.s         fcc0, fa0, fa1          # arg <= minint?
        l.s             fv0, .LFLOAT_TO_INT_ret_min
        bc1t            .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f

        mov.s           fa1, fa0
        c.un.s          fcc0, fa0, fa1          # unordered vs. self == NaN
        li.s            fv0, 0                  # NaN converts to 0
        bc1t            .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f

        trunc.w.s       fv0, fa0                # round-toward-zero conversion
        b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f
#endif

.LFLOAT_TO_INT_max:
        .word   0x4f000000                      # 2^31 as a float (clamp threshold)
.LFLOAT_TO_INT_min:
        .word   0xcf000000                      # -2^31 as a float
.LFLOAT_TO_INT_ret_max:
        .word   0x7fffffff
.LFLOAT_TO_INT_ret_min:
        .word   0x80000000
   1657 
   1658 
    1659 /* ------------------------------ */
    1660     .balign 4
    1661     .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
    1662 dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
    1663 /* File: mips/TEMPLATE_INT_TO_DOUBLE_VFP.S */
    1664 /* File: mips/funopWider.S */
    1665     /*
    1666      * Generic 32bit-to-64bit floating point unary operation.  Provide an
    1667      * "instr" line that specifies an instruction that performs "d0 = op s0".
    1668      *
    1669      * For: int-to-double, float-to-double
    1670      *
    1671      * On entry:
    1672      *     a0 = target dalvik register address
    1673      *     a1 = src dalvik register address
    1674      */
    1675     /* unop vA, vB */
    1676     move rOBJ, a0                       # save a0
    1677 #ifdef  SOFT_FLOAT
    1678     LOAD(a0, a1)                        # a0<- vB
    1679                                # optional op
    1680     JAL(__floatsidf)                              # result<- op, a0-a3 changed
    1681 
# Note: this label also appears in the #else arm below; only one arm is
# ever assembled, so there is no duplicate-symbol conflict.
    1682 .LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg:
    1683     STORE64(rRESULT0, rRESULT1, rOBJ)   # vA/vA+1<- v0/v1
    1684 #else
    1685     LOAD_F(fa0, a1)                     # fa0<- vB
    1686                                # optional op
    1687     cvt.d.w    fv0, fa0                 # word (int) -> double
    1688 
    1689 .LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg:
    1690     STORE64_F(fv0, fv0f, rOBJ)                          # vA/vA+1<- fv0/fv0f
    1691 #endif
    1692     RETURN
   1693 
   1694 
    1695 /* ------------------------------ */
    1696     .balign 4
    1697     .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
    1698 dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
    1699 /* File: mips/TEMPLATE_INT_TO_FLOAT_VFP.S */
    1700 /* File: mips/funop.S */
    1701     /*
    1702      * Generic 32-bit unary operation.  Provide an "instr" line that
    1703      * specifies an instruction that performs "result = op a0".
    1704      * This could be a MIPS instruction or a function call.
    1705      *
    1706      * for: int-to-float, float-to-int
    1707      *
    1708      * On entry:
    1709      *     a0 = target dalvik register address
    1710      *     a1 = src dalvik register address
    1711      *
    1712      * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
    1713      *
    1714      */
    1715     move rOBJ, a0                       # save a0
    1716 #ifdef SOFT_FLOAT
    1717     LOAD(a0, a1)                        # a0<- vBB
    1718                                # optional op
    1719     JAL(__floatsisf)                              # v0<- op, a0-a3 changed
    1720 .LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg:
    1721     STORE(v0, rOBJ)                     # vAA<- v0
    1722 #else
    1723     LOAD_F(fa0, a1)                     # fa0<- vBB
    1724                                # optional op
    1725     cvt.s.w fv0, fa0                            # fv0 = result (word -> single)
    1726 .LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg_f:
    1727     STORE_F(fv0, rOBJ)                  # vAA <- fv0
    1728 #endif
    1729     RETURN
   1730 
   1731 
    1732 /* ------------------------------ */
    1733     .balign 4
    1734     .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
    1735 dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
    1736 /* File: mips/TEMPLATE_CMPG_DOUBLE_VFP.S */
    1737 /* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */
    1738     /*
    1739      * Compare two double precision floating-point values.  Puts 0, 1, or -1 into the
    1740      * destination register based on the results of the comparison.
    1741      *
    1742      * Provide a "naninst" instruction that puts 1 or -1 into a1 depending
    1743      * on what value we'd like to return when one of the operands is NaN.
    1744      *
    1745      * The operation we're implementing is:
    1746      *   if (x == y)
    1747      *     return 0;
    1748      *   else if (x < y)
    1749      *     return -1;
    1750      *   else if (x > y)
    1751      *     return 1;
    1752      *   else
    1753      *     return {-1,1};  // one or both operands was NaN
    1754      *
    1755      * On entry:
    1756      *    a0 = &op1 [vBB]
    1757      *    a1 = &op2 [vCC]
    1758      *
    1759      * for: cmpl-double, cmpg-double
    1760      */
    1761     /* op vAA, vBB, vCC */
    1762 
    1763     /* "classic" form */
    1764 #ifdef  SOFT_FLOAT
    1765     move rOBJ, a0                       # save a0
    1766     move rBIX, a1                       # save a1
    1767     LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
    1768     LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
    1769     JAL(__eqdf2)                        # v0<- (vBB == vCC)
    1770     li       rTEMP, 0                   # vAA<- 0
    1771     beqz     v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
    1772     LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
    1773     LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
    1774     JAL(__ltdf2)                        # a0<- (vBB < vCC)
    1775     li       rTEMP, -1                  # vAA<- -1
    1776     bltz     v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
    1777     LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
    1778     LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
    1779     JAL(__gtdf2)                        # v0<- (vBB > vCC)
    1780     li      rTEMP, 1                    # vAA<- 1
    1781     bgtz    v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
    1782 #else
# Ordered FPU compares: all three tests fail when either operand is NaN,
# so NaN falls through to the default below.
    1783     LOAD64_F(fs0, fs0f, a0)             # fs0<- vBB
    1784     LOAD64_F(fs1, fs1f, a1)             # fs1<- vCC
    1785     c.olt.d     fcc0, fs0, fs1          # Is fs0 < fs1
    1786     li          rTEMP, -1
    1787     bc1t        fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
    1788     c.olt.d     fcc0, fs1, fs0
    1789     li          rTEMP, 1
    1790     bc1t        fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
    1791     c.eq.d      fcc0, fs0, fs1
    1792     li          rTEMP, 0
    1793     bc1t        fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
    1794 #endif
    1795 
    1796     li            rTEMP, 1              # unordered (NaN): cmpg-double returns 1
    1797 
    1798 TEMPLATE_CMPG_DOUBLE_VFP_finish:
    1799     move     v0, rTEMP                  # v0<- vAA
    1800     RETURN
   1801 
   1802 
    1803 /* ------------------------------ */
    1804     .balign 4
    1805     .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
    1806 dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
    1807 /* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */
    1808     /*
    1809      * Compare two double precision floating-point values.  Puts 0, 1, or -1 into the
    1810      * destination register based on the results of the comparison.
    1811      *
    1812      * Provide a "naninst" instruction that puts 1 or -1 into a1 depending
    1813      * on what value we'd like to return when one of the operands is NaN.
    1814      *
    1815      * The operation we're implementing is:
    1816      *   if (x == y)
    1817      *     return 0;
    1818      *   else if (x < y)
    1819      *     return -1;
    1820      *   else if (x > y)
    1821      *     return 1;
    1822      *   else
    1823      *     return {-1,1};  // one or both operands was NaN
    1824      *
    1825      * On entry:
    1826      *    a0 = &op1 [vBB]
    1827      *    a1 = &op2 [vCC]
    1828      *
    1829      * for: cmpl-double, cmpg-double
    1830      */
    1831     /* op vAA, vBB, vCC */
    1832 
    1833     /* "classic" form */
    1834 #ifdef  SOFT_FLOAT
    1835     move rOBJ, a0                       # save a0
    1836     move rBIX, a1                       # save a1
    1837     LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
    1838     LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
    1839     JAL(__eqdf2)                        # v0<- (vBB == vCC)
    1840     li       rTEMP, 0                   # vAA<- 0
    1841     beqz     v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
    1842     LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
    1843     LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
    1844     JAL(__ltdf2)                        # a0<- (vBB < vCC)
    1845     li       rTEMP, -1                  # vAA<- -1
    1846     bltz     v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
    1847     LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
    1848     LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
    1849     JAL(__gtdf2)                        # v0<- (vBB > vCC)
    1850     li      rTEMP, 1                    # vAA<- 1
    1851     bgtz    v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
    1852 #else
# Ordered FPU compares: all three tests fail when either operand is NaN,
# so NaN falls through to the default below.
    1853     LOAD64_F(fs0, fs0f, a0)             # fs0<- vBB
    1854     LOAD64_F(fs1, fs1f, a1)             # fs1<- vCC
    1855     c.olt.d     fcc0, fs0, fs1          # Is fs0 < fs1
    1856     li          rTEMP, -1
    1857     bc1t        fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
    1858     c.olt.d     fcc0, fs1, fs0
    1859     li          rTEMP, 1
    1860     bc1t        fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
    1861     c.eq.d      fcc0, fs0, fs1
    1862     li          rTEMP, 0
    1863     bc1t        fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
    1864 #endif
    1865 
    1866     li     rTEMP, -1                    # unordered (NaN): cmpl-double returns -1
    1867 
    1868 TEMPLATE_CMPL_DOUBLE_VFP_finish:
    1869     move     v0, rTEMP                  # v0<- vAA
    1870     RETURN
   1871 
    1872 /* ------------------------------ */
    1873     .balign 4
    1874     .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
    1875 dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
    1876 /* File: mips/TEMPLATE_CMPG_FLOAT_VFP.S */
    1877 /* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */
    1878     /*
    1879      * Compare two floating-point values.  Puts 0, 1, or -1 into the
    1880      * destination register based on the results of the comparison.
    1881      *
    1882      * Provide a "naninst" instruction that puts 1 or -1 into a1 depending
    1883      * on what value we'd like to return when one of the operands is NaN.
    1884      *
    1885      * The operation we're implementing is:
    1886      *   if (x == y)
    1887      *     return 0;
    1888      *   else if (x < y)
    1889      *     return -1;
    1890      *   else if (x > y)
    1891      *     return 1;
    1892      *   else
    1893      *     return {-1,1};  // one or both operands was NaN
    1894      *
    1895      * On entry:
    1896      *    a0 = &op1 [vBB]
    1897      *    a1 = &op2 [vCC]
    1898      *
    1899      * for: cmpl-float, cmpg-float
    1900      */
    1901     /* op vAA, vBB, vCC */
    1902 
    1903     /* "classic" form */
    1904 #ifdef  SOFT_FLOAT
    1905     LOAD(rOBJ, a0)                      # rOBJ<- vBB
    1906     LOAD(rBIX, a1)                      # rBIX<- vCC
    1907     move     a0, rOBJ                   # a0<- vBB
    1908     move     a1, rBIX                   # a1<- vCC
    1909     JAL(__eqsf2)                        # v0<- (vBB == vCC)
    1910     li       rTEMP, 0                   # vAA<- 0
    1911     beqz     v0, TEMPLATE_CMPG_FLOAT_VFP_finish
    1912     move     a0, rOBJ                   # a0<- vBB
    1913     move     a1, rBIX                   # a1<- vCC
    1914     JAL(__ltsf2)                        # a0<- (vBB < vCC)
    1915     li       rTEMP, -1                  # vAA<- -1
    1916     bltz     v0, TEMPLATE_CMPG_FLOAT_VFP_finish
    1917     move     a0, rOBJ                   # a0<- vBB
    1918     move     a1, rBIX                   # a1<- vCC
    1919     JAL(__gtsf2)                        # v0<- (vBB > vCC)
    1920     li      rTEMP, 1                    # vAA<- 1
    1921     bgtz    v0, TEMPLATE_CMPG_FLOAT_VFP_finish
    1922 #else
# Ordered FPU compares: all three tests fail when either operand is NaN,
# so NaN falls through to the default below.
    1923     LOAD_F(fs0, a0)                     # fs0<- vBB
    1924     LOAD_F(fs1, a1)                     # fs1<- vCC
    1925     c.olt.s     fcc0, fs0, fs1          # Is fs0 < fs1
    1926     li          rTEMP, -1
    1927     bc1t        fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
    1928     c.olt.s     fcc0, fs1, fs0
    1929     li          rTEMP, 1
    1930     bc1t        fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
    1931     c.eq.s      fcc0, fs0, fs1
    1932     li          rTEMP, 0
    1933     bc1t        fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
    1934 #endif
    1935 
    1936     li     rTEMP, 1                     # unordered (NaN): cmpg-float returns 1
    1937 
    1938 TEMPLATE_CMPG_FLOAT_VFP_finish:
    1939     move     v0, rTEMP                  # v0<- vAA
    1940     RETURN
   1941 
   1942 
    1943 /* ------------------------------ */
    1944     .balign 4
    1945     .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
    1946 dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
    1947 /* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */
    1948     /*
    1949      * Compare two floating-point values.  Puts 0, 1, or -1 into the
    1950      * destination register based on the results of the comparison.
    1951      *
    1952      * Provide a "naninst" instruction that puts 1 or -1 into a1 depending
    1953      * on what value we'd like to return when one of the operands is NaN.
    1954      *
    1955      * The operation we're implementing is:
    1956      *   if (x == y)
    1957      *     return 0;
    1958      *   else if (x < y)
    1959      *     return -1;
    1960      *   else if (x > y)
    1961      *     return 1;
    1962      *   else
    1963      *     return {-1,1};  // one or both operands was NaN
    1964      *
    1965      * On entry:
    1966      *    a0 = &op1 [vBB]
    1967      *    a1 = &op2 [vCC]
    1968      *
    1969      * for: cmpl-float, cmpg-float
    1970      */
    1971     /* op vAA, vBB, vCC */
    1972 
    1973     /* "classic" form */
    1974 #ifdef  SOFT_FLOAT
    1975     LOAD(rOBJ, a0)                      # rOBJ<- vBB
    1976     LOAD(rBIX, a1)                      # rBIX<- vCC
    1977     move     a0, rOBJ                   # a0<- vBB
    1978     move     a1, rBIX                   # a1<- vCC
    1979     JAL(__eqsf2)                        # v0<- (vBB == vCC)
    1980     li       rTEMP, 0                   # vAA<- 0
    1981     beqz     v0, TEMPLATE_CMPL_FLOAT_VFP_finish
    1982     move     a0, rOBJ                   # a0<- vBB
    1983     move     a1, rBIX                   # a1<- vCC
    1984     JAL(__ltsf2)                        # a0<- (vBB < vCC)
    1985     li       rTEMP, -1                  # vAA<- -1
    1986     bltz     v0, TEMPLATE_CMPL_FLOAT_VFP_finish
    1987     move     a0, rOBJ                   # a0<- vBB
    1988     move     a1, rBIX                   # a1<- vCC
    1989     JAL(__gtsf2)                        # v0<- (vBB > vCC)
    1990     li      rTEMP, 1                    # vAA<- 1
    1991     bgtz    v0, TEMPLATE_CMPL_FLOAT_VFP_finish
    1992 #else
# Ordered FPU compares: all three tests fail when either operand is NaN,
# so NaN falls through to the default below.
    1993     LOAD_F(fs0, a0)                     # fs0<- vBB
    1994     LOAD_F(fs1, a1)                     # fs1<- vCC
    1995     c.olt.s     fcc0, fs0, fs1          # Is fs0 < fs1
    1996     li          rTEMP, -1
    1997     bc1t        fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
    1998     c.olt.s     fcc0, fs1, fs0
    1999     li          rTEMP, 1
    2000     bc1t        fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
    2001     c.eq.s      fcc0, fs0, fs1
    2002     li          rTEMP, 0
    2003     bc1t        fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
    2004 #endif
    2005 
    2006     li     rTEMP, -1                    # unordered (NaN): cmpl-float returns -1
    2007 
    2008 TEMPLATE_CMPL_FLOAT_VFP_finish:
    2009     move     v0, rTEMP                  # v0<- vAA
    2010     RETURN
   2011 
    2012 /* ------------------------------ */
    2013     .balign 4
    2014     .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
    2015 dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
    2016 /* File: mips/TEMPLATE_SQRT_DOUBLE_VFP.S */
    2017 
    2018     /*
    2019      * 64-bit floating point sqrt operation.
    2020      * If the result is a NaN, bail out to library code to do
    2021      * the right thing.
    2022      *
    2023      * On entry:
    2024      *     a2 src addr of op1
    2025      * On exit:
    2026      *     v0,v1/fv0 = res
    2027      */
    2028 #ifdef  SOFT_FLOAT
# Soft float: no FPU sqrt available; always call the C library.
    2029     LOAD64(rARG0, rARG1, a2)        # a0/a1<- vBB/vBB+1
    2030 #else
# Hard float: try the FPU instruction first.  c.eq.d on the result against
# itself is false only when the result is NaN; a non-NaN result skips the
# libc call via the branch to 1f.
    2031     LOAD64_F(fa0, fa0f, a2)         # fa0/fa0f<- vBB/vBB+1
    2032     sqrt.d	fv0, fa0
    2033     c.eq.d	fv0, fv0
    2034     bc1t	1f
#endif
    2036     JAL(sqrt)                       # fall back to libc sqrt (NaN / soft-float case)
    2037 1:
    2038     RETURN
   2039 
    2040 /* ------------------------------ */
    2041     .balign 4
    2042     .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
    2043 dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
    2044 /* File: mips/TEMPLATE_THROW_EXCEPTION_COMMON.S */
    2045     /*
    2046      * Throw an exception from JIT'ed code.
    2047      * On entry:
    2048      *    a0    Dalvik PC that raises the exception
    2049      */
    2050     j      .LhandleException           # tail-jump; .LhandleException is defined outside this fragment (footer)
   2051 
   2052 /* ------------------------------ */
   2053     .balign 4
   2054     .global dvmCompiler_TEMPLATE_MEM_OP_DECODE
   2055 dvmCompiler_TEMPLATE_MEM_OP_DECODE:
   2056 /* File: mips/TEMPLATE_MEM_OP_DECODE.S */
   2057 #if defined(WITH_SELF_VERIFICATION)
   2058     /*
   2059      * This handler encapsulates heap memory ops for selfVerification mode.
   2060      *
   2061      * The call to the handler is inserted prior to a heap memory operation.
   2062      * This handler then calls a function to decode the memory op, and process
   2063      * it accordingly. Afterwards, the handler changes the return address to
   2064      * skip the memory op so it never gets executed.
   2065      */
   2066 #ifdef HARD_FLOAT
   2067     /* push f0-f31 onto stack */
   2068     sw      f0, fr0*-4(sp)              # push f0
   2069     sw      f1, fr1*-4(sp)              # push f1
   2070     sw      f2, fr2*-4(sp)              # push f2
   2071     sw      f3, fr3*-4(sp)              # push f3
   2072     sw      f4, fr4*-4(sp)              # push f4
   2073     sw      f5, fr5*-4(sp)              # push f5
   2074     sw      f6, fr6*-4(sp)              # push f6
   2075     sw      f7, fr7*-4(sp)              # push f7
   2076     sw      f8, fr8*-4(sp)              # push f8
   2077     sw      f9, fr9*-4(sp)              # push f9
   2078     sw      f10, fr10*-4(sp)            # push f10
   2079     sw      f11, fr11*-4(sp)            # push f11
   2080     sw      f12, fr12*-4(sp)            # push f12
   2081     sw      f13, fr13*-4(sp)            # push f13
   2082     sw      f14, fr14*-4(sp)            # push f14
   2083     sw      f15, fr15*-4(sp)            # push f15
   2084     sw      f16, fr16*-4(sp)            # push f16
   2085     sw      f17, fr17*-4(sp)            # push f17
   2086     sw      f18, fr18*-4(sp)            # push f18
   2087     sw      f19, fr19*-4(sp)            # push f19
   2088     sw      f20, fr20*-4(sp)            # push f20
   2089     sw      f21, fr21*-4(sp)            # push f21
   2090     sw      f22, fr22*-4(sp)            # push f22
   2091     sw      f23, fr23*-4(sp)            # push f23
   2092     sw      f24, fr24*-4(sp)            # push f24
   2093     sw      f25, fr25*-4(sp)            # push f25
   2094     sw      f26, fr26*-4(sp)            # push f26
   2095     sw      f27, fr27*-4(sp)            # push f27
   2096     sw      f28, fr28*-4(sp)            # push f28
   2097     sw      f29, fr29*-4(sp)            # push f29
   2098     sw      f30, fr30*-4(sp)            # push f30
   2099     sw      f31, fr31*-4(sp)            # push f31
   2100 
   2101     sub     sp, (32-0)*4                # adjust stack pointer
   2102 #endif
   2103 
   2104     /* push gp registers (except zero, gp, sp, and fp) */
   2105     .set noat
   2106     sw      AT, r_AT*-4(sp)             # push at
   2107     .set at
   2108     sw      v0, r_V0*-4(sp)             # push v0
   2109     sw      v1, r_V1*-4(sp)             # push v1
   2110     sw      a0, r_A0*-4(sp)             # push a0
   2111     sw      a1, r_A1*-4(sp)             # push a1
   2112     sw      a2, r_A2*-4(sp)             # push a2
   2113     sw      a3, r_A3*-4(sp)             # push a3
   2114     sw      t0, r_T0*-4(sp)             # push t0
   2115     sw      t1, r_T1*-4(sp)             # push t1
   2116     sw      t2, r_T2*-4(sp)             # push t2
   2117     sw      t3, r_T3*-4(sp)             # push t3
   2118     sw      t4, r_T4*-4(sp)             # push t4
   2119     sw      t5, r_T5*-4(sp)             # push t5
   2120     sw      t6, r_T6*-4(sp)             # push t6
   2121     sw      t7, r_T7*-4(sp)             # push t7
   2122     sw      s0, r_S0*-4(sp)             # push s0
   2123     sw      s1, r_S1*-4(sp)             # push s1
   2124     sw      s2, r_S2*-4(sp)             # push s2
   2125     sw      s3, r_S3*-4(sp)             # push s3
   2126     sw      s4, r_S4*-4(sp)             # push s4
   2127     sw      s5, r_S5*-4(sp)             # push s5
   2128     sw      s6, r_S6*-4(sp)             # push s6
   2129     sw      s7, r_S7*-4(sp)             # push s7
   2130     sw      t8, r_T8*-4(sp)             # push t8
   2131     sw      t9, r_T9*-4(sp)             # push t9
   2132     sw      k0, r_K0*-4(sp)             # push k0
   2133     sw      k1, r_K1*-4(sp)             # push k1
   2134     sw      ra, r_RA*-4(sp)             # push RA
   2135 
   2136     # Note: even if we don't save all 32 registers, we still need to
   2137     #       adjust SP by 32 registers due to the way we are storing
   2138     #       the registers on the stack.
   2139     sub     sp, (32-0)*4                # adjust stack pointer
   2140 
   2141     la     a2, .LdvmSelfVerificationMemOpDecode  # defined in footer.S
   2142     lw     a2, (a2)
   2143     move   a0, ra                       # a0<- link register
   2144     move   a1, sp                       # a1<- stack pointer
   2145     JALR(a2)
   2146 
   2147     /* pop gp registers (except zero, gp, sp, and fp) */
   2148     # Note: even if we don't save all 32 registers, we still need to
   2149     #       adjust SP by 32 registers due to the way we are storing
   2150     #       the registers on the stack.
   2151     add     sp, (32-0)*4                # adjust stack pointer
   2152     .set noat
   2153     lw      AT, r_AT*-4(sp)             # pop at
   2154     .set at
   2155     lw      v0, r_V0*-4(sp)             # pop v0
   2156     lw      v1, r_V1*-4(sp)             # pop v1
   2157     lw      a0, r_A0*-4(sp)             # pop a0
   2158     lw      a1, r_A1*-4(sp)             # pop a1
   2159     lw      a2, r_A2*-4(sp)             # pop a2
   2160     lw      a3, r_A3*-4(sp)             # pop a3
   2161     lw      t0, r_T0*-4(sp)             # pop t0
   2162     lw      t1, r_T1*-4(sp)             # pop t1
   2163     lw      t2, r_T2*-4(sp)             # pop t2
   2164     lw      t3, r_T3*-4(sp)             # pop t3
   2165     lw      t4, r_T4*-4(sp)             # pop t4
   2166     lw      t5, r_T5*-4(sp)             # pop t5
   2167     lw      t6, r_T6*-4(sp)             # pop t6
   2168     lw      t7, r_T7*-4(sp)             # pop t7
   2169     lw      s0, r_S0*-4(sp)             # pop s0
   2170     lw      s1, r_S1*-4(sp)             # pop s1
   2171     lw      s2, r_S2*-4(sp)             # pop s2
   2172     lw      s3, r_S3*-4(sp)             # pop s3
   2173     lw      s4, r_S4*-4(sp)             # pop s4
   2174     lw      s5, r_S5*-4(sp)             # pop s5
   2175     lw      s6, r_S6*-4(sp)             # pop s6
   2176     lw      s7, r_S7*-4(sp)             # pop s7
   2177     lw      t8, r_T8*-4(sp)             # pop t8
   2178     lw      t9, r_T9*-4(sp)             # pop t9
   2179     lw      k0, r_K0*-4(sp)             # pop k0
   2180     lw      k1, r_K1*-4(sp)             # pop k1
   2181     lw      ra, r_RA*-4(sp)             # pop RA
   2182 
   2183 #ifdef HARD_FLOAT
   2184     /* pop f0-f31 from stack */
   2185     add     sp, (32-0)*4                # adjust stack pointer
   2186     lw      f0, fr0*-4(sp)              # pop f0
   2187     lw      f1, fr1*-4(sp)              # pop f1
   2188     lw      f2, fr2*-4(sp)              # pop f2
   2189     lw      f3, fr3*-4(sp)              # pop f3
   2190     lw      f4, fr4*-4(sp)              # pop f4
   2191     lw      f5, fr5*-4(sp)              # pop f5
   2192     lw      f6, fr6*-4(sp)              # pop f6
   2193     lw      f7, fr7*-4(sp)              # pop f7
   2194     lw      f8, fr8*-4(sp)              # pop f8
   2195     lw      f9, fr9*-4(sp)              # pop f9
   2196     lw      f10, fr10*-4(sp)            # pop f10
   2197     lw      f11, fr11*-4(sp)            # pop f11
   2198     lw      f12, fr12*-4(sp)            # pop f12
   2199     lw      f13, fr13*-4(sp)            # pop f13
   2200     lw      f14, fr14*-4(sp)            # pop f14
   2201     lw      f15, fr15*-4(sp)            # pop f15
   2202     lw      f16, fr16*-4(sp)            # pop f16
   2203     lw      f17, fr17*-4(sp)            # pop f17
   2204     lw      f18, fr18*-4(sp)            # pop f18
   2205     lw      f19, fr19*-4(sp)            # pop f19
   2206     lw      f20, fr20*-4(sp)            # pop f20
   2207     lw      f21, fr21*-4(sp)            # pop f21
   2208     lw      f22, fr22*-4(sp)            # pop f22
   2209     lw      f23, fr23*-4(sp)            # pop f23
   2210     lw      f24, fr24*-4(sp)            # pop f24
   2211     lw      f25, fr25*-4(sp)            # pop f25
   2212     lw      f26, fr26*-4(sp)            # pop f26
   2213     lw      f27, fr27*-4(sp)            # pop f27
   2214     lw      f28, fr28*-4(sp)            # pop f28
   2215     lw      f29, fr29*-4(sp)            # pop f29
   2216     lw      f30, fr30*-4(sp)            # pop f30
   2217     lw      f31, fr31*-4(sp)            # pop f31
   2218 #endif
   2219 
   2220     RETURN
   2221 #endif
   2222 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_STRING_COMPARETO
dvmCompiler_TEMPLATE_STRING_COMPARETO:
/* File: mips/TEMPLATE_STRING_COMPARETO.S */
    /*
     * String's compareTo.
     *
     * Requires a0/a1 to have been previously checked for null.  Will
     * return negative if this's string is < comp, 0 if they are the
     * same and positive if >.
     *
     * IMPORTANT NOTE:
     *
     * This code relies on hard-coded offsets for string objects, and must be
     * kept in sync with definitions in UtfString.h.  See asm-constants.h
     *
     * On entry:
     *    a0:   this object pointer
     *    a1:   comp object pointer
     *
     */

     subu  v0, a0, a1                # Same?  identical references compare equal
     bnez  v0, 1f
     RETURN                          # v0 == 0 here: equal by identity
1:
     lw    t0, STRING_FIELDOFF_OFFSET(a0)
     lw    t1, STRING_FIELDOFF_OFFSET(a1)
     lw    t2, STRING_FIELDOFF_COUNT(a0)
     lw    a2, STRING_FIELDOFF_COUNT(a1)
     lw    a0, STRING_FIELDOFF_VALUE(a0)
     lw    a1, STRING_FIELDOFF_VALUE(a1)

    /*
     * At this point, we have this/comp:
     *    offset: t0/t1
     *    count:  t2/a2
     *    value:  a0/a1
     * We're going to compute
     *    a3 <- countDiff
     *    a2 <- minCount
     */
     subu  a3, t2, a2                # a3<- countDiff
     sleu  t7, t2, a2                # t7 = (thisCount <= compCount)
     movn  a2, t2, t7                # a2<- minCount = min(t2, a2)

     /*
      * Note: data pointers point to first element.
      */
     addu  a0, 16                    # point to contents[0] (array data starts
     addu  a1, 16                    #   16 bytes into the char array object)

     /* Now, build pointers to the string data */
     sll   t7, t0, 1                 # multiply offset by 2 (UTF-16 chars)
     addu  a0, a0, t7
     sll   t7, t1, 1                 # multiply offset by 2
     addu  a1, a1, t7

     /*
      * At this point we have:
      *   a0: *this string data
      *   a1: *comp string data
      *   a2: iteration count for comparison
      *   a3: value to return if the first part of the string is equal
      *   v0: reserved for result
      *   t0-t5 available for loading string data
      */

     subu  a2, 2                     # fewer than 2 chars to compare?
     bltz  a2, do_remainder2

     /*
      * Unroll the first two checks so we can quickly catch early mismatch
      * on long strings (but preserve incoming alignment)
      */
     lhu   t0, 0(a0)
     lhu   t1, 0(a1)
     subu  v0, t0, t1                # v0 = char difference
     beqz  v0, 1f
     RETURN                          # first chars differ: return the diff
1:
     lhu   t2, 2(a0)
     lhu   t3, 2(a1)
     subu  v0, t2, t3
     beqz  v0, 2f
     RETURN                          # second chars differ: return the diff
2:
     addu  a0, 4                     # offset to contents[2]
     addu  a1, 4                     # offset to contents[2]
     li    t7, 28
     bgt   a2, t7, do_memcmp16      # long strings: hand off to __memcmp16
     subu  a2, 3
     bltz  a2, do_remainder

     # Main loop: compare three chars per iteration.
loopback_triple:
     lhu   t0, 0(a0)
     lhu   t1, 0(a1)
     subu  v0, t0, t1
     beqz  v0, 1f
     RETURN
1:
     lhu   t2, 2(a0)
     lhu   t3, 2(a1)
     subu  v0, t2, t3
     beqz  v0, 2f
     RETURN
2:
     lhu   t4, 4(a0)
     lhu   t5, 4(a1)
     subu  v0, t4, t5
     beqz  v0, 3f
     RETURN
3:
     addu  a0, 6                     # offset to contents[i+3]
     addu  a1, 6                     # offset to contents[i+3]
     subu  a2, 3
     bgez  a2, loopback_triple

do_remainder:
     addu  a2, 3                     # undo loop bias; a2 = chars left (0..2)
     beqz  a2, returnDiff

     # Tail loop: one char per iteration.
loopback_single:
     lhu   t0, 0(a0)
     lhu   t1, 0(a1)
     subu  v0, t0, t1
     bnez  v0, 1f                    # mismatch: return char diff in v0
     addu  a0, 2                     # offset to contents[i+1]
     addu  a1, 2                     # offset to contents[i+1]
     subu  a2, 1
     bnez  a2, loopback_single

returnDiff:
     # Common prefix equal: result is the length difference.
     move  v0, a3
1:
     RETURN

do_remainder2:
     addu  a2, 2                     # undo bias; compare remaining 0..1 chars
     bnez  a2, loopback_single
     move  v0, a3
     RETURN

    /* Long string case */
do_memcmp16:
     move  rOBJ, a3                  # save return value if strings are equal
     JAL(__memcmp16)
     seq   t0, v0, zero              # t0 = (memcmp16 result == 0)
     movn  v0, rOBJ, t0              # overwrite return value if strings are equal
     RETURN
   2374 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_STRING_INDEXOF
dvmCompiler_TEMPLATE_STRING_INDEXOF:
/* File: mips/TEMPLATE_STRING_INDEXOF.S */
    /*
     * String's indexOf.
     *
     * Requires a0 to have been previously checked for null.  Will
     * return index of match of a1 in v0.
     *
     * IMPORTANT NOTE:
     *
     * This code relies on hard-coded offsets for string objects, and must be
     * kept in sync with definitions in UtfString.h.  See asm-constants.h
     *
     * On entry:
     *    a0:   string object pointer
     *    a1:   char to match
     *    a2:   Starting offset in string data
     */

     lw    t0, STRING_FIELDOFF_OFFSET(a0)
     lw    t1, STRING_FIELDOFF_COUNT(a0)
     lw    v0, STRING_FIELDOFF_VALUE(a0)

    /*
     * At this point, we have:
     *    v0: object pointer
     *    a1: char to match
     *    a2: starting offset
     *    t0: offset
     *    t1: string length
     */

    /* Point to first element (array data starts 16 bytes into the object) */
     addu  v0, 16                    # point to contents[0]

    /* Build pointer to start of string data */
     sll   t7, t0, 1                 # multiply offset by 2 (UTF-16 chars)
     addu  v0, v0, t7

    /* Save a copy of starting data in v1 (used to compute the result index) */
     move  v1, v0

    /* Clamp start to [0..count] */
     slt   t7, a2, zero              # t7 = (start < 0)
     movn  a2, zero, t7              # negative start -> 0
     sgt   t7, a2, t1                # t7 = (start > count)
     movn  a2, t1, t7                # start > count -> count

    /* Build pointer to start of data to compare */
     sll   t7, a2, 1                # multiply offset by 2
     addu  v0, v0, t7

    /* Compute iteration count */
     subu  a3, t1, a2

    /*
     * At this point we have:
     *   v0: start of data to test
     *   a1: char to compare
     *   a3: iteration count
     *   v1: original start of string
     *   t0-t7 available for loading string data
     */
     subu  a3, 4                     # fewer than 4 chars left?
     bltz  a3, indexof_remainder

     # Main loop: test four chars per iteration.
indexof_loop4:
     lhu   t0, 0(v0)
     beq   t0, a1, match_0
     lhu   t0, 2(v0)
     beq   t0, a1, match_1
     lhu   t0, 4(v0)
     beq   t0, a1, match_2
     lhu   t0, 6(v0)
     beq   t0, a1, match_3
     addu  v0, 8                     # offset to contents[i+4]
     subu  a3, 4
     bgez  a3, indexof_loop4

indexof_remainder:
     addu  a3, 4                     # undo loop bias; a3 = chars left (0..3)
     beqz  a3, indexof_nomatch

     # Tail loop: one char per iteration.
indexof_loop1:
     lhu   t0, 0(v0)
     beq   t0, a1, match_0
     addu  v0, 2                     # offset to contents[i+1]
     subu  a3, 1
     bnez  a3, indexof_loop1

indexof_nomatch:
     li    v0, -1                    # not found
     RETURN

     # The match_N labels return ((v0 + 2N) - v1) / 2, i.e. the char index
     # of the hit relative to the start of the string data.
match_0:
     subu  v0, v1
     sra   v0, v0, 1                 # divide by 2 (bytes -> chars)
     RETURN
match_1:
     addu  v0, 2
     subu  v0, v1
     sra   v0, v0, 1                 # divide by 2
     RETURN
match_2:
     addu  v0, 4
     subu  v0, v1
     sra   v0, v0, 1                 # divide by 2
     RETURN
match_3:
     addu  v0, 6
     subu  v0, v1
     sra   v0, v0, 1                 # divide by 2
     RETURN
   2491 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INTERPRET
dvmCompiler_TEMPLATE_INTERPRET:
/* File: mips/TEMPLATE_INTERPRET.S */
    /*
     * This handler transfers control to the interpreter without performing
     * any lookups.  It may be called either as part of a normal chaining
     * operation, or from the transition code in header.S.  We distinguish
     * the two cases by looking at the link register.  If called from a
     * translation chain, it will point to the chaining Dalvik PC.
     * On entry:
     *    ra - if NULL:
     *        a1 - the Dalvik PC to begin interpretation.
     *    else
     *        [ra] contains Dalvik PC to begin interpretation
     *    rSELF - pointer to thread
     *    rFP - Dalvik frame pointer
     */
    la      t0, dvmJitToInterpPunt
    move    a0, a1                  # default: Dalvik PC comes in a1
    beq     ra, zero, 1f
    lw      a0, 0(ra)               # chained call: load Dalvik PC from [ra]
1:
    jr      t0
    # doesn't return
   2518 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MONITOR_ENTER
dvmCompiler_TEMPLATE_MONITOR_ENTER:
/* File: mips/TEMPLATE_MONITOR_ENTER.S */
    /*
     * Call out to the runtime to lock an object.  Because this thread
     * may have been suspended in THREAD_MONITOR state and the Jit's
     * translation cache subsequently cleared, we cannot return directly.
     * Instead, unconditionally transition to the interpreter to resume.
     *
     * On entry:
     *    a0 - self pointer
     *    a1 - the object (which has already been null-checked by the caller)
     *    rPC - the Dalvik PC of the following instruction.
     */
    la     a2, .LdvmLockObject
    lw     t9, (a2)                             # t9<- &dvmLockObject (literal pool)
    sw     zero, offThread_inJitCodeCache(a0)   # record that we're not returning
    JALR(t9)                                    # dvmLockObject(self, obj)
    lw     gp, STACK_OFFSET_GP(sp)              # restore gp after the call

    la     a2, .LdvmJitToInterpNoChain
    lw     a2, (a2)

    # Bail to interpreter - no chain [note - rPC still contains dPC]
#if defined(WITH_JIT_TUNING)
    li      a0, kHeavyweightMonitor
#endif
    jr      a2
   2549 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
/* File: mips/TEMPLATE_MONITOR_ENTER_DEBUG.S */
    /*
     * To support deadlock prediction, this version of MONITOR_ENTER
     * will always call the heavyweight dvmLockObject, check for an
     * exception and then bail out to the interpreter.
     *
     * On entry:
     *    a0 - self pointer
     *    a1 - the object (which has already been null-checked by the caller)
     *    rPC - the Dalvik PC of the following instruction.
     *
     */
    la     a2, .LdvmLockObject
    lw     t9, (a2)                             # t9<- &dvmLockObject (literal pool)
    sw     zero, offThread_inJitCodeCache(a0)   # record that we're not returning
    JALR(t9)                                    # dvmLockObject(self, obj)
    lw     gp, STACK_OFFSET_GP(sp)              # restore gp after the call

    # test for exception raised by dvmLockObject
    lw     a1, offThread_exception(rSELF)
    beqz   a1, 1f
    sub    a0, rPC, 2                           # roll dPC back to this monitor instruction
    j      .LhandleException
1:
    # Bail to interpreter - no chain [note - rPC still contains dPC]
#if defined(WITH_JIT_TUNING)
    li     a0, kHeavyweightMonitor
#endif
    la     a2, .LdvmJitToInterpNoChain
    lw     a2, (a2)
    jr     a2
   2585 
   2586 /* ------------------------------ */
   2587     .balign 4
   2588     .global dvmCompiler_TEMPLATE_RESTORE_STATE
   2589 dvmCompiler_TEMPLATE_RESTORE_STATE:
   2590 /* File: mips/TEMPLATE_RESTORE_STATE.S */
   2591     /*
   2592      * This handler restores state following a selfVerification memory access.
   2593      * On entry:
   2594      *    a0 - offset from rSELF to the 1st element of the coreRegs save array.
   2595      * Note: the following registers are not restored
   2596      *       zero, AT, gp, sp, fp, ra
   2597      */
   2598 
   2599     add     a0, a0, rSELF               # pointer to heapArgSpace.coreRegs[0]
   2600 #if 0
   2601     lw      zero, r_ZERO*4(a0)          # restore zero
   2602 #endif
   2603     .set noat
   2604     lw      AT, r_AT*4(a0)              # restore at
   2605     .set at
   2606     lw      v0, r_V0*4(a0)              # restore v0
   2607     lw      v1, r_V1*4(a0)              # restore v1
   2608 
   2609     lw      a1, r_A1*4(a0)              # restore a1
   2610     lw      a2, r_A2*4(a0)              # restore a2
   2611     lw      a3, r_A3*4(a0)              # restore a3
   2612 
   2613     lw      t0, r_T0*4(a0)              # restore t0
   2614     lw      t1, r_T1*4(a0)              # restore t1
   2615     lw      t2, r_T2*4(a0)              # restore t2
   2616     lw      t3, r_T3*4(a0)              # restore t3
   2617     lw      t4, r_T4*4(a0)              # restore t4
   2618     lw      t5, r_T5*4(a0)              # restore t5
   2619     lw      t6, r_T6*4(a0)              # restore t6
   2620     lw      t7, r_T7*4(a0)              # restore t7
   2621 
   2622     lw      s0, r_S0*4(a0)              # restore s0
   2623     lw      s1, r_S1*4(a0)              # restore s1
   2624     lw      s2, r_S2*4(a0)              # restore s2
   2625     lw      s3, r_S3*4(a0)              # restore s3
   2626     lw      s4, r_S4*4(a0)              # restore s4
   2627     lw      s5, r_S5*4(a0)              # restore s5
   2628     lw      s6, r_S6*4(a0)              # restore s6
   2629     lw      s7, r_S7*4(a0)              # restore s7
   2630 
   2631     lw      t8, r_T8*4(a0)              # restore t8
   2632     lw      t9, r_T9*4(a0)              # restore t9
   2633 
   2634     lw      k0, r_K0*4(a0)              # restore k0
   2635     lw      k1, r_K1*4(a0)              # restore k1
   2636 
   2637 #if 0
   2638     lw      gp, r_GP*4(a0)              # restore gp
   2639     lw      sp, r_SP*4(a0)              # restore sp
   2640     lw      fp, r_FP*4(a0)              # restore fp
   2641     lw      ra, r_RA*4(a0)              # restore ra
   2642 #endif
   2643 
   2644 /* #ifdef HARD_FLOAT */
   2645 #if 0
   2646     lw      f0, fr0*4(a0)               # restore f0
   2647     lw      f1, fr1*4(a0)               # restore f1
   2648     lw      f2, fr2*4(a0)               # restore f2
   2649     lw      f3, fr3*4(a0)               # restore f3
   2650     lw      f4, fr4*4(a0)               # restore f4
   2651     lw      f5, fr5*4(a0)               # restore f5
   2652     lw      f6, fr6*4(a0)               # restore f6
   2653     lw      f7, fr7*4(a0)               # restore f7
   2654     lw      f8, fr8*4(a0)               # restore f8
   2655     lw      f9, fr9*4(a0)               # restore f9
   2656     lw      f10, fr10*4(a0)             # restore f10
   2657     lw      f11, fr11*4(a0)             # restore f11
   2658     lw      f12, fr12*4(a0)             # restore f12
   2659     lw      f13, fr13*4(a0)             # restore f13
   2660     lw      f14, fr14*4(a0)             # restore f14
   2661     lw      f15, fr15*4(a0)             # restore f15
   2662     lw      f16, fr16*4(a0)             # restore f16
   2663     lw      f17, fr17*4(a0)             # restore f17
   2664     lw      f18, fr18*4(a0)             # restore f18
   2665     lw      f19, fr19*4(a0)             # restore f19
   2666     lw      f20, fr20*4(a0)             # restore f20
   2667     lw      f21, fr21*4(a0)             # restore f21
   2668     lw      f22, fr22*4(a0)             # restore f22
   2669     lw      f23, fr23*4(a0)             # restore f23
   2670     lw      f24, fr24*4(a0)             # restore f24
   2671     lw      f25, fr25*4(a0)             # restore f25
   2672     lw      f26, fr26*4(a0)             # restore f26
   2673     lw      f27, fr27*4(a0)             # restore f27
   2674     lw      f28, fr28*4(a0)             # restore f28
   2675     lw      f29, fr29*4(a0)             # restore f29
   2676     lw      f30, fr30*4(a0)             # restore f30
   2677     lw      f31, fr31*4(a0)             # restore f31
   2678 #endif
   2679 
   2680     lw      a0, r_A1*4(a0)              # restore a0
   2681     RETURN
   2682 
   2683 /* ------------------------------ */
   2684     .balign 4
   2685     .global dvmCompiler_TEMPLATE_SAVE_STATE
   2686 dvmCompiler_TEMPLATE_SAVE_STATE:
   2687 /* File: mips/TEMPLATE_SAVE_STATE.S */
   2688     /*
   2689      * This handler performs a register save for selfVerification mode.
   2690      * On entry:
   2691      *    Top of stack + 4: a1 value to save
   2692      *    Top of stack + 0: a0 value to save
   2693      *    a0 - offset from rSELF to the beginning of the heapArgSpace record
   2694      *    a1 - the value of regMap
   2695      *
   2696      * The handler must save regMap, r0-r31, f0-f31 if FPU, and then return with
   2697      * r0-r31 with their original values (note that this means a0 and a1 must take
   2698      * the values on the stack - not the ones in those registers on entry.
   2699      * Finally, the two registers previously pushed must be popped.
   2700      * Note: the following registers are not saved
   2701      *       zero, AT, gp, sp, fp, ra
   2702      */
   2703     add     a0, a0, rSELF               # pointer to heapArgSpace
   2704     sw      a1, 0(a0)                   # save regMap
   2705     add     a0, a0, 4                   # pointer to coreRegs
   2706 #if 0
   2707     sw      zero, r_ZERO*4(a0)          # save zero
   2708 #endif
   2709     .set noat
   2710     sw      AT, r_AT*4(a0)              # save at
   2711     .set at
   2712     sw      v0, r_V0*4(a0)              # save v0
   2713     sw      v1, r_V1*4(a0)              # save v1
   2714 
   2715     lw      a1, 0(sp)                   # recover a0 value
   2716     sw      a1, r_A0*4(a0)              # save a0
   2717     lw      a1, 4(sp)                   # recover a1 value
   2718     sw      a1, r_A1*4(a0)              # save a1
   2719     sw      a2, r_A2*4(a0)              # save a2
   2720     sw      a3, r_A3*4(a0)              # save a3
   2721 
   2722     sw      t0, r_T0*4(a0)              # save t0
   2723     sw      t1, r_T1*4(a0)              # save t1
   2724     sw      t2, r_T2*4(a0)              # save t2
   2725     sw      t3, r_T3*4(a0)              # save t3
   2726     sw      t4, r_T4*4(a0)              # save t4
   2727     sw      t5, r_T5*4(a0)              # save t5
   2728     sw      t6, r_T6*4(a0)              # save t6
   2729     sw      t7, r_T7*4(a0)              # save t7
   2730 
   2731     sw      s0, r_S0*4(a0)              # save s0
   2732     sw      s1, r_S1*4(a0)              # save s1
   2733     sw      s2, r_S2*4(a0)              # save s2
   2734     sw      s3, r_S3*4(a0)              # save s3
   2735     sw      s4, r_S4*4(a0)              # save s4
   2736     sw      s5, r_S5*4(a0)              # save s5
   2737     sw      s6, r_S6*4(a0)              # save s6
   2738     sw      s7, r_S7*4(a0)              # save s7
   2739 
   2740     sw      t8, r_T8*4(a0)              # save t8
   2741     sw      t9, r_T9*4(a0)              # save t9
   2742 
   2743     sw      k0, r_K0*4(a0)              # save k0
   2744     sw      k1, r_K1*4(a0)              # save k1
   2745 
   2746 #if 0
   2747     sw      gp, r_GP*4(a0)              # save gp
   2748     sw      sp, r_SP*4(a0)              # save sp (need to adjust??? )
   2749     sw      fp, r_FP*4(a0)              # save fp
   2750     sw      ra, r_RA*4(a0)              # save ra
   2751 #endif
   2752 
   2753 /* #ifdef HARD_FLOAT */
   2754 #if 0
   2755     sw      f0, fr0*4(a0)               # save f0
   2756     sw      f1, fr1*4(a0)               # save f1
   2757     sw      f2, fr2*4(a0)               # save f2
   2758     sw      f3, fr3*4(a0)               # save f3
   2759     sw      f4, fr4*4(a0)               # save f4
   2760     sw      f5, fr5*4(a0)               # save f5
   2761     sw      f6, fr6*4(a0)               # save f6
   2762     sw      f7, fr7*4(a0)               # save f7
   2763     sw      f8, fr8*4(a0)               # save f8
   2764     sw      f9, fr9*4(a0)               # save f9
   2765     sw      f10, fr10*4(a0)             # save f10
   2766     sw      f11, fr11*4(a0)             # save f11
   2767     sw      f12, fr12*4(a0)             # save f12
   2768     sw      f13, fr13*4(a0)             # save f13
   2769     sw      f14, fr14*4(a0)             # save f14
   2770     sw      f15, fr15*4(a0)             # save f15
   2771     sw      f16, fr16*4(a0)             # save f16
   2772     sw      f17, fr17*4(a0)             # save f17
   2773     sw      f18, fr18*4(a0)             # save f18
   2774     sw      f19, fr19*4(a0)             # save f19
   2775     sw      f20, fr20*4(a0)             # save f20
   2776     sw      f21, fr21*4(a0)             # save f21
   2777     sw      f22, fr22*4(a0)             # save f22
   2778     sw      f23, fr23*4(a0)             # save f23
   2779     sw      f24, fr24*4(a0)             # save f24
   2780     sw      f25, fr25*4(a0)             # save f25
   2781     sw      f26, fr26*4(a0)             # save f26
   2782     sw      f27, fr27*4(a0)             # save f27
   2783     sw      f28, fr28*4(a0)             # save f28
   2784     sw      f29, fr29*4(a0)             # save f29
   2785     sw      f30, fr30*4(a0)             # save f30
   2786     sw      f31, fr31*4(a0)             # save f31
   2787 #endif
   2788 
   2789     lw      a1, 0(sp)                   # recover a0 value
   2790     lw      a1, 4(sp)                   # recover a1 value
   2791     sub     sp, sp, 8                   # adjust stack ptr
   2792     RETURN
   2793 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
/* File: mips/TEMPLATE_PERIODIC_PROFILING.S */
    /*
     * Increment profile counter for this trace, and decrement
     * sample counter.  If sample counter goes below zero, turn
     * off profiling.
     *
     * On entry
     * (ra-16) is address of pointer to counter.  Note: the counter
     *    actually exists 16 bytes before the return target for mips.
     *     - 4 bytes for prof count addr.
     *     - 4 bytes for chain cell offset (2bytes 32 bit aligned).
     *     - 4 bytes for call TEMPLATE_PERIODIC_PROFILING.
     *     - 4 bytes for call delay slot.
     */
     lw     a0, -16(ra)                 # a0<- address of this trace's counter
     lw     a1, offThread_pProfileCountdown(rSELF)
     lw     a2, 0(a0)                   # get counter
     lw     a3, 0(a1)                   # get countdown timer
     addu   a2, 1                       # bump trace counter
     sub    a3, 1                       # FIXME - bug in ARM code???
     bltz   a3, .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
     sw     a2, 0(a0)                   # note: stores are skipped when the
     sw     a3, 0(a1)                   #   countdown went negative (see bltz)
     RETURN
.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
     la     a0, dvmJitTraceProfilingOff
     JALR(a0)
     # The ra register is preserved by the JALR macro.
     jr     ra
   2827 
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_RETURN_PROF
dvmCompiler_TEMPLATE_RETURN_PROF:
/* File: mips/TEMPLATE_RETURN_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_RETURN.S */
    /*
     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
     * If the stored value in returnAddr
     * is non-zero, the caller is compiled by the JIT thus return to the
     * address in the code cache following the invoke instruction. Otherwise
     * return to the special dvmJitToInterpNoChain entry point.
     */
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a2 and ra around the method-trace-exit call
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(ra, 12)

    # a0=rSELF
    move    a0, rSELF
    la      t9, dvmFastMethodTraceExit
    JALR(t9)
    lw      gp, STACK_OFFSET_GP(sp)     # restore gp after the call

    # restore a0-a2 and ra
    SCRATCH_LOAD(ra, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif
    SAVEAREA_FROM_FP(a0, rFP)           # a0<- saveArea (old)
    lw      t0, offStackSaveArea_prevFrame(a0)     # t0<- saveArea->prevFrame
    lbu     t1, offThread_breakFlags(rSELF)        # t1<- breakFlags
    lw      rPC, offStackSaveArea_savedPc(a0)      # rPC<- saveArea->savedPc
#if !defined(WITH_SELF_VERIFICATION)
    lw      t2,  offStackSaveArea_returnAddr(a0)   # t2<- chaining cell ret
#else
    move    t2, zero                               # disable chaining
#endif
    lw      a2, offStackSaveArea_method - sizeofStackSaveArea(t0)
                                                   # a2<- method we're returning to
#if !defined(WITH_SELF_VERIFICATION)
    beq     a2, zero, 1f                           # a2 == NULL: bail to interpreter
#else
    bne     a2, zero, 2f
    JALR(ra)                                       # punt to interpreter and compare state
    # DOUG: assume this does not return ???
2:
#endif
    la      t4, .LdvmJitToInterpNoChainNoProfile   # defined in footer.S
    lw      a1, (t4)
    move    rFP, t0                                # publish new FP
    beq     a2, zero, 4f                           # skip clazz deref if no method
    lw      t0, offMethod_clazz(a2)                # t0<- method->clazz
4:

    sw      a2, offThread_method(rSELF)            # self->method = newSave->method
    lw      a0, offClassObject_pDvmDex(t0)         # a0<- method->clazz->pDvmDex
    sw      rFP, offThread_curFrame(rSELF)         # self->curFrame = fp
    add     rPC, rPC, 3*2                          # publish new rPC (advance 6 bytes,
                                                   #   presumably past the invoke - confirm)
    sw      a0, offThread_methodClassDex(rSELF)
    movn    t2, zero, t1                           # check the breakFlags and
                                                   # clear the chaining cell address
    sw      t2, offThread_inJitCodeCache(rSELF)    # in code cache or not
    beq     t2, zero, 3f                           # chaining cell exists?
    JALR(t2)                                       # jump to the chaining cell
    # DOUG: assume this does not return ???
3:
#if defined(WITH_JIT_TUNING)
    li      a0, kCallsiteInterpreted
#endif
    j       a1                                     # callsite is interpreted
1:
    # Returning into an interpreted (or absent) caller: leave the code cache.
    sw      zero, offThread_inJitCodeCache(rSELF)  # reset inJitCodeCache
    SAVE_PC_TO_SELF()                              # SAVE_PC_FP_TO_SELF()
    SAVE_FP_TO_SELF()
    la      t4, .LdvmMterpStdBail                  # defined in footer.S
    lw      a2, (t4)
    move    a0, rSELF                              # Expecting rSELF in a0
    JALR(a2)                                       # exit the interpreter
    # DOUG: assume this does not return ???

#undef TEMPLATE_INLINE_PROFILING
   2914 
    2915 /* ------------------------------ */
    2916     .balign 4
    2917     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF
    2918 dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF:
    2919 /* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT_PROF.S */
    2920 #define TEMPLATE_INLINE_PROFILING
    2921 /* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
    2922     /*
    2923      * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
    2924      * into rPC then jump to dvmJitToInterpNoChain to dispatch the
    2925      * runtime-resolved callee.  The _PROF variant additionally calls the
    2926      * method-trace hook (dvmFastMethodTraceEnter) before dispatching.
    2926      */
    2927     # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    2928     lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    2929     lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
    2930     lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    2931     lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    2932     move   a3, a1                                 # a3<- returnCell
    2933     SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    2934     sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    2935     sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    2936     SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    2937     sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
    2938     sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
    2939     bgeu   t0, t9, 1f                             # enough stack (bottom >= interpStackEnd)? -> 1f
    2940     RETURN                                        # no: return to raise stack overflow exception
    2941 
    2942 1:
    2943     # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    2944     lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
    2945     lw     t0, offMethod_accessFlags(a0)          # t0<- methodToCall->accessFlags
    2946     sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    2947     sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    2948     lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns
    2949 
    2950     # set up newSaveArea
    2951     sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    2952     sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    2953     sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    2954     beqz   t8, 2f                                 # breakFlags == 0? -> 2f (no bail needed)
    2955     RETURN                                        # breakFlags set: bail to the interpreter
    2956 
    2957 2:
    2958     and    t6, t0, ACC_NATIVE                     # t6<- accessFlags & ACC_NATIVE
    2959     beqz   t6, 3f                                 # non-native method? -> 3f
    2960 #if !defined(WITH_SELF_VERIFICATION)
    2961     j      .LinvokeNative
    2962 #else
    2963     RETURN                                        # bail to the interpreter
    2964 #endif
    2965 
    2966 3:
    2967     # continue executing the next instruction through the interpreter
    2968     la     t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
    2969     lw     rTEMP, (t0)
    2970     lw     a3, offClassObject_pDvmDex(t9)         # a3<- method->clazz->pDvmDex
    2971 
    2972     # Update "thread" values for the new method
    2973     sw     a0, offThread_method(rSELF)            # self->method = methodToCall
    2974     sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
    2975     move   rFP, a1                                # fp = newFp
    2976     sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
    2977 #if defined(TEMPLATE_INLINE_PROFILING)
    2978     # preserve rTEMP,a1-a3
    2979     SCRATCH_STORE(rTEMP, 0)
    2980     SCRATCH_STORE(a1, 4)
    2981     SCRATCH_STORE(a2, 8)
    2982     SCRATCH_STORE(a3, 12)
    2983 
    2984     # a0=methodToCall, a1=rSELF
    2985     move   a1, rSELF
    2986     la     t9, dvmFastMethodTraceEnter
    2987     JALR(t9)                                       # dvmFastMethodTraceEnter(methodToCall, self)
    2988     lw      gp, STACK_OFFSET_GP(sp)               # restore gp clobbered by the call
    2989 
    2990     # restore rTEMP,a1-a3
    2991     SCRATCH_LOAD(a3, 12)
    2992     SCRATCH_LOAD(a2, 8)
    2993     SCRATCH_LOAD(a1, 4)
    2994     SCRATCH_LOAD(rTEMP, 0)
    2995 #endif
    2996 
    2997     # Start executing the callee
    2998 #if defined(WITH_JIT_TUNING)
    2999     li     a0, kInlineCacheMiss
    3000 #endif
    3001     jr     rTEMP                                  # dvmJitToInterpTraceSelectNoChain
    3002 
    3003 #undef TEMPLATE_INLINE_PROFILING
   3004 
    3005 /* ------------------------------ */
    3006     .balign 4
    3007     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF
    3008 dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF:
    3009 /* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN_PROF.S */
    3010 #define TEMPLATE_INLINE_PROFILING
    3011 /* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */
    3012     /*
    3013      * For monomorphic callsite, setup the Dalvik frame and return to the
    3014      * Thumb code through the link register to transfer control to the callee
    3015      * method through a dedicated chaining cell.  (Comment inherited from the
    3016      * ARM template; on MIPS the link register is ra.)
    3015      */
    3017     # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    3018     # methodToCall is guaranteed to be non-native
    3019 .LinvokeChainProf:
    3020     lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    3021     lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
    3022     lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    3023     lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    3024     move   a3, a1                                 # a3<- returnCell
    3025     SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    3026     sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    3027     sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    3028     SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    3029     add    t2, ra, 8                              # setup the punt-to-interp address
    3030                                                   # 8 bytes skips branch and delay slot
    3031     sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
    3032     sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
    3033     bgeu   t0, t9, 1f                             # enough stack (bottom >= interpStackEnd)? -> 1f
    3034     jr     t2                                     # no: punt to interp to raise stack overflow
    3035 
    3036 1:
    3037     # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    3038     lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
    3039     sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    3040     sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    3041     lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns
    3042 
    3043     # set up newSaveArea
    3044     sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    3045     sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    3046     sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    3047     beqz   t8, 2f                                 # breakFlags == 0? -> 2f
    3048     jr     t2                                     # breakFlags set: bail to the interpreter
    3049 
    3050 2:
    3051     lw     a3, offClassObject_pDvmDex(t9)         # a3<- methodToCall->clazz->pDvmDex
    3052 
    3053     # Update "thread" values for the new method
    3054     sw     a0, offThread_method(rSELF)            # self->method = methodToCall
    3055     sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
    3056     move   rFP, a1                                # fp = newFp
    3057     sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
    3058 #if defined(TEMPLATE_INLINE_PROFILING)
    3059     # preserve a0-a2 and ra
    3060     SCRATCH_STORE(a0, 0)
    3061     SCRATCH_STORE(a1, 4)
    3062     SCRATCH_STORE(a2, 8)
    3063     SCRATCH_STORE(ra, 12)
    3064 
    3065     move   a1, rSELF
    3066     # a0=methodToCall, a1=rSELF
    3067     la     t9, dvmFastMethodTraceEnter
    3068     jalr   t9                                     # NOTE(review): sibling templates use JALR() macro here -- confirm equivalence
    3069     lw     gp, STACK_OFFSET_GP(sp)                # restore gp clobbered by the call
    3070 
    3071     # restore a0-a2 and ra
    3072     SCRATCH_LOAD(ra, 12)
    3073     SCRATCH_LOAD(a2, 8)
    3074     SCRATCH_LOAD(a1, 4)
    3075     SCRATCH_LOAD(a0, 0)
    3076 #endif
    3077     RETURN                                        # return to the callee-chaining cell
    3078 
    3079 #undef TEMPLATE_INLINE_PROFILING
   3080 
    3081 /* ------------------------------ */
    3082     .balign 4
    3083     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF
    3084 dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF:
    3085 /* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF.S */
    3086 #define TEMPLATE_INLINE_PROFILING
    3087 /* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
    3088     /*
    3089      * For polymorphic callsite, check whether the cached class pointer matches
    3090      * the current one. If so setup the Dalvik frame and return to the
    3091      * Thumb code through the link register to transfer control to the callee
    3092      * method through a dedicated chaining cell.
    3093      *
    3094      * The predicted chaining cell is declared in ArmLIR.h with the
    3095      * following layout:
    3096      *
    3097      *  typedef struct PredictedChainingCell {
    3098      *      u4 branch;
    3099      *      u4 delay_slot;
    3100      *      const ClassObject *clazz;
    3101      *      const Method *method;
    3102      *      u4 counter;
    3103      *  } PredictedChainingCell;
    3104      *
    3105      * Upon returning to the callsite (ra is the link register on MIPS):
    3106      *    - ra   : to branch to the chaining cell
    3107      *    - ra+8 : to punt to the interpreter
    3108      *    - ra+16: to fully resolve the callee and may rechain.
    3109      *             a3 <- class
    3110      */
    3111     # a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite
    3112     lw      a3, offObject_clazz(a0)     # a3 <- this->class
    3113     lw      rIBASE, 8(a2)                   # rIBASE <- predictedChainCell->clazz
    3114     lw      a0, 12(a2)                  # a0 <- predictedChainCell->method
    3115     lw      t1, offThread_icRechainCount(rSELF)    # t1 <- shared rechainCount
    3116 
    3117 #if defined(WITH_JIT_TUNING)
    3118     la      rINST, .LdvmICHitCount
    3119     #add     t2, t2, 1
    3120     bne    a3, rIBASE, 1f               # count the hit only when prediction matches
    3121     nop
    3122     lw      t2, 0(rINST)
    3123     add     t2, t2, 1                   # gDvmICHitCount++
    3124     sw      t2, 0(rINST)
    3125 1:
    3126     #add     t2, t2, 1
    3127 #endif
    3128     beq     a3, rIBASE, .LinvokeChainProf       # branch if predicted chain is valid
    3129     lw      rINST, offClassObject_vtable(a3)     # rINST <- this->class->vtable
    3130     beqz    rIBASE, 2f                      # clazz == NULL? cell not initialized -> 2f
    3131     sub     a1, t1, 1                   # count--
    3132     sw      a1, offThread_icRechainCount(rSELF)   # write back to InterpState
    3133     b       3f
    3134 2:
    3135     move    a1, zero                    # uninitialized cell: count = 0
    3136 3:
    3137     add     ra, ra, 16                  # return to fully-resolve landing pad
    3138     /*
    3139      * a1 <- count
    3140      * a2 <- &predictedChainCell
    3141      * a3 <- this->class
    3142      * rPC <- dPC
    3143      * rINST <- this->class->vtable
    3144      */
    3145     RETURN
    3146 
    3147 #undef TEMPLATE_INLINE_PROFILING
   3148 
    3149 /* ------------------------------ */
    3150     .balign 4
    3151     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF
    3152 dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF:
    3153 /* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE_PROF.S */
    3154 #define TEMPLATE_INLINE_PROFILING
    3155 /* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */
    3156     # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    3157     lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    3158     lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    3159     lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    3160     move   a3, a1                                 # a3<- returnCell
    3161     SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    3162     sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    3163     sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    3164     SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    3165     bgeu   t0, t9, 1f                             # enough stack (bottom >= interpStackEnd)? -> 1f
    3166     RETURN                                        # no: return to raise stack overflow exception
    3167 
    3168 1:
    3169     # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    3170     sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    3171     sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    3172     lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns
    3173 
    3174     # set up newSaveArea
    3175     sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    3176     sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    3177     sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    3178     lw     rTEMP, offMethod_nativeFunc(a0)        # rTEMP<- method->nativeFunc
    3179 #if !defined(WITH_SELF_VERIFICATION)
    3180     beqz   t8, 2f                                 # breakFlags == 0? -> 2f
    3181     RETURN                                        # breakFlags set: bail to the interpreter
    3182 2:
    3183 #else
    3184     RETURN                                        # bail to the interpreter unconditionally
    3185 #endif
             # (under WITH_SELF_VERIFICATION everything below is unreachable)
    3186 
    3187     # go ahead and transfer control to the native code
    3188     lw     t6, offThread_jniLocal_topCookie(rSELF)  # t6<- thread->localRef->...
    3189     sw     a1, offThread_curFrame(rSELF)          # self->curFrame = newFp
    3190     sw     zero, offThread_inJitCodeCache(rSELF)  # not in the jit code cache
    3191     sw     t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
    3192                                                   # newFp->localRefCookie=top
    3193     SAVEAREA_FROM_FP(rBIX, a1)                    # rBIX<- new stack save area
    3194     move   a2, a0                                 # a2<- methodToCall
    3195     move   a0, a1                                 # a0<- newFp
    3196     add    a1, rSELF, offThread_retval            # a1<- &retval
    3197     move   a3, rSELF                              # a3<- self
    3198 #if defined(TEMPLATE_INLINE_PROFILING)
    3199     # a2: methodToCall
    3200     # preserve rTEMP,a0-a3
    3201     SCRATCH_STORE(a0, 0)
    3202     SCRATCH_STORE(a1, 4)
    3203     SCRATCH_STORE(a2, 8)
    3204     SCRATCH_STORE(a3, 12)
    3205     SCRATCH_STORE(rTEMP, 16)
    3206 
    3207     move   a0, a2
    3208     move   a1, rSELF
    3209     # a0=JNIMethod, a1=rSELF
    3210     la      t9, dvmFastMethodTraceEnter
    3211     JALR(t9)                                      # dvmFastMethodTraceEnter(method, self)
    3212     lw     gp, STACK_OFFSET_GP(sp)                # restore gp clobbered by the call
    3213 
    3214     # restore rTEMP,a0-a3
    3215     SCRATCH_LOAD(rTEMP, 16)
    3216     SCRATCH_LOAD(a3, 12)
    3217     SCRATCH_LOAD(a2, 8)
    3218     SCRATCH_LOAD(a1, 4)
    3219     SCRATCH_LOAD(a0, 0)
    3220 
    3221     move   rOBJ, a2                               # save a2 (methodToCall) across the native call
    3222 #endif
    3223     move   t9, rTEMP                              # t9<- nativeFunc (MIPS PIC call convention)
    3224     JALR(t9)                                   # off to the native code
    3225     lw     gp, STACK_OFFSET_GP(sp)                # restore gp clobbered by the call
    3226 
    3227 #if defined(TEMPLATE_INLINE_PROFILING)
    3228     move   a0, rOBJ
    3229     move   a1, rSELF
    3230     # a0=JNIMethod, a1=rSELF
    3231     la      t9, dvmFastNativeMethodTraceExit
    3232     JALR(t9)                                      # dvmFastNativeMethodTraceExit(method, self)
    3233     lw     gp, STACK_OFFSET_GP(sp)                # restore gp clobbered by the call
    3234 #endif
    3235 
    3236     # native return; rBIX=newSaveArea
    3237     # equivalent to dvmPopJniLocals
    3238     lw     a2, offStackSaveArea_returnAddr(rBIX)     # a2 = chaining cell ret addr
    3239     lw     a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
    3240     lw     a1, offThread_exception(rSELF)            # check for exception
    3241     sw     rFP, offThread_curFrame(rSELF)            # self->curFrame = fp
    3242     sw     a0, offThread_jniLocal_topCookie(rSELF)   # new top <- old top
    3243     lw     a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    3244 
    3245     # a0 = dalvikCallsitePC
    3246     bnez   a1, .LhandleException                     # handle exception if any
    3247 
    3248     sw     a2, offThread_inJitCodeCache(rSELF)       # set the mode properly
    3249     beqz   a2, 3f
    3250     jr     a2                                        # go if return chaining cell still exist
    3251 
    3252 3:
    3253     # continue executing the next instruction through the interpreter
    3254     la     a1, .LdvmJitToInterpTraceSelectNoChain    # defined in footer.S
    3255     lw     a1, (a1)
    3256     add    rPC, a0, 3*2                              # reconstruct new rPC (advance 3 dalvik instr)
    3257 
    3258 #if defined(WITH_JIT_TUNING)
    3259     li     a0, kCallsiteInterpreted
    3260 #endif
    3261     jr     a1
    3262 
    3263 #undef TEMPLATE_INLINE_PROFILING
   3264 
    3265     .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
    3266 /* File: mips/footer.S */
    3267 /*
    3268  * ===========================================================================
    3269  *  Common subroutines and data
    3270  * ===========================================================================
    3271  */
    3272 
    3273     .section .data.rel.ro
             # NOTE(review): executable code below is placed in .data.rel.ro --
             # confirm this is intended (code normally belongs in .text)
    3274     .align  4
    3275 .LinvokeNative:
    3276     # Prep for the native call
    3277     # a1 = newFP, a0 = methodToCall
    3278     lw     t9, offThread_jniLocal_topCookie(rSELF)  # t9<- thread->localRef->...
    3279     sw     zero, offThread_inJitCodeCache(rSELF)    # not in jit code cache
    3280     sw     a1, offThread_curFrame(rSELF)            # self->curFrame = newFp
    3281     sw     t9, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
    3282                                                  # newFp->localRefCookie=top
    3283     lhu     ra, offThread_subMode(rSELF)         # ra<- self->subMode (ra used as scratch here)
    3284     SAVEAREA_FROM_FP(rBIX, a1)                   # rBIX<- new stack save area
    3285 
    3286     move    a2, a0                               # a2<- methodToCall
    3287     move    a0, a1                               # a0<- newFp
    3288     add     a1, rSELF, offThread_retval          # a1<- &retval
    3289     move    a3, rSELF                            # a3<- self
    3290     andi    ra, kSubModeMethodTrace              # ra<- ra & kSubModeMethodTrace (2-operand andi)
    3291     beqz    ra, 121f                             # tracing disabled? skip to plain native call
    3292     # a2: methodToCall
    3293     # preserve a0-a3
    3294     SCRATCH_STORE(a0, 0)
    3295     SCRATCH_STORE(a1, 4)
    3296     SCRATCH_STORE(a2, 8)
    3297     SCRATCH_STORE(a3, 12)
    3298 
    3299     move    a0, a2
    3300     move    a1, rSELF
    3301     la      t9, dvmFastMethodTraceEnter
    3302     JALR(t9)                                     # dvmFastMethodTraceEnter(method, self)
    3303     lw      gp, STACK_OFFSET_GP(sp)              # restore gp clobbered by the call
    3304 
    3305     # restore a0-a3
    3306     SCRATCH_LOAD(a3, 12)
    3307     SCRATCH_LOAD(a2, 8)
    3308     SCRATCH_LOAD(a1, 4)
    3309     SCRATCH_LOAD(a0, 0)
    3310 
    3311     lw      t9, offMethod_nativeFunc(a2)
    3312     JALR(t9)                                      # call methodToCall->nativeFunc
    3313     lw      gp, STACK_OFFSET_GP(sp)              # restore gp clobbered by the call
    3314 
    3315     # restore a2 again
    3316     SCRATCH_LOAD(a2, 8)
    3317 
    3318     move    a0, a2
    3319     move    a1, rSELF
    3320     la      t9, dvmFastNativeMethodTraceExit
    3321     JALR(t9)                                     # dvmFastNativeMethodTraceExit(method, self)
    3322     lw      gp, STACK_OFFSET_GP(sp)              # restore gp clobbered by the call
    3323     b       212f                                 # rejoin common native-return path
    3324 
    3325 121:
    3326     lw      t9, offMethod_nativeFunc(a2)
    3327     JALR(t9)                                     # call methodToCall->nativeFunc
    3328     lw      gp, STACK_OFFSET_GP(sp)              # restore gp clobbered by the call
    3329 
    3330 212:
    3331     # native return; rBIX=newSaveArea
    3332     # equivalent to dvmPopJniLocals
    3333     lw     a2, offStackSaveArea_returnAddr(rBIX)     # a2 = chaining cell ret addr
    3334     lw     a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
    3335     lw     a1, offThread_exception(rSELF)            # check for exception
    3336     sw     rFP, offThread_curFrame(rSELF)            # self->curFrame = fp
    3337     sw     a0, offThread_jniLocal_topCookie(rSELF)   # new top <- old top
    3338     lw     a0, offStackSaveArea_savedPc(rBIX)        # reload rPC
    3339 
    3340     # a0 = dalvikCallsitePC
    3341     bnez   a1, .LhandleException                     # handle exception if any
    3342 
    3343     sw     a2, offThread_inJitCodeCache(rSELF)       # set the mode properly
    3344     beqz   a2, 3f
    3345     jr     a2                                        # go if return chaining cell still exist
    3346 
    3347 3:
    3348     # continue executing the next instruction through the interpreter
    3349     la     a1, .LdvmJitToInterpTraceSelectNoChain    # defined in footer.S
    3350     lw     a1, (a1)
    3351     add    rPC, a0, 3*2                              # reconstruct new rPC
    3352 
    3353 #if defined(WITH_JIT_TUNING)
    3354     li     a0, kCallsiteInterpreted
    3355 #endif
    3356     jr     a1
   3357 
   3358 
    3359 /*
    3360  * On entry:
    3361  * a0  Faulting Dalvik PC
    3362  */
    3363 .LhandleException:
    3364 #if defined(WITH_SELF_VERIFICATION)
    3365     la     t0, .LdeadFood
    3366     lw     t0, (t0)                  # should not see this under self-verification mode
    3367     jr     t0                        # jump to 0xdeadf00d: deliberate crash marker
    3368 .LdeadFood:
    3369     .word   0xdeadf00d
    3370 #endif
    3371     sw     zero, offThread_inJitCodeCache(rSELF)  # in interpreter land
    3372     la     a1, .LdvmMterpCommonExceptionThrown  # PIC way of getting &func
    3373     lw     a1, (a1)
    3374     la     rIBASE, .LdvmAsmInstructionStart     # PIC way of getting &func
    3375     lw     rIBASE, (rIBASE)
    3376     move   rPC, a0                              # reload the faulting Dalvik address
    3377     jr     a1                                   # branch to dvmMterpCommonExceptionThrown
   3378 
    3379     .align  4
             # Literal pool: absolute addresses of interpreter/JIT helper entry
             # points, loaded indirectly (la/lw) by the code above for PIC dispatch.
    3380 .LdvmAsmInstructionStart:
    3381     .word   dvmAsmInstructionStart
    3382 .LdvmJitToInterpNoChainNoProfile:
    3383     .word   dvmJitToInterpNoChainNoProfile
    3384 .LdvmJitToInterpTraceSelectNoChain:
    3385     .word   dvmJitToInterpTraceSelectNoChain
    3386 .LdvmJitToInterpNoChain:
    3387     .word   dvmJitToInterpNoChain
    3388 .LdvmMterpStdBail:
    3389     .word   dvmMterpStdBail
    3390 .LdvmMterpCommonExceptionThrown:
    3391     .word   dvmMterpCommonExceptionThrown
    3392 .LdvmLockObject:
    3393     .word   dvmLockObject
    3394 #if defined(WITH_JIT_TUNING)
    3395 .LdvmICHitCount:
    3396     .word   gDvmICHitCount
    3397 #endif
    3398 #if defined(WITH_SELF_VERIFICATION)
    3399 .LdvmSelfVerificationMemOpDecode:
    3400     .word   dvmSelfVerificationMemOpDecode
    3401 #endif
    3402 
    3403     .global dvmCompilerTemplateEnd
    3404 dvmCompilerTemplateEnd:
   3405 
   3406 #endif /* WITH_JIT */
   3407 
   3408