Home | History | Annotate | Download | only in arm
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * JNI method invocation.  This is used to call a C/C++ JNI method.  The
     19  * argument list has to be pushed onto the native stack according to
     20  * local calling conventions.
     21  *
     22  * This version supports the "new" ARM EABI.
     23  */
     24 
     25 #include <machine/cpu-features.h>
     26 
     27 #ifdef __ARM_EABI__
     28 
     29 #ifdef EXTENDED_EABI_DEBUG      /* opt-in alignment sanity checks */
     30 # define DBG                    /* empty: lines prefixed with DBG are assembled */
     31 #else
     32 # define DBG @                  /* expands to the comment character: DBG lines are skipped */
     33 #endif
     34 
     35 
     36 /*
     37 Function prototype:
     38 
     39 void dvmPlatformInvoke(void* pEnv, ClassObject* clazz, int argInfo, int argc,
     40     const u4* argv, const char* signature, void* func, JValue* pReturn)
     41 
     42 The method we are calling has the form:
     43 
     44   return_type func(JNIEnv* pEnv, ClassObject* clazz, ...)
     45     -or-
     46   return_type func(JNIEnv* pEnv, Object* this, ...)
     47 
     48 We receive a collection of 32-bit values which correspond to arguments from
     49 the interpreter (e.g. float occupies one, double occupies two).  It's up to
     50 us to convert these into local calling conventions.
     51 */
     52 
     53 /*
     54 ARM EABI notes:
     55 
     56 r0-r3 hold first 4 args to a method
     57 r9 is given special treatment in some situations, but not for us
     58 r10 (sl) seems to be generally available
     59 r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
     60 r12 (ip) is scratch -- not preserved across method calls
     61 r13 (sp) should be managed carefully in case a signal arrives
     62 r14 (lr) must be preserved
     63 r15 (pc) can be tinkered with directly
     64 
     65 r0 holds returns of <= 4 bytes
     66 r0-r1 hold returns of 8 bytes, low word in r0
     67 
     68 Callee must save/restore r4+ (except r12) if it modifies them.
     69 
     70 Stack is "full descending".  Only the arguments that don't fit in the first 4
     71 registers are placed on the stack.  "sp" points at the first stacked argument
     72 (i.e. the 5th arg).
     73 
     74 VFP: single-precision results in s0, double-precision results in d0.
     75 
     76 In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
     77 64-bit quantities (long long, double) must be 64-bit aligned.  This means
     78 we have to scan the method signature, identify arguments that must be
     79 padded, and fix them up appropriately.
     80 */
     81 
     82     .text
     83     .align  2
     84     .global dvmPlatformInvoke
     85     .type   dvmPlatformInvoke, %function
     86 
     87 /*
     88  * On entry:
     89  *   r0  JNIEnv (can be left alone)
     90  *   r1  clazz (NULL for virtual method calls, non-NULL for static)
     91  *   r2  arg info
     92  *   r3  argc (number of 32-bit values in argv)
     93  *   [sp]     argv
     94  *   [sp,#4]  short signature
     95  *   [sp,#8]  func
     96  *   [sp,#12] pReturn
     97  *
     98  * For a virtual method call, the "this" reference is in argv[0].
     99  *
    100  * argInfo (32-bit int) layout:
    101  *   SRRRLLLL FFFFFFFF FFFFFFFF FFFFFFFF
    102  *
    103  *   S - if set, do things the hard way (scan the signature)
    104  *   R - return-type enumeration (0 = void), mostly important for "hard" FP ABI
    105  *   L - number of double-words of storage required on stack (0-30 words)
    106  *   F - pad flag -- if set, write a pad word to the stack
    107  *
    108  * With this arrangement we can efficiently push up to 24 words of arguments
    109  * onto the stack.  Anything requiring more than that -- which should happen
    110  * rarely to never -- can do the slow signature scan.
    111  *
    112  * (We could pack the Fs more efficiently -- we know we never push two pads
    113  * in a row, and the first word can never be a pad -- but there's really
    114  * no need for it.)
    115  *
    116  * NOTE: if the called function has more than 4 words of arguments, gdb
    117  * will not be able to unwind the stack past this method.  The only way
    118  * around this is to convince gdb to respect an explicit frame pointer.
    119  * The stack unwinder in debuggerd *does* pay attention to fp if we set it
    120  * up appropriately, so at least that will work.
    121  */
    122 dvmPlatformInvoke:
    123     .fnstart
    124 
    125     /*
    126      * Save regs.
    127      *
    128      * On entry to a function, "sp" must be 64-bit aligned.  This means
    129      * we have to adjust sp manually if we push an odd number of regs here
    130      * (both here and when exiting).
    131      *
    132      * The ARM spec doesn't specify anything about the frame pointer.  gcc
    133      * points fp at the first saved register, so our "full descending"
    134      * stack looks like:
    135      *
    136      *  pReturn
    137      *  func
    138      *  shorty
    139      *  argv        <-- sp on entry
    140      *  lr          <-- fp
    141      *  fp
    142      *  r9...r7
    143      *  r6          <-- sp after reg save
    144      *
    145      * Any arguments that need to be pushed on for the target method
    146      * come after this.  The last argument is pushed first.
    147      */
    148 SAVED_REG_COUNT = 6                     @ push 6 regs
    149 FP_STACK_OFFSET = (SAVED_REG_COUNT-1) * 4 @ offset between fp and post-save sp
    150 FP_ADJ = 4                              @ fp is initial sp +4
    151 
    152     .save        {r6, r7, r8, r9, fp, lr}
    153     stmfd   sp!, {r6, r7, r8, r9, fp, lr}
    154 
    155     .setfp  fp, sp, #FP_STACK_OFFSET    @ point fp at first saved reg
    156     add     fp, sp, #FP_STACK_OFFSET
    157 
    158     @.pad    #4                          @ adjust for 64-bit align
    159     @sub     sp, sp, #4                  @ (if we save odd number of regs)
    160 
    161     @ Ensure 64-bit alignment.  EABI guarantees sp is aligned on entry, make
    162     @ sure we're aligned properly now.
    163 DBG tst     sp, #4                      @ 64-bit aligned?
    164 DBG bne     dvmAbort                    @ no, fail
    165 
    166     ldr     r9, [fp, #0+FP_ADJ]         @ r9<- argv
    167     cmp     r1, #0                      @ calling a static method?
    168 
    169     @ Not static, grab the "this" pointer.  Note "this" is not explicitly
    170     @ described by the method signature.
    171     subeq   r3, r3, #1                  @ argc--
    172     ldreq   r1, [r9], #4                @ r1<- *argv++
    173 
    174     @ Do we have arg padding flags in "argInfo"? (just need to check hi bit)
    175     teq     r2, #0
    176     bmi     .Lno_arg_info
    177 
    178     /*
    179      * "Fast" path.
    180      *
    181      * Make room on the stack for the arguments and copy them over,
    182      * inserting pad words when appropriate.
    183      *
    184      * Currently:
    185      *  r0  don't touch
    186      *  r1  don't touch
    187      *  r2  arg info
    188      *  r3  argc
    189      *  r4-r5  don't touch (not saved)
    190      *  r6-r8 (available)
    191      *  r9  argv
    192      *  fp  frame pointer
    193      */
    194 .Lhave_arg_info:
    195     @ Expand the stack by the specified amount.  We want to extract the
    196     @ count of double-words from r2, multiply it by 8, and subtract that
    197     @ from the stack pointer.
    198     and     ip, r2, #0x0f000000         @ ip<- double-words required
    199     mov     r6, r2, lsr #28             @ r6<- return type (S bit is clear here)
    200     sub     sp, sp, ip, lsr #21         @ shift right 24, then left 3
    201     mov     r8, sp                      @ r8<- sp  (arg copy dest)
    202 
    203     @ Stick argv in r7 and advance it past the argv values that will be
    204     @ held in r2-r3.  It's possible r3 will hold a pad, so check the
    205     @ bit in r2.  We do this by ignoring the first bit (which would
    206     @ indicate a pad in r2) and shifting the second into the carry flag.
    207     @ If the carry is set, r3 will hold a pad, so we adjust argv less.
    208     @
    209     @ (This is harmless if argc==0)
    210     mov     r7, r9
    211     movs    r2, r2, lsr #2
    212     addcc   r7, r7, #8                  @ skip past 2 words, for r2 and r3
    213     subcc   r3, r3, #2
    214     addcs   r7, r7, #4                  @ skip past 1 word, for r2
    215     subcs   r3, r3, #1
    216 
    217 .Lfast_copy_loop:
    218     @ if (--argc < 0) goto copy_done
    219     subs    r3, r3, #1
    220     bmi     .Lcopy_done                 @ NOTE: expects original argv in r9
    221 
    222 .Lfast_copy_loop2:
    223     @ Get pad flag into carry bit.  If it's set, we don't pull a value
    224     @ out of argv.
    225     movs    r2, r2, lsr #1
    226     ldrcc   ip, [r7], #4                @ ip = *r7++ (pull from argv)
    227     strcc   ip, [r8], #4                @ *r8++ = ip (write to stack)
    228     bcc     .Lfast_copy_loop
    229 
    230 DBG movcs   ip, #-3                     @ DEBUG DEBUG - make pad word obvious
    231 DBG strcs   ip, [r8]                    @ DEBUG DEBUG
    232     add     r8, r8, #4                  @ if pad, just advance r8 without store
    233     b       .Lfast_copy_loop2           @ don't adjust argc after writing pad
    234 
    235 
    236 .Lcopy_done:
    237     /*
    238      * Currently:
    239      *  r0-r1  args (JNIEnv*, thisOrClass); r2-r3 are loaded just below
    240      *  r6  return type bits from argInfo; zero iff DALVIK_JNI_RETURN_VOID
    241      *  r9  original argv
    242      *  fp  frame pointer
    243      *
    244      * The stack copy is complete.  Grab the first two words off of argv
    245      * and tuck them into r2/r3.  If the first arg is 32-bit and the second
    246      * arg is 64-bit, then r3 "holds" a pad word and the load is unnecessary
    247      * but harmless.
    248      *
    249      * If there are 0 or 1 arg words in argv, we will be loading uninitialized
    250      * data into the registers, but since nothing tries to use it it's also
    251      * harmless (assuming argv[0] and argv[1] point to valid memory, which
    252      * is a reasonable assumption for Dalvik's interpreted stacks).
    253      */
    254     ldmia   r9, {r2-r3}                 @ r2/r3<- argv[0]/argv[1]
    255 
    256     ldr     ip, [fp, #8+FP_ADJ]         @ ip<- func
    257 #ifdef __ARM_HAVE_BLX
    258     blx     ip                          @ call func
    259 #else
    260     mov     lr, pc                      @ call func the old-fashioned way
    261     bx      ip
    262 #endif
    263 
    264     @ We're back, result is in r0 or (for long/double) r0-r1.
    265     @
    266     @ In theory, we need to use the "return type" arg to figure out what
    267     @ we have and how to return it.  However, unless we have an FPU and
    268     @ "hard" fp calling conventions, all we need to do is copy r0-r1 into
    269     @ the JValue union.
    270     @
    271     @ Thought: could redefine DalvikJniReturnType such that single-word
    272     @ and double-word values occupy different ranges; simple comparison
    273     @ allows us to choose between str and stm.  Probably not worthwhile.
    274     @
    275     cmp     r6, #0                      @ DALVIK_JNI_RETURN_VOID?
    276     ldrne   ip, [fp, #12+FP_ADJ]        @ pReturn
    277     sub     sp, fp, #FP_STACK_OFFSET    @ restore sp to post-reg-save offset
    278     stmneia ip, {r0-r1}                 @ pReturn->j <- r0/r1
    279 
    280     @ Restore the registers we saved and return.  On >= ARMv5TE we can
    281     @ restore PC directly from the saved LR.
    282 #ifdef __ARM_HAVE_PC_INTERWORK
    283     ldmfd   sp!, {r6, r7, r8, r9, fp, pc}
    284 #else
    285     ldmfd   sp!, {r6, r7, r8, r9, fp, lr}
    286     bx      lr
    287 #endif
    288 
    289 
    290 
    291     /*
    292      * "Slow" path.
    293      * Walk through the argument list, counting up the number of 32-bit words
    294      * required to contain it.  Then walk through it a second time, copying
    295      * values out to the stack.  (We could pre-compute the size to save
    296      * ourselves a trip, but we'd have to store that somewhere -- this is
    297      * sufficiently unlikely that it's not worthwhile.)
    298      *
    299      * Try not to make any assumptions about the number of args -- I think
    300      * the class file format allows up to 64K words (need to verify that).
    301      *
    302      * Currently:
    303      *  r0  don't touch
    304      *  r1  don't touch
    305      *  r2  arg info (return type is pulled out, then r2 is the word count)
    306      *  r3  argc (never read on this path; reused as the signature cursor)
    307      *  r4-r5 don't touch (not saved)
    308      *  r6-r8 (available; r6 must hold the return type at .Lcopy_done)
    309      *  r9  argv
    310      *  fp  frame pointer
    311      */
    312 .Lno_arg_info:
    313     and     r6, r2, #0x70000000         @ r6<- return type bits (0 iff void)
    314     ldr     r3, [fp, #4+FP_ADJ]         @ r3<- short signature (argc is dead)
    315     add     r3, r3, #1                  @ advance past return type
    316     mov     r2, #0                      @ r2<- word count, init to zero
    317 
    318 .Lcount_loop:
    319     ldrb    ip, [r3], #1                @ ip<- *signature++
    320     cmp     ip, #0                      @ end?
    321     beq     .Lcount_done                @ all done, bail
    322     add     r2, r2, #1                  @ count++
    323     cmp     ip, #'D'                    @ look for 'D' or 'J', which are 64-bit
    324     cmpne   ip, #'J'
    325     bne     .Lcount_loop
    326 
    327     @ 64-bit value, insert padding if we're not aligned
    328     tst     r2, #1                      @ count odd after the incr above?
    329     addne   r2, #1                      @ odd: value began on an even word, add its 2nd word
    330     addeq   r2, #2                      @ even: the incr was a pad, add both value words
    331     b       .Lcount_loop
    332 .Lcount_done:
    333 
    334     @ We have the padded-out word count in r2.  We subtract 2 from it
    335     @ because we don't push the first two arg words on the stack (they're
    336     @ destined for r2/r3).  Pushing them on and popping them off would be
    337     @ simpler but slower.
    338     subs    r2, r2, #2                  @ subtract 2 (for contents of r2/r3)
    339     movmis  r2, #0                      @ if negative, peg at zero, set Z-flag
    340     beq     .Lcopy_done                 @ zero args, skip stack copy
    341 
    342 DBG tst     sp, #7                      @ DEBUG - make sure sp is aligned now
    343 DBG bne     dvmAbort                    @ DEBUG
    344 
    345     @ Set up to copy from r7 to r8.  We copy from the second arg to the
    346     @ last arg, which means reading and writing to ascending addresses.
    347     sub     sp, sp, r2, asl #2          @ sp<- sp - r2*4
    348     bic     sp, #4                      @ round sp down to a 64-bit boundary
    349     mov     r7, r9                      @ r7<- argv
    350     mov     r8, sp                      @ r8<- sp
    351 
    352     @ We need to copy words from [r7] to [r8].  We walk forward through
    353     @ the signature again, "copying" pad words when appropriate, storing
    354     @ upward into the stack.
    355     ldr     r3, [fp, #4+FP_ADJ]         @ r3<- signature
    356     add     r3, r3, #1                  @ advance past return type
    357     add     r7, r7, #8                  @ r7<- r7+8 (assume argv 0/1 in r2/r3)
    358 
    359     @ Eat first arg or two, for the stuff that goes into r2/r3.
    360     ldrb    ip, [r3], #1                @ ip<- *signature++
    361     cmp     ip, #'D'
    362     cmpne   ip, #'J'
    363     beq     .Lstack_copy_loop           @ 64-bit arg fills r2+r3
    364 
    365     @ First arg was 32-bit, check the next
    366     ldrb    ip, [r3], #1                @ ip<- *signature++
    367     cmp     ip, #'D'
    368     cmpne   ip, #'J'
    369     subeq   r7, #4                      @ r7<- r7-4 (take it back - pad word)
    370     beq     .Lstack_copy_loop2          @ start with char we already have
    371 
    372     @ Two 32-bit args, fall through and start with next arg
    373 
    374 .Lstack_copy_loop:
    375     ldrb    ip, [r3], #1                @ ip<- *signature++
    376 .Lstack_copy_loop2:
    377     cmp     ip, #0                      @ end of shorty?
    378     beq     .Lcopy_done                 @ yes
    379 
    380     cmp     ip, #'D'
    381     cmpne   ip, #'J'
    382     beq     .Lcopy64
    383 
    384     @ Copy a 32-bit value.  [r8] is initially at the end of the stack.  We
    385     @ use "full descending" stacks, so we store into [r8] and incr as we
    386     @ move toward the end of the arg list.
    387 .Lcopy32:
    388     ldr     ip, [r7], #4
    389     str     ip, [r8], #4
    390     b       .Lstack_copy_loop
    391 
    392 .Lcopy64:
    393     @ Copy a 64-bit value.  If necessary, leave a hole in the stack to
    394     @ ensure alignment.  We know the [r8] output area is 64-bit aligned,
    395     @ so we can just mask the address.
    396     add     r8, r8, #7          @ r8<- (r8+7) & ~7
    397     ldr     ip, [r7], #4
    398     bic     r8, r8, #7
    399     ldr     r2, [r7], #4        @ r2 is free as a temp; it is reloaded at .Lcopy_done
    400     str     ip, [r8], #4
    401     str     r2, [r8], #4
    402     b       .Lstack_copy_loop
    403 
    404     .fnend
    405     .size   dvmPlatformInvoke, .-dvmPlatformInvoke
    406 
    407 #if 0
    408 
    409 /*
    410  * Debug aid.  "SQUEAK n" emits a routine common_squeak<n> that prints "<n>"
    411  * through printf and returns.  Six registers are stacked so sp stays 64-bit
    412  * aligned per the EABI (the original note says the ip save is not effective).
    413  */
    414      .macro SQUEAK num
    415 common_squeak\num:
    416     stmfd   sp!, {r0-r3, ip, lr}        @ stack the caller's registers
    417     ldr     r0, strSqueak               @ r0<- address of the format string
    418     mov     r1, #\num                   @ r1<- marker number to print
    419     bl      printf
    420 #ifdef __ARM_HAVE_PC_INTERWORK
    421     ldmfd   sp!, {r0-r3, ip, pc}        @ unstack and return in one step
    422 #else
    423     ldmfd   sp!, {r0-r3, ip, lr}        @ unstack, then return through lr
    424     bx      lr
    425 #endif
    426     .endm
    427 
    428     SQUEAK  0
    429     SQUEAK  1
    430     SQUEAK  2
    431     SQUEAK  3
    432     SQUEAK  4
    433     SQUEAK  5
    434 
    435 strSqueak:
    436     .word   .Lsqueak_fmt
    437 .Lsqueak_fmt:
    438     .asciz  "<%d>"
    439 
    440     .align  2
    441 
    442 #endif
    443 
    444 #endif /*__ARM_EABI__*/
    445