Home | History | Annotate | Download | only in arm
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 /*
     17  * JNI method invocation.  This is used to call a C/C++ JNI method.  The
     18  * argument list has to be pushed onto the native stack according to
     19  * local calling conventions.
     20  *
     21  * This version supports the "new" ARM EABI.
     22  */
     23 
     24 #include <machine/cpu-features.h>
     25 
     26 #ifdef __ARM_EABI__
     27 
     28 #ifdef EXTENDED_EABI_DEBUG      /* debug build: DBG expands to nothing, so DBG-prefixed lines are assembled */
     29 # define DBG
     30 #else                           /* otherwise DBG expands to the "@" comment character, disabling the line */
     31 # define DBG @
     32 #endif
     33 
     34 
     35 /*
     36 Function prototype:
     37 
     38 void dvmPlatformInvoke(void* pEnv, ClassObject* clazz, int argInfo, int argc,
     39     const u4* argv, const char* signature, void* func, JValue* pReturn)
     40 
     41 The method we are calling has the form:
     42 
     43   return_type func(JNIEnv* pEnv, ClassObject* clazz, ...)
     44     -or-
     45   return_type func(JNIEnv* pEnv, Object* this, ...)
     46 
     47 We receive a collection of 32-bit values which correspond to arguments from
     48 the interpreter (e.g. float occupies one, double occupies two).  It's up to
     49 us to convert these into local calling conventions.
     50 */
     51 
     52 /*
     53 ARM EABI notes:
     54 
     55 r0-r3 hold first 4 args to a method
     56 r9 is given special treatment in some situations, but not for us
     57 r10 (sl) seems to be generally available
     58 r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
     59 r12 (ip) is scratch -- not preserved across method calls
     60 r13 (sp) should be managed carefully in case a signal arrives
     61 r14 (lr) must be preserved
     62 r15 (pc) can be tinkered with directly
     63 
     64 r0 holds returns of <= 4 bytes
     65 r0-r1 hold returns of 8 bytes, low word in r0
     66 
     67 Callee must save/restore r4+ (except r12) if it modifies them.
     68 
     69 Stack is "full descending".  Only the arguments that don't fit in the first 4
     70 registers are placed on the stack.  "sp" points at the first stacked argument
     71 (i.e. the 5th arg).
     72 
     73 VFP: single-precision results in s0, double-precision results in d0.
     74 
     75 In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
     76 64-bit quantities (long long, double) must be 64-bit aligned.  This means
     77 we have to scan the method signature, identify arguments that must be
     78 padded, and fix them up appropriately.
     79 */
     80 
     81     .text
     82     .align  2
     83     .global dvmPlatformInvoke
     84     .type   dvmPlatformInvoke, %function
     85 
     86 /*
     87  * On entry:
     88  *   r0  JNIEnv (can be left alone)
     89  *   r1  clazz (NULL for virtual method calls, non-NULL for static)
     90  *   r2  arg info
     91  *   r3  argc (number of 32-bit values in argv)
     92  *   [sp]     argv
     93  *   [sp,#4]  short signature
     94  *   [sp,#8]  func
     95  *   [sp,#12] pReturn
     96  *
     97  * For a virtual method call, the "this" reference is in argv[0].
     98  *
     99  * argInfo (32-bit int) layout:
    100  *   SRRRLLLL FFFFFFFF FFFFFFFF FFFFFFFF
    101  *
    102  *   S - if set, do things the hard way (scan the signature)
    103  *   R - return type enumeration, really only important for hardware FP
    104  *   L - number of double-words of storage required on stack (0-30 words)
    105  *   F - pad flag -- if set, write a pad word to the stack
    106  *
    107  * With this arrangement we can efficiently push up to 24 words of arguments
    108  * onto the stack.  Anything requiring more than that -- which should happen
    109  * rarely to never -- can do the slow signature scan.
    110  *
    111  * (We could pack the Fs more efficiently -- we know we never push two pads
    112  * in a row, and the first word can never be a pad -- but there's really
    113  * no need for it.)
    114  *
    115  * TODO: could reduce register-saving overhead for "fast" case, since we
    116  * don't use a couple of registers.  Another thought is to rearrange the
    117  * arguments such that r0/r1 get passed in on the stack, allowing us to
    118  * use r0/r1 freely here and then load them with a single ldm.  Might be
    119  * faster than saving/restoring other registers so that we can leave r0/r1
    120  * undisturbed.
    121  *
    122  * NOTE: if the called function has more than 4 words of arguments, gdb
    123  * will not be able to unwind the stack past this method.  The only way
    124  * around this is to convince gdb to respect an explicit frame pointer.
    125  */
    126 dvmPlatformInvoke:
    127     .fnstart
    128     @ Save regs.  Same style as gcc with "-fomit-frame-pointer" -- we don't
    129     @ disturb "fp" in case somebody else wants it.  Copy "sp" to r4 and use
    130     @ that to access local vars.
    131     @
    132     @ On entry to a function, "sp" must be 64-bit aligned.  This means
    133     @ we have to adjust sp manually if we push an odd number of regs here
    134     @ (both here and when exiting).  Easier to just push an even number
    135     @ of registers.
    136     mov     ip, sp                      @ ip<- original stack pointer
    137     .save {r4, r5, r6, r7, r8, r9, ip, lr}
    138     stmfd   sp!, {r4, r5, r6, r7, r8, r9, ip, lr}   @ 8 words pushed, so sp stays 64-bit aligned
    139 
    140     mov     r4, ip                      @ r4<- original sp; [r4], [r4,#4], [r4,#8], [r4,#12] = argv, signature, func, pReturn
    141 
    142     @ Ensure 64-bit alignment.  EABI guarantees sp is aligned on entry, make
    143     @ sure we're aligned properly now.
    144 DBG tst     sp, #4                      @ 64-bit aligned?
    145 DBG bne     dvmAbort
    146 
    147     cmp     r1, #0                      @ clazz == NULL?  (NULL means virtual, i.e. not static)
    148     ldr     r9, [r4]                    @ r9<- argv
    149 
    150     @ Not static: set r1 to *argv++ ("this"), and set argc--.
    151     @
    152     @ Note the "this" pointer is not included in the method signature.
    153     ldreq   r1, [r9], #4                @ r1<- *argv++ ("this"), only when clazz was NULL
    154     subeq   r3, r3, #1                  @ argc--, only when clazz was NULL
    155 
    156     @ Do we have arg padding flags in "argInfo"? (just need to check hi bit)
    157     teq     r2, #0                      @ N flag<- bit 31 of argInfo (the S bit)
    158     bmi     .Lno_arg_info               @ S set: take the slow signature-scanning path
    159 
    160     /*
    161      * "Fast" path.
    162      *
    163      * Make room on the stack for the arguments and copy them over,
    164      * inserting pad words when appropriate.
    165      *
    166      * Currently:
    167      *   r0  don't touch
    168      *   r1  don't touch
    169      *   r2  arg info
    170      *   r3  argc
    171      *   r4  original stack pointer
    172      *   r5-r8 (available)
    173      *   r9  argv
    174      */
    175 .Lhave_arg_info:
    176     @ Expand the stack by the specified amount.  We want to extract the
    177     @ count of double-words from r2, multiply it by 8, and subtract that
    178     @ from the stack pointer.
    179     and     ip, r2, #0x0f000000         @ ip<- double-words required (still in bits 24-27)
    180     mov     r5, r2, lsr #28             @ r5<- return type
    181     sub     sp, sp, ip, lsr #21         @ shift right 24, then left 3
    182     mov     r8, sp                      @ r8<- sp  (arg copy dest)
    183 
    184     @ Stick argv in r7 and advance it past the argv values that will be
    185     @ held in r2-r3.  It's possible r3 will hold a pad, so check the
    186     @ bit in r2.  We do this by ignoring the first bit (which would
    187     @ indicate a pad in r2) and shifting the second into the carry flag.
    188     @ If the carry is set, r3 will hold a pad, so we adjust argv less.
    189     @
    190     @ (This is harmless if argc==0)
    191     mov     r7, r9
    192     movs    r2, r2, lsr #2              @ C<- pad flag for r3; r2 bit 0 is now the flag for the first stack word
    193     addcc   r7, r7, #8                  @ skip past 2 words, for r2 and r3
    194     subcc   r3, r3, #2
    195     addcs   r7, r7, #4                  @ skip past 1 word, for r2
    196     subcs   r3, r3, #1
    197 
    198 .Lfast_copy_loop:
    199     @ if (--argc < 0) goto invoke
    200     subs    r3, r3, #1
    201     bmi     .Lcopy_done                 @ NOTE: expects original argv in r9
    202 
    203 .Lfast_copy_loop2:
    204     @ Get pad flag into carry bit.  If it's set, we don't pull a value
    205     @ out of argv.
    206     movs    r2, r2, lsr #1
    207 
    208     ldrcc   ip, [r7], #4                @ ip = *r7++ (pull from argv)
    209     strcc   ip, [r8], #4                @ *r8++ = ip (write to stack)
    210     bcc     .Lfast_copy_loop
    211 
    212 DBG movcs   ip, #-3                     @ DEBUG DEBUG - make pad word obvious
    213 DBG strcs   ip, [r8]                    @ DEBUG DEBUG
    214     add     r8, r8, #4                  @ if pad, just advance r8 (dest) without store
    215     b       .Lfast_copy_loop2           @ don't adjust argc after writing pad
    216 
    217 
    218 
    219 .Lcopy_done:
    220     /*
    221      * Currently:
    222      *  r0-r1  args (JNIEnv*, thisOrClass); r2-r3 receive arg0/arg1 from the ldmia below
    223      *  r4  original saved sp
    224      *  r5  return type (enum DalvikJniReturnType)
    225      *  r9  original argv
    226      *
    227      * The stack copy is complete.  Grab the first two words off of argv
    228      * and tuck them into r2/r3.  If the first arg is 32-bit and the second
    229      * arg is 64-bit, then r3 "holds" a pad word and the load is unnecessary
    230      * but harmless.
    231      *
    232      * If there are 0 or 1 arg words in argv, we will be loading uninitialized
    233      * data into the registers, but since nothing tries to use it it's also
    234      * harmless (assuming argv[0] and argv[1] point to valid memory, which
    235      * is a reasonable assumption for Dalvik's interpreted stacks).
    236      *
    237      */
    238     ldmia   r9, {r2-r3}                 @ r2/r3<- argv[0]/argv[1]
    239 
    240     @ call the method
    241     ldr     ip, [r4, #8]                @ ip<- func
    242 #ifdef __ARM_HAVE_BLX
    243     blx     ip
    244 #else
    245     mov     lr, pc                      @ lr<- return address (pc reads as this insn + 8, i.e. the insn after bx)
    246     bx      ip
    247 #endif
    248 
    249     @ We're back, result is in r0 or (for long/double) r0-r1.
    250     @
    251     @ In theory, we need to use the "return type" arg to figure out what
    252     @ we have and how to return it.  However, unless we have an FPU,
    253     @ all we need to do is copy r0-r1 into the JValue union.
    254     @
    255     @ Thought: could redefine DalvikJniReturnType such that single-word
    256     @ and double-word values occupy different ranges; simple comparison
    257     @ allows us to choose between str and stm.  Probably not worthwhile.
    258     @
    259     cmp     r5, #0                      @ DALVIK_JNI_RETURN_VOID?
    260     ldrne   ip, [r4, #12]               @ ip<- pReturn (skipped when return type is 0)
    261     stmneia ip, {r0-r1}                 @ pReturn->j <- r0/r1
    262 
    263     @ Restore the registers we saved and return (restores lr into pc, and
    264     @ the initial stack pointer into sp).
    265 #ifdef __ARM_HAVE_PC_INTERWORK
    266     ldmdb   r4, {r4, r5, r6, r7, r8, r9, sp, pc}
    267 #else
    268     ldmdb   r4, {r4, r5, r6, r7, r8, r9, sp, lr}
    269     bx      lr
    270 #endif
    271     .fnend
    272 
    273 
    274 
    275     /*
    276      * "Slow" path.
    277      * Walk through the argument list, counting up the number of 32-bit words
    278      * required to contain it.  Then walk through it a second time, copying
    279      * values out to the stack.  (We could pre-compute the size to save
    280      * ourselves a trip, but we'd have to store that somewhere -- this is
    281      * sufficiently unlikely that it's not worthwhile.)
    282      *
    283      * Try not to make any assumptions about the number of args -- I think
    284      * the class file format allows up to 64K words (need to verify that).
    285      *
    286      * Currently:
    287      *   r0  don't touch
    288      *   r1  don't touch
    289      *   r2  (available)
    290      *   r3  argc
    291      *   r4  original stack pointer
    292      *   r5-r8 (available)
    293      *   r9  argv
    294      */
    295 .Lno_arg_info:
    296     mov     r5, r2, lsr #28             @ r5<- return type
    297     ldr     r6, [r4, #4]                @ r6<- short signature; NOTE(review): scan starts at char 0 -- confirm caller omits the return-type char
    298     mov     r2, #0                      @ r2<- word count, init to zero
    299 
    300 .Lcount_loop:
    301     ldrb    ip, [r6], #1                @ ip<- *signature++
    302     cmp     ip, #0                      @ end?
    303     beq     .Lcount_done                @ all done, bail
    304     add     r2, r2, #1                  @ count++
    305     cmp     ip, #'D'                    @ look for 'D' or 'J', which are 64-bit
    306     cmpne   ip, #'J'
    307     bne     .Lcount_loop
    308 
    309     @ 64-bit value, insert padding if we're not aligned
    310     tst     r2, #1                      @ count odd after initial incr (arg began on an even word)?
    311     addne   r2, #1                      @ odd: already aligned, add 1 more to cover 64 bits
    312     addeq   r2, #2                      @ even: treat prev as pad, incr 2 now
    313     b       .Lcount_loop
    314 .Lcount_done:
    315 
    316     @ We have the padded-out word count in r2.  We subtract 2 from it
    317     @ because we don't push the first two arg words on the stack (they're
    318     @ destined for r2/r3).  Pushing them on and popping them off would be
    319     @ simpler but slower.
    320     subs    r2, r2, #2                  @ subtract 2 (for contents of r2/r3)
    321     movmis  r2, #0                      @ if negative, peg at zero, set Z-flag
    322     beq     .Lcopy_done                 @ zero args, skip stack copy
    323 
    324 DBG tst     sp, #7                      @ DEBUG - make sure sp is aligned now
    325 DBG bne     dvmAbort                    @ DEBUG
    326 
    327     @ Set up to copy from r7 to r8.  We copy from the second arg to the
    328     @ last arg, which means reading and writing to ascending addresses.
    329     sub     sp, sp, r2, asl #2          @ sp<- sp - r2*4
    330     bic     sp, #4                      @ subtract another 4 if needed, to keep sp 64-bit aligned
    331     mov     r7, r9                      @ r7<- argv
    332     mov     r8, sp                      @ r8<- sp
    333 
    334     @ We need to copy words from [r7] to [r8].  We walk forward through
    335     @ the signature again, "copying" pad words when appropriate, storing
    336     @ upward into the stack.
    337     ldr     r6, [r4, #4]                @ r6<- signature
    338     add     r7, r7, #8                  @ r7<- r7+8 (assume argv 0/1 in r2/r3)
    339 
    340     @ Eat first arg or two, for the stuff that goes into r2/r3.
    341     ldrb    ip, [r6], #1                @ ip<- *signature++
    342     cmp     ip, #'D'
    343     cmpne   ip, #'J'
    344     beq     .Lstack_copy_loop           @ 64-bit arg fills r2+r3
    345 
    346     @ First arg was 32-bit, check the next (test the char in ip, not the pointer in r6)
    347     ldrb    ip, [r6], #1                @ ip<- *signature++
    348     cmp     ip, #'D'                    @ is the second arg 64-bit?
    349     cmpne   ip, #'J'
    350     subeq   r7, #4                      @ r7<- r7-4 (take it back - pad word)
    351     beq     .Lstack_copy_loop2          @ start with char we already have
    352 
    353     @ Two 32-bit args, fall through and start with next arg
    354 
    355 .Lstack_copy_loop:
    356     ldrb    ip, [r6], #1                @ ip<- *signature++
    357 .Lstack_copy_loop2:
    358     cmp     ip, #0                      @ end of shorty?
    359     beq     .Lcopy_done                 @ yes
    360 
    361     cmp     ip, #'D'
    362     cmpne   ip, #'J'
    363     beq     .Lcopy64
    364 
    365     @ Copy a 32-bit value.  [r8] is initially at the end of the stack.  We
    366     @ use "full descending" stacks, so we store into [r8] and incr as we
    367     @ move toward the end of the arg list.
    368 .Lcopy32:
    369     ldr     ip, [r7], #4
    370     str     ip, [r8], #4
    371     b       .Lstack_copy_loop
    372 
    373 .Lcopy64:
    374     @ Copy a 64-bit value.  If necessary, leave a hole in the stack to
    375     @ ensure alignment.  We know the [r8] output area is 64-bit aligned,
    376     @ so we can just mask the address.
    377     add     r8, r8, #7          @ r8<- (r8+7) & ~7
    378     ldr     ip, [r7], #4
    379     bic     r8, r8, #7
    380     ldr     r2, [r7], #4        @ r2 is free as scratch: .Lcopy_done reloads r2/r3 from argv
    381     str     ip, [r8], #4
    382     str     r2, [r8], #4
    383     b       .Lstack_copy_loop
    384 
    385 
    386 
    387 #if 0
    388 
    389 /*
    390  * Spit out a "we were here", preserving all registers.  (The attempt
    391  * to save ip won't work, but we need to save an even number of
    392  * registers for EABI 64-bit stack alignment.)
    393  */
    394      .macro SQUEAK num
    395 common_squeak\num:
    396     stmfd   sp!, {r0, r1, r2, r3, ip, lr}   @ save six regs (even count keeps sp 64-bit aligned)
    397     ldr     r0, strSqueak                    @ r0<- address of the "<%d>" format string
    398     mov     r1, #\num                        @ r1<- marker number to print
    399     bl      printf
    400 #ifdef __ARM_HAVE_PC_INTERWORK
    401     ldmfd   sp!, {r0, r1, r2, r3, ip, pc}    @ restore regs and return (saved lr goes into pc)
    402 #else
    403     ldmfd   sp!, {r0, r1, r2, r3, ip, lr}
    404     bx      lr
    405 #endif
    406     .endm
    407 
    408     SQUEAK  0
    409     SQUEAK  1
    410     SQUEAK  2
    411     SQUEAK  3
    412     SQUEAK  4
    413     SQUEAK  5
    414 
    415 strSqueak:
    416     .word   .LstrSqueak                      @ literal word holding the format string's address
    417 .LstrSqueak:
    418     .asciz  "<%d>"
    419 
    420     .align  2
    421 
    422 #endif
    423 
    424 #endif /*__ARM_EABI__*/
    425