/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * JNI method invocation.  This is used to call a C/C++ JNI method.  The
 * argument list has to be pushed onto the native stack according to
 * local calling conventions.
 *
 * This version supports the "new" ARM EABI.
 */

#include <machine/cpu-features.h>

#ifdef __ARM_EABI__

/*
 * DBG prefixes debug-only instructions.  When EXTENDED_EABI_DEBUG is not
 * defined it expands to the assembler comment character, so the rest of
 * the line is ignored.
 */
#ifdef EXTENDED_EABI_DEBUG
# define DBG
#else
# define DBG @
#endif


/*
Function prototype:

void dvmPlatformInvoke(void* pEnv, ClassObject* clazz, int argInfo, int argc,
    const u4* argv, const char* signature, void* func, JValue* pReturn)

The method we are calling has the form:

  return_type func(JNIEnv* pEnv, ClassObject* clazz, ...)
    -or-
  return_type func(JNIEnv* pEnv, Object* this, ...)

We receive a collection of 32-bit values which correspond to arguments from
the interpreter (e.g. float occupies one, double occupies two).  It's up to
us to convert these into local calling conventions.
*/

/*
ARM EABI notes:

r0-r3 hold first 4 args to a method
r9 is given special treatment in some situations, but not for us
r10 (sl) seems to be generally available
r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
r12 (ip) is scratch -- not preserved across method calls
r13 (sp) should be managed carefully in case a signal arrives
r14 (lr) must be preserved
r15 (pc) can be tinkered with directly

r0 holds returns of <= 4 bytes
r0-r1 hold returns of 8 bytes, low word in r0

Callee must save/restore r4+ (except r12) if it modifies them.

Stack is "full descending".  Only the arguments that don't fit in the first 4
registers are placed on the stack.  "sp" points at the first stacked argument
(i.e. the 5th arg).

VFP: single-precision results in s0, double-precision results in d0.

In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
64-bit quantities (long long, double) must be 64-bit aligned.  This means
we have to scan the method signature, identify arguments that must be
padded, and fix them up appropriately.
*/

    .text
    .align  2
    .global dvmPlatformInvoke
    .type   dvmPlatformInvoke, %function

/*
 * On entry:
 *   r0  JNIEnv (can be left alone)
 *   r1  clazz (NULL for virtual method calls, non-NULL for static)
 *   r2  arg info
 *   r3  argc (number of 32-bit values in argv)
 *   [sp]     argv
 *   [sp,#4]  short signature
 *   [sp,#8]  func
 *   [sp,#12] pReturn
 *
 * For a virtual method call, the "this" reference is in argv[0].
 *
 * argInfo (32-bit int) layout:
 *   SRRRLLLL FFFFFFFF FFFFFFFF FFFFFFFF
 *
 *   S - if set, do things the hard way (scan the signature)
 *   R - return type enumeration, really only important for hardware FP
 *   L - number of double-words of storage required on stack (0-30 words)
 *   F - pad flag -- if set, write a pad word to the stack
 *
 * With this arrangement we can efficiently push up to 24 words of arguments
 * onto the stack.  Anything requiring more than that -- which should happen
 * rarely to never -- can do the slow signature scan.
 *
 * (We could pack the Fs more efficiently -- we know we never push two pads
 * in a row, and the first word can never be a pad -- but there's really
 * no need for it.)
 *
 * TODO: could reduce register-saving overhead for "fast" case, since we
 * don't use a couple of registers.  Another thought is to rearrange the
 * arguments such that r0/r1 get passed in on the stack, allowing us to
 * use r0/r1 freely here and then load them with a single ldm.  Might be
 * faster than saving/restoring other registers so that we can leave r0/r1
 * undisturbed.
 *
 * NOTE: if the called function has more than 4 words of arguments, gdb
 * will not be able to unwind the stack past this method.  The only way
 * around this is to convince gdb to respect an explicit frame pointer.
 */
dvmPlatformInvoke:
    .fnstart
    @ Save regs.  Same style as gcc with "-fomit-frame-pointer" -- we don't
    @ disturb "fp" in case somebody else wants it.  Copy "sp" to r4 and use
    @ that to access local vars.
    @
    @ On entry to a function, "sp" must be 64-bit aligned.  This means
    @ we have to adjust sp manually if we push an odd number of regs here
    @ (both here and when exiting).  Easier to just push an even number
    @ of registers.
    mov     ip, sp                      @ ip<- original stack pointer
    .save {r4, r5, r6, r7, r8, r9, ip, lr}
    stmfd   sp!, {r4, r5, r6, r7, r8, r9, ip, lr}

    mov     r4, ip                      @ r4<- original stack pointer

    @ Ensure 64-bit alignment.  EABI guarantees sp is aligned on entry, make
    @ sure we're aligned properly now.
DBG tst     sp, #4                      @ 64-bit aligned?
DBG bne     dvmAbort

    cmp     r1, #0                      @ Is this a static method?
    ldr     r9, [r4]                    @ r9<- argv

    @ Not static: set r1 to *argv++ ("this"), and set argc--.
    @
    @ Note the "this" pointer is not included in the method signature.
    ldreq   r1, [r9], #4
    subeq   r3, r3, #1

    @ Do we have arg padding flags in "argInfo"? (just need to check hi bit)
    teq     r2, #0
    bmi     .Lno_arg_info

    /*
     * "Fast" path.
     *
     * Make room on the stack for the arguments and copy them over,
     * inserting pad words when appropriate.
     *
     * Currently:
     *   r0  don't touch
     *   r1  don't touch
     *   r2  arg info
     *   r3  argc
     *   r4  original stack pointer
     *   r5-r8 (available)
     *   r9  argv
     */
.Lhave_arg_info:
    @ Expand the stack by the specified amount.  We want to extract the
    @ count of double-words from r2, multiply it by 8, and subtract that
    @ from the stack pointer.
    and     ip, r2, #0x0f000000         @ ip<- double-words required
    mov     r5, r2, lsr #28             @ r5<- return type
    sub     sp, sp, ip, lsr #21         @ shift right 24, then left 3
    mov     r8, sp                      @ r8<- sp  (arg copy dest)

    @ Stick argv in r7 and advance it past the argv values that will be
    @ held in r2-r3.  It's possible r3 will hold a pad, so check the
    @ bit in r2.  We do this by ignoring the first bit (which would
    @ indicate a pad in r2) and shifting the second into the carry flag.
    @ If the carry is set, r3 will hold a pad, so we adjust argv less.
    @
    @ (This is harmless if argc==0)
    mov     r7, r9
    movs    r2, r2, lsr #2
    addcc   r7, r7, #8                  @ skip past 2 words, for r2 and r3
    subcc   r3, r3, #2
    addcs   r7, r7, #4                  @ skip past 1 word, for r2
    subcs   r3, r3, #1

.Lfast_copy_loop:
    @ if (--argc < 0) goto invoke
    subs    r3, r3, #1
    bmi     .Lcopy_done                 @ NOTE: expects original argv in r9

.Lfast_copy_loop2:
    @ Get pad flag into carry bit.  If it's set, we don't pull a value
    @ out of argv.
    movs    r2, r2, lsr #1

    ldrcc   ip, [r7], #4                @ ip = *r7++ (pull from argv)
    strcc   ip, [r8], #4                @ *r8++ = ip (write to stack)
    bcc     .Lfast_copy_loop

DBG movcs   ip, #-3                     @ DEBUG DEBUG - make pad word obvious
DBG strcs   ip, [r8]                    @ DEBUG DEBUG
    add     r8, r8, #4                  @ if pad, just advance r8 without store
    b       .Lfast_copy_loop2           @ don't adjust argc after writing pad



.Lcopy_done:
    /*
     * Currently:
     *  r0-r3  args (JNIEnv*, thisOrClass, arg0, arg1)
     *  r4  original saved sp
     *  r5  return type (enum DalvikJniReturnType)
     *  r9  original argv
     *
     * The stack copy is complete.  Grab the first two words off of argv
     * and tuck them into r2/r3.  If the first arg is 32-bit and the second
     * arg is 64-bit, then r3 "holds" a pad word and the load is unnecessary
     * but harmless.
     *
     * If there are 0 or 1 arg words in argv, we will be loading uninitialized
     * data into the registers, but since nothing tries to use it it's also
     * harmless (assuming argv[0] and argv[1] point to valid memory, which
     * is a reasonable assumption for Dalvik's interpreted stacks).
     *
     */
    ldmia   r9, {r2-r3}                 @ r2/r3<- argv[0]/argv[1]

    @ call the method
    ldr     ip, [r4, #8]                @ func
#ifdef __ARM_HAVE_BLX
    blx     ip
#else
    mov     lr, pc
    bx      ip
#endif

    @ We're back, result is in r0 or (for long/double) r0-r1.
    @
    @ In theory, we need to use the "return type" arg to figure out what
    @ we have and how to return it.  However, unless we have an FPU,
    @ all we need to do is copy r0-r1 into the JValue union.
    @
    @ Thought: could redefine DalvikJniReturnType such that single-word
    @ and double-word values occupy different ranges; simple comparison
    @ allows us to choose between str and stm.  Probably not worthwhile.
    @
    cmp     r5, #0                      @ DALVIK_JNI_RETURN_VOID?
    ldrne   ip, [r4, #12]               @ pReturn
    stmneia ip, {r0-r1}                 @ pReturn->j <- r0/r1

    @ Restore the registers we saved and return (restores lr into pc, and
    @ the initial stack pointer into sp).
#ifdef __ARM_HAVE_PC_INTERWORK
    ldmdb   r4, {r4, r5, r6, r7, r8, r9, sp, pc}
#else
    ldmdb   r4, {r4, r5, r6, r7, r8, r9, sp, lr}
    bx      lr
#endif
    .fnend



    /*
     * "Slow" path.
     * Walk through the argument list, counting up the number of 32-bit words
     * required to contain it.  Then walk through it a second time, copying
     * values out to the stack.  (We could pre-compute the size to save
     * ourselves a trip, but we'd have to store that somewhere -- this is
     * sufficiently unlikely that it's not worthwhile.)
     *
     * Try not to make any assumptions about the number of args -- I think
     * the class file format allows up to 64K words (need to verify that).
     *
     * NOTE(review): both signature walks below start at the first character
     * of the string at [r4, #4], so this presumably expects a signature that
     * holds argument characters only (no leading return-type character) --
     * confirm against the caller.
     *
     * Currently:
     *   r0  don't touch
     *   r1  don't touch
     *   r2  (available)
     *   r3  argc
     *   r4  original stack pointer
     *   r5-r8 (available)
     *   r9  argv
     */
.Lno_arg_info:
    mov     r5, r2, lsr #28             @ r5<- return type
    ldr     r6, [r4, #4]                @ r6<- short signature
    mov     r2, #0                      @ r2<- word count, init to zero

.Lcount_loop:
    ldrb    ip, [r6], #1                @ ip<- *signature++
    cmp     ip, #0                      @ end?
    beq     .Lcount_done                @ all done, bail
    add     r2, r2, #1                  @ count++
    cmp     ip, #'D'                    @ look for 'D' or 'J', which are 64-bit
    cmpne   ip, #'J'
    bne     .Lcount_loop

    @ 64-bit value, insert padding if we're not aligned.  The count was
    @ already bumped once above, so an odd count here means the value
    @ started on an even (aligned) word.
    tst     r2, #1                      @ odd after initial incr?
    addne   r2, #1                      @ yes, add 1 more to cover 64 bits
    addeq   r2, #2                      @ no, treat prev as pad, incr 2 now
    b       .Lcount_loop
.Lcount_done:

    @ We have the padded-out word count in r2.  We subtract 2 from it
    @ because we don't push the first two arg words on the stack (they're
    @ destined for r2/r3).  Pushing them on and popping them off would be
    @ simpler but slower.
    subs    r2, r2, #2                  @ subtract 2 (for contents of r2/r3)
    movmis  r2, #0                      @ if negative, peg at zero, set Z-flag
    beq     .Lcopy_done                 @ zero args, skip stack copy

DBG tst     sp, #7                      @ DEBUG - make sure sp is aligned now
DBG bne     dvmAbort                    @ DEBUG

    @ Set up to copy from r7 to r8.  We copy from the second arg to the
    @ last arg, which means reading and writing to ascending addresses.
    sub     sp, sp, r2, asl #2          @ sp<- sp - r2*4
    bic     sp, #4                      @ subtract another 4 if needed to
                                        @ keep sp 64-bit aligned
    mov     r7, r9                      @ r7<- argv
    mov     r8, sp                      @ r8<- sp

    @ We need to copy words from [r7] to [r8].  We walk forward through
    @ the signature again, "copying" pad words when appropriate, storing
    @ upward into the stack.
    ldr     r6, [r4, #4]                @ r6<- signature
    add     r7, r7, #8                  @ r7<- r7+8 (assume argv 0/1 in r2/r3)

    @ Eat first arg or two, for the stuff that goes into r2/r3.
    ldrb    ip, [r6], #1                @ ip<- *signature++
    cmp     ip, #'D'
    cmpne   ip, #'J'
    beq     .Lstack_copy_loop           @ 64-bit arg fills r2+r3

    @ First arg was 32-bit, check the next.  The comparison must be made
    @ against the character just loaded into ip; r6 is the signature
    @ pointer and would never match, which would skip the 64-bit argument
    @ and leave r7 one word too far into argv.
    ldrb    ip, [r6], #1                @ ip<- *signature++
    cmp     ip, #'D'
    cmpne   ip, #'J'
    subeq   r7, #4                      @ r7<- r7-4 (take it back - pad word)
    beq     .Lstack_copy_loop2          @ start with char we already have

    @ Two 32-bit args, fall through and start with next arg

.Lstack_copy_loop:
    ldrb    ip, [r6], #1                @ ip<- *signature++
.Lstack_copy_loop2:
    cmp     ip, #0                      @ end of shorty?
    beq     .Lcopy_done                 @ yes

    cmp     ip, #'D'
    cmpne   ip, #'J'
    beq     .Lcopy64

    @ Copy a 32-bit value.  [r8] is initially at the end of the stack.  We
    @ use "full descending" stacks, so we store into [r8] and incr as we
    @ move toward the end of the arg list.
.Lcopy32:
    ldr     ip, [r7], #4
    str     ip, [r8], #4
    b       .Lstack_copy_loop

.Lcopy64:
    @ Copy a 64-bit value.  If necessary, leave a hole in the stack to
    @ ensure alignment.  We know the [r8] output area is 64-bit aligned,
    @ so we can just mask the address.
    add     r8, r8, #7                  @ r8<- (r8+7) & ~7
    ldr     ip, [r7], #4
    bic     r8, r8, #7
    ldr     r2, [r7], #4
    str     ip, [r8], #4
    str     r2, [r8], #4
    b       .Lstack_copy_loop



#if 0

/*
 * Spit out a "we were here", preserving all registers.  (The attempt
 * to save ip won't work, but we need to save an even number of
 * registers for EABI 64-bit stack alignment.)
 */
    .macro SQUEAK num
common_squeak\num:
    stmfd   sp!, {r0, r1, r2, r3, ip, lr}
    ldr     r0, strSqueak
    mov     r1, #\num
    bl      printf
#ifdef __ARM_HAVE_PC_INTERWORK
    ldmfd   sp!, {r0, r1, r2, r3, ip, pc}
#else
    ldmfd   sp!, {r0, r1, r2, r3, ip, lr}
    bx      lr
#endif
    .endm

    SQUEAK  0
    SQUEAK  1
    SQUEAK  2
    SQUEAK  3
    SQUEAK  4
    SQUEAK  5

strSqueak:
    .word   .LstrSqueak
.LstrSqueak:
    .asciz  "<%d>"

    .align  2

#endif

#endif /*__ARM_EABI__*/