/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This file contains codegen for the Thumb2 ISA and is intended to be
 * included by:
 *
 *        Codegen-$(TARGET_ARCH_VARIANT).c
 *
 */

/*
 * Reserve 6 bytes at the beginning of the trace
 *        +----------------------------+
 *        | prof count addr (4 bytes)  |
 *        +----------------------------+
 *        | chain cell offset (2 bytes)|
 *        +----------------------------+
 *
 * ...and then code to increment the execution
 *
 * For continuous profiling (10 bytes)
 *       ldr   r0, [pc-8]   @ get prof count addr    [4 bytes]
 *       ldr   r1, [r0]     @ load counter           [2 bytes]
 *       add   r1, #1       @ increment              [2 bytes]
 *       str   r1, [r0]     @ store                  [2 bytes]
 *
 * For periodic profiling (4 bytes)
 *       call  TEMPLATE_PERIODIC_PROFILING
 *
 * and return the size (in bytes) of the generated code.
 */

static int genTraceProfileEntry(CompilationUnit *cUnit)
{
    intptr_t addr = (intptr_t)dvmJitNextTraceCounter();
    /* The two 16-bit data words below assume little-endian layout */
    assert(__BYTE_ORDER == __LITTLE_ENDIAN);
    /* Emit the profile counter address as two raw 16-bit data words */
    newLIR1(cUnit, kArm16BitData, addr & 0xffff);
    newLIR1(cUnit, kArm16BitData, (addr >> 16) & 0xffff);
    /* Placeholder for the chain cell offset; recorded so it can be
     * patched later once the offset is known. */
    cUnit->chainCellOffsetLIR =
        (LIR *) newLIR1(cUnit, kArm16BitData, CHAIN_CELL_OFFSET_TAG);
    cUnit->headerSize = 6;
    if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
        (gDvmJit.profileMode == kTraceProfilingDisabled)) {
        /* Thumb[2] instruction used directly here to ensure correct size */
        newLIR2(cUnit, kThumb2LdrPcReln12, r0, 8);
        newLIR3(cUnit, kThumbLdrRRI5, r1, r0, 0);
        newLIR2(cUnit, kThumbAddRI8, r1, 1);
        newLIR3(cUnit, kThumbStrRRI5, r1, r0, 0);
        return 10;
    } else {
        int opcode = TEMPLATE_PERIODIC_PROFILING;
        /* blx is emitted as two half-word LIRs (Blx1/Blx2), both carrying
         * the absolute handler address for the periodic-profiling template */
        newLIR2(cUnit, kThumbBlx1,
            (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
            (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
        newLIR2(cUnit, kThumbBlx2,
            (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
            (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
        return 4;
    }
}

/* Negate a float: load src into an FP reg, emit vneg.f32, store result */
static void genNegFloat(CompilationUnit *cUnit, RegLocation rlDest,
                        RegLocation rlSrc)
{
    RegLocation rlResult;
    rlSrc = loadValue(cUnit, rlSrc, kFPReg);
    rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true);
    newLIR2(cUnit, kThumb2Vnegs, rlResult.lowReg, rlSrc.lowReg);
    storeValue(cUnit, rlDest, rlResult);
}

/* Negate a double: operate on register pairs (S2D) with vneg.f64 */
static void genNegDouble(CompilationUnit *cUnit, RegLocation rlDest,
                         RegLocation rlSrc)
{
    RegLocation rlResult;
    rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
    rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true);
    newLIR2(cUnit, kThumb2Vnegd, S2D(rlResult.lowReg, rlResult.highReg),
            S2D(rlSrc.lowReg, rlSrc.highReg));
    storeValueWide(cUnit, rlDest, rlResult);
}

/*
 * To avoid possible conflicts, we use a lot of temps here.
Note that
 * our usage of Thumb2 instruction forms avoids the problems with register
 * reuse for multiply instructions prior to arm6.
 */
static void genMulLong(CompilationUnit *cUnit, RegLocation rlDest,
                       RegLocation rlSrc1, RegLocation rlSrc2)
{
    RegLocation rlResult;
    int resLo = dvmCompilerAllocTemp(cUnit);
    int resHi = dvmCompilerAllocTemp(cUnit);
    int tmp1 = dvmCompilerAllocTemp(cUnit);

    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);

    /* 64x64 -> 64 multiply via schoolbook decomposition:
     *   tmp1         = src2.lo * src1.hi
     *   resHi:resLo  = src2.lo * src1.lo   (unsigned 64-bit product)
     *   tmp1        += src1.lo * src2.hi
     *   resHi       += tmp1                (carry into the high word)
     */
    newLIR3(cUnit, kThumb2MulRRR, tmp1, rlSrc2.lowReg, rlSrc1.highReg);
    newLIR4(cUnit, kThumb2Umull, resLo, resHi, rlSrc2.lowReg, rlSrc1.lowReg);
    newLIR4(cUnit, kThumb2Mla, tmp1, rlSrc1.lowReg, rlSrc2.highReg, tmp1);
    newLIR4(cUnit, kThumb2AddRRR, resHi, tmp1, resHi, 0);
    dvmCompilerFreeTemp(cUnit, tmp1);

    rlResult = dvmCompilerGetReturnWide(cUnit); // Just as a template, will patch
    rlResult.lowReg = resLo;
    rlResult.highReg = resHi;
    storeValueWide(cUnit, rlDest, rlResult);
}

/* Emit a 64-bit op as two 32-bit ops: firstOp on the low words (which may
 * set carry), then secondOp on the high words. */
static void genLong3Addr(CompilationUnit *cUnit, MIR *mir, OpKind firstOp,
                         OpKind secondOp, RegLocation rlDest,
                         RegLocation rlSrc1, RegLocation rlSrc2)
{
    RegLocation rlResult;
    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
    rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
    opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
    opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
                rlSrc2.highReg);
    storeValueWide(cUnit, rlDest, rlResult);
}

/* Allocate and initialize the register pools (core + FP temps) for this
 * compilation unit. */
void dvmCompilerInitializeRegAlloc(CompilationUnit *cUnit)
{
    int numTemps = sizeof(coreTemps)/sizeof(int);
    int numFPTemps = sizeof(fpTemps)/sizeof(int);
    RegisterPool *pool = (RegisterPool *)dvmCompilerNew(sizeof(*pool), true);
    cUnit->regPool = pool;
    pool->numCoreTemps = numTemps;
    pool->coreTemps = (RegisterInfo
*)
        dvmCompilerNew(numTemps * sizeof(*cUnit->regPool->coreTemps), true);
    pool->numFPTemps = numFPTemps;
    pool->FPTemps = (RegisterInfo *)
        dvmCompilerNew(numFPTemps * sizeof(*cUnit->regPool->FPTemps), true);
    dvmCompilerInitPool(pool->coreTemps, coreTemps, pool->numCoreTemps);
    dvmCompilerInitPool(pool->FPTemps, fpTemps, pool->numFPTemps);
    pool->nullCheckedRegs =
        dvmCompilerAllocBitVector(cUnit->numSSARegs, false);
}

/*
 * Generate a Thumb2 IT instruction, which can nullify up to
 * four subsequent instructions based on a condition and its
 * inverse.  The condition applies to the first instruction, which
 * is executed if the condition is met.  The string "guide" consists
 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
 * A "T" means the instruction is executed if the condition is
 * met, and an "E" means the instruction is executed if the condition
 * is not met.
 */
static ArmLIR *genIT(CompilationUnit *cUnit, ArmConditionCode code,
                     const char *guide)
{
    int mask;
    /* Low bit of the condition code selects T vs E bit encoding */
    int condBit = code & 1;
    int altBit = condBit ^ 1;
    int mask3 = 0;
    int mask2 = 0;
    int mask1 = 0;

    //Note: case fallthroughs intentional
    switch(strlen(guide)) {
        case 3:
            mask1 = (guide[2] == 'T') ? condBit : altBit;
        case 2:
            mask2 = (guide[1] == 'T') ? condBit : altBit;
        case 1:
            mask3 = (guide[0] == 'T') ?
condBit : altBit;
            break;
        case 0:
            break;
        default:
            LOGE("Jit: bad case in genIT");
            dvmCompilerAbort(cUnit);
    }
    /* Pack the 4-bit IT mask: T/E bits in descending positions, followed
     * by a terminating 1-bit whose position encodes the block length. */
    mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
           (1 << (3 - strlen(guide)));
    return newLIR2(cUnit, kThumb2It, code, mask);
}

/* Export the Dalvik PC associated with an instruction to the StackSave area */
static ArmLIR *genExportPC(CompilationUnit *cUnit, MIR *mir)
{
    ArmLIR *res;
    int offset = offsetof(StackSaveArea, xtra.currentPc);
    int rDPC = dvmCompilerAllocTemp(cUnit);
    /* Materialize the Dalvik PC, then store it into the save area that
     * sits just below the frame pointer. */
    res = loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset));
    newLIR3(cUnit, kThumb2StrRRI8Predec, rDPC, r5FP,
            sizeof(StackSaveArea) - offset);
    dvmCompilerFreeTemp(cUnit, rDPC);
    return res;
}

/*
 * Handle simple case (thin lock) inline.  If it's complicated, bail
 * out to the heavyweight lock/unlock routines.  We'll use dedicated
 * registers here in order to be in the right position in case we need
 * to bail to dvm[Lock/Unlock]Object(self, object)
 *
 *     r0 -> self pointer [arg0 for dvm[Lock/Unlock]Object
 *     r1 -> object [arg1 for dvm[Lock/Unlock]Object
 *     r2 -> initial contents of object->lock, later result of strex
 *     r3 -> self->threadId
 *     r7 -> temp to hold new lock value [unlock only]
 *     r4 -> allow to be used by utilities as general temp
 *
 * The result of the strex is 0 if we acquire the lock.
 *
 * See comments in Sync.c for the layout of the lock word.
 * Of particular interest to this code is the test for the
 * simple case - which we handle inline.  For monitor enter, the
 * simple case is thin lock, held by no-one.  For monitor exit,
 * the simple case is thin lock, held by the unlocking thread with
 * a recurse count of 0.
 *
 * A minor complication is that there is a field in the lock word
 * unrelated to locking: the hash state.  This field must be ignored, but
 * preserved.
 *
 */
static void genMonitorEnter(CompilationUnit *cUnit, MIR *mir)
{
    RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
    ArmLIR *target;
    ArmLIR *hopTarget;
    ArmLIR *branch;
    ArmLIR *hopBranch;

    assert(LW_SHAPE_THIN == 0);
    loadValueDirectFixed(cUnit, rlSrc, r1);  // Get obj
    dvmCompilerLockAllTemps(cUnit);  // Prepare for explicit register usage
    dvmCompilerFreeTemp(cUnit, r4PC);  // Free up r4 for general use
    genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
    loadWordDisp(cUnit, r6SELF, offsetof(Thread, threadId), r3); // Get threadId
    newLIR3(cUnit, kThumb2Ldrex, r2, r1,
            offsetof(Object, lock) >> 2); // Get object->lock
    opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT); // Align owner
    // Is lock unheld on lock or held by us (==threadId) on unlock?
    newLIR4(cUnit, kThumb2Bfi, r3, r2, 0, LW_LOCK_OWNER_SHIFT - 1);
    // Clear everything but the (preserved) hash state from the old value
    newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
            LW_LOCK_OWNER_SHIFT - 1);
    // Non-zero remainder -> not a simple thin lock; hop to the slow path
    hopBranch = newLIR2(cUnit, kThumb2Cbnz, r2, 0);
    newLIR4(cUnit, kThumb2Strex, r2, r3, r1, offsetof(Object, lock) >> 2);
    dvmCompilerGenMemBarrier(cUnit, kSY);
    // strex result 0 means we acquired the lock; skip the slow path
    branch = newLIR2(cUnit, kThumb2Cbz, r2, 0);

    hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
    hopTarget->defMask = ENCODE_ALL;
    hopBranch->generic.target = (LIR *)hopTarget;

    // Export PC (part 1)
    loadConstant(cUnit, r3, (int) (cUnit->method->insns + mir->offset));

    /* Get dPC of next insn */
    loadConstant(cUnit, r4PC, (int)(cUnit->method->insns + mir->offset +
                 dexGetWidthFromOpcode(OP_MONITOR_ENTER)));
    // Export PC (part 2)
    newLIR3(cUnit, kThumb2StrRRI8Predec, r3, r5FP,
            sizeof(StackSaveArea) -
            offsetof(StackSaveArea, xtra.currentPc));
    /* Call template, and don't return */
    genRegCopy(cUnit, r0, r6SELF);
    genDispatchToHandler(cUnit, TEMPLATE_MONITOR_ENTER);
    // Resume here
    target = newLIR0(cUnit, kArmPseudoTargetLabel);
    target->defMask =
ENCODE_ALL;
    branch->generic.target = (LIR *)target;
}

/*
 * For monitor unlock, we don't have to use ldrex/strex.  Once
 * we've determined that the lock is thin and that we own it with
 * a zero recursion count, it's safe to punch it back to the
 * initial, unlocked thin state with a store word.
 */
static void genMonitorExit(CompilationUnit *cUnit, MIR *mir)
{
    RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
    ArmLIR *target;
    ArmLIR *branch;
    ArmLIR *hopTarget;
    ArmLIR *hopBranch;

    assert(LW_SHAPE_THIN == 0);
    loadValueDirectFixed(cUnit, rlSrc, r1);  // Get obj
    dvmCompilerLockAllTemps(cUnit);  // Prepare for explicit register usage
    dvmCompilerFreeTemp(cUnit, r4PC);  // Free up r4 for general use
    genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
    loadWordDisp(cUnit, r1, offsetof(Object, lock), r2); // Get object->lock
    loadWordDisp(cUnit, r6SELF, offsetof(Thread, threadId), r3); // Get threadId
    // Is lock unheld on lock or held by us (==threadId) on unlock?
    // r7 keeps just the hash-state bits -- the value to store back on unlock
    opRegRegImm(cUnit, kOpAnd, r7, r2,
                (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
    opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT); // Align owner
    // Drop the hash state from r2 so only owner/count remain for the compare
    newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
            LW_LOCK_OWNER_SHIFT - 1);
    // Zero iff thin lock held by us with zero recursion count
    opRegReg(cUnit, kOpSub, r2, r3);
    hopBranch = opCondBranch(cUnit, kArmCondNe);
    dvmCompilerGenMemBarrier(cUnit, kSY);
    // Punch the lock word back to the unlocked thin state (hash preserved)
    storeWordDisp(cUnit, r1, offsetof(Object, lock), r7);
    branch = opNone(cUnit, kOpUncondBr);

    hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
    hopTarget->defMask = ENCODE_ALL;
    hopBranch->generic.target = (LIR *)hopTarget;

    // Export PC (part 1)
    loadConstant(cUnit, r3, (int) (cUnit->method->insns + mir->offset));

    LOAD_FUNC_ADDR(cUnit, r7, (int)dvmUnlockObject);
    genRegCopy(cUnit, r0, r6SELF);
    // Export PC (part 2)
    newLIR3(cUnit, kThumb2StrRRI8Predec, r3, r5FP,
            sizeof(StackSaveArea) -
            offsetof(StackSaveArea, xtra.currentPc));
    opReg(cUnit, kOpBlx, r7);
    /* Did we throw? dvmUnlockObject's result of 0 means failure */
    ArmLIR *branchOver = genCmpImmBranch(cUnit, kArmCondNe, r0, 0);
    loadConstant(cUnit, r0,
                 (int) (cUnit->method->insns + mir->offset +
                 dexGetWidthFromOpcode(OP_MONITOR_EXIT)));
    genDispatchToHandler(cUnit, TEMPLATE_THROW_EXCEPTION_COMMON);

    // Resume here
    target = newLIR0(cUnit, kArmPseudoTargetLabel);
    target->defMask = ENCODE_ALL;
    branch->generic.target = (LIR *)target;
    branchOver->generic.target = (LIR *) target;
}

/* Dispatch monitor-enter/exit to the appropriate generator */
static void genMonitor(CompilationUnit *cUnit, MIR *mir)
{
    if (mir->dalvikInsn.opcode == OP_MONITOR_ENTER)
        genMonitorEnter(cUnit, mir);
    else
        genMonitorExit(cUnit, mir);
}

/*
 * 64-bit 3way compare function.
 *     mov   r7, #-1
 *     cmp   op1hi, op2hi
 *     blt   done
 *     bgt   flip
 *     sub   r7, op1lo, op2lo (treat as unsigned)
 *     beq   done
 *     ite   hi
 *     mov(hi)   r7, #-1
 *     mov(!hi)  r7, #1
 * flip:
 *     neg   r7
 * done:
 */
static void genCmpLong(CompilationUnit *cUnit, MIR *mir,
                       RegLocation rlDest, RegLocation rlSrc1,
                       RegLocation rlSrc2)
{
    RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change
    ArmLIR *target1;
    ArmLIR *target2;
    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
    rlTemp.lowReg = dvmCompilerAllocTemp(cUnit);
    /* Assume op1 < op2; fixed up below if the high words differ otherwise */
    loadConstant(cUnit, rlTemp.lowReg, -1);
    opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
    ArmLIR *branch1 = opCondBranch(cUnit, kArmCondLt);
    ArmLIR *branch2 = opCondBranch(cUnit, kArmCondGt);
    /* High words equal: compare low words as unsigned via subtraction */
    opRegRegReg(cUnit, kOpSub, rlTemp.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
    ArmLIR *branch3 = opCondBranch(cUnit, kArmCondEq);

    /* IT block: result is -1 if op1lo > op2lo (unsigned), else +1 */
    genIT(cUnit, kArmCondHi, "E");
    newLIR2(cUnit, kThumb2MovImmShift, rlTemp.lowReg, modifiedImmediate(-1));
    loadConstant(cUnit, rlTemp.lowReg, 1);
    genBarrier(cUnit);

    /* flip: high words compared greater-than; negate the assumed result */
    target2 = newLIR0(cUnit, kArmPseudoTargetLabel);
    target2->defMask = -1;
    opRegReg(cUnit, kOpNeg, rlTemp.lowReg, rlTemp.lowReg);

    /* done: */
    target1 = newLIR0(cUnit, kArmPseudoTargetLabel);
    target1->defMask = -1;

    storeValue(cUnit, rlDest, rlTemp);

    branch1->generic.target = (LIR *)target1;
    branch2->generic.target = (LIR *)target2;
    branch3->generic.target = branch1->generic.target;
}

/* Inline Math.abs(float): clear the sign bit with vabs.f32 */
static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir)
{
    RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
    RegLocation rlDest = inlinedTarget(cUnit, mir, true);
    rlSrc = loadValue(cUnit, rlSrc, kFPReg);
    RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true);
    newLIR2(cUnit, kThumb2Vabss, rlResult.lowReg, rlSrc.lowReg);
    storeValue(cUnit, rlDest, rlResult);
    return false;
}

/* Inline Math.abs(double): clear the sign bit with vabs.f64 */
static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir)
{
    RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1);
    RegLocation rlDest = inlinedTargetWide(cUnit, mir, true);
    rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
    RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true);
    newLIR2(cUnit, kThumb2Vabsd, S2D(rlResult.lowReg, rlResult.highReg),
            S2D(rlSrc.lowReg, rlSrc.highReg));
    storeValueWide(cUnit, rlDest, rlResult);
    return false;
}

/* Inline Math.min/max(int, int) using a branchless compare + IT block */
static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin)
{
    RegLocation rlSrc1 = dvmCompilerGetSrc(cUnit, mir, 0);
    RegLocation rlSrc2 = dvmCompilerGetSrc(cUnit, mir, 1);
    rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
    rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg);
    RegLocation rlDest = inlinedTarget(cUnit, mir, false);
    RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
    opRegReg(cUnit, kOpCmp, rlSrc1.lowReg, rlSrc2.lowReg);
    /* "E" guide: first mov when condition holds, second when it doesn't */
    genIT(cUnit, (isMin) ? kArmCondGt : kArmCondLt, "E");
    opRegReg(cUnit, kOpMov, rlResult.lowReg, rlSrc2.lowReg);
    opRegReg(cUnit, kOpMov, rlResult.lowReg, rlSrc1.lowReg);
    genBarrier(cUnit);
    storeValue(cUnit, rlDest, rlResult);
    return false;
}

/* Multiply by a literal with exactly two bits set:
 * result = (src << secondBit-firstBit) + src, then shift left by firstBit. */
static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit,
        RegLocation rlSrc, RegLocation rlResult, int lit,
        int firstBit, int secondBit)
{
    opRegRegRegShift(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, rlSrc.lowReg,
                     encodeShift(kArmLsl, secondBit - firstBit));
    if (firstBit != 0) {
        opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlResult.lowReg, firstBit);
    }
}