/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <sys/mman.h>
#include "Dalvik.h"
#include "libdex/DexOpcodes.h"
#include "compiler/Compiler.h"
#include "compiler/CompilerIR.h"
#include "interp/Jit.h"
#include "libdex/DexFile.h"
#include "Lower.h"
#include "NcgAot.h"
#include "compiler/codegen/CompilerCodegen.h"

/* Init values when a predicted chain is initially assembled */
/* E7FE is branch to self */
#define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe

/* Target-specific save/restore */
extern "C" void dvmJitCalleeSave(double *saveArea);
extern "C" void dvmJitCalleeRestore(double *saveArea);

/*
 * Determine the initial instruction set to be used for this trace.
 * Later components may decide to change this.
 */
//JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
JitInstructionSetType dvmCompilerInstructionSet(void)
{
    return DALVIK_JIT_IA32;
}

JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
{
    return DALVIK_JIT_IA32;
}

/* we don't use template for IA32 */
void *dvmCompilerGetInterpretTemplate()
{
    return NULL;
}

/* Track the number of times that the code cache is patched */
#if defined(WITH_JIT_TUNING)
#define UPDATE_CODE_CACHE_PATCHES()    (gDvmJit.codeCachePatches++)
#else
#define UPDATE_CODE_CACHE_PATCHES()
#endif

bool dvmCompilerArchInit() {
    /* Target-specific configuration */
    gDvmJit.jitTableSize = 1 << 12;
    gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
    if (gDvmJit.threshold == 0) {
        gDvmJit.threshold = 255;
    }
    gDvmJit.codeCacheSize = 512*1024;
    gDvmJit.optLevel = kJitOptLevelO1;

    //Disable Method-JIT
    gDvmJit.disableOpt |= (1 << kMethodJit);

#if defined(WITH_SELF_VERIFICATION)
    /* Force into blocking mode */
    gDvmJit.blockingMode = true;
    gDvm.nativeDebuggerActive = true;
#endif

    // Make sure all threads have current values
    dvmJitUpdateThreadStateAll();

    return true;
}
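/*
 * Illustration: jitTableSize is kept a power of two so that jitTableMask can
 * fold a Dalvik PC into a table index with a single AND, roughly:
 *
 *     u4 idx = dvmJitHashMask(dPC, gDvmJit.jitTableMask);
 *
 * (dvmJitHashMask() is the hash helper from interp/Jit.h; it is shown here
 * only to motivate the size/mask pairing above.)
 */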
void dvmCompilerPatchInlineCache(void)
{
    int i;
    PredictedChainingCell *minAddr, *maxAddr;

    /* Nothing to be done */
    if (gDvmJit.compilerICPatchIndex == 0) return;

    /*
     * Since all threads are already stopped we don't really need to acquire
     * the lock. But a race condition could easily be introduced in the future
     * without anyone paying attention, so we still acquire the lock here.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);

    /* Initialize the min/max address range */
    minAddr = (PredictedChainingCell *)
        ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
    maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;

    for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
        ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
        PredictedChainingCell *cellAddr = workOrder->cellAddr;
        PredictedChainingCell *cellContent = &workOrder->cellContent;
        ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
                                                workOrder->classLoader);

        assert(clazz->serialNumber == workOrder->serialNumber);

        /* Use the newly resolved clazz pointer */
        cellContent->clazz = clazz;

        if (cellAddr->clazz == NULL) {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
                      cellAddr,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        } else {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
                      "patched",
                      cellAddr,
                      cellAddr->clazz->descriptor,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        }

        /* Patch the chaining cell */
        *cellAddr = *cellContent;
        minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
        maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
    }

    PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    gDvmJit.compilerICPatchIndex = 0;
    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
}
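/*
 * Note on minAddr/maxAddr above: they bound the region of patched cells. On
 * targets that require an explicit icache flush this range would drive the
 * flush; IA32 has a coherent icache (see dvmCompilerCacheFlush at the bottom
 * of this file), so the range is currently unused.
 */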
/* Target-specific cache clearing */
void dvmCompilerCacheClear(char *start, size_t size)
{
    /* "0xFF 0xFF" is an invalid opcode for x86. */
    memset(start, 0xFF, size);
}

/* for JIT debugging, to be implemented */
void dvmJitCalleeSave(double *saveArea) {
}

void dvmJitCalleeRestore(double *saveArea) {
}

void dvmJitToInterpSingleStep() {
}

JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
                                            const JitEntry *knownEntry) {
    return NULL;
}

void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
{
}

void dvmCompilerArchDump(void)
{
}

char *getTraceBase(const JitEntry *p)
{
    return NULL;
}

void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
{
}

void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
{
}

void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
{
    // Method-based JIT not supported for x86.
}

void dvmJitScanAllClassPointers(void (*callback)(void *))
{
}

/* Handy function to retrieve the profile count */
static inline int getProfileCount(const JitEntry *entry)
{
    if (entry->dPC == 0 || entry->codeAddress == 0)
        return 0;

    u4 *pExecutionCount = (u4 *) getTraceBase(entry);

    return pExecutionCount ? *pExecutionCount : 0;
}

/* qsort callback function */
static int sortTraceProfileCount(const void *entry1, const void *entry2)
{
    const JitEntry *jitEntry1 = (const JitEntry *)entry1;
    const JitEntry *jitEntry2 = (const JitEntry *)entry2;

    JitTraceCounter_t count1 = getProfileCount(jitEntry1);
    JitTraceCounter_t count2 = getProfileCount(jitEntry2);
    return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
}

/* Sort the trace profile counts and dump them */
void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
{
    JitEntry *sortedEntries;
    int numTraces = 0;
    unsigned long counts = 0;
    unsigned int i;

    /* Make sure that the table is not changing */
    dvmLockMutex(&gDvmJit.tableLock);

    /* Sort the entries by descending order */
    sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
    if (sortedEntries == NULL)
        goto done;
    memcpy(sortedEntries, gDvmJit.pJitEntryTable,
           sizeof(JitEntry) * gDvmJit.jitTableSize);
    qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
          sortTraceProfileCount);

    /* Count the traces and accumulate their execution counts (always 0 on
     * IA32, since getTraceBase() is a stub here) */
    for (i = 0; i < gDvmJit.jitTableSize; i++) {
        if (sortedEntries[i].dPC != 0) {
            numTraces++;
            counts += getProfileCount(&sortedEntries[i]);
        }
    }
    if (numTraces == 0)
        numTraces = 1;
    ALOGI("JIT: Average execution count -> %d", (int)(counts / numTraces));

    free(sortedEntries);
done:
    dvmUnlockMutex(&gDvmJit.tableLock);
    return;
}

void jumpWithRelOffset(char* instAddr, int relOffset) {
    stream = instAddr;
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
    dump_imm(Mnemonic_JMP, immSize, relOffset);
}

// Works whether or not the instructions for the target basic block have been
// generated yet
LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
    stream = instAddr;
    bool unknown;
    OpndSize size;
    int relativeNCG = targetId;
    relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
    unconditional_jump_int(relativeNCG, size);
    return NULL;
}

LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
    stream = instAddr;
    bool unknown;
    OpndSize size;
    int relativeNCG = targetId;
    relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
    conditional_jump_int(cc, relativeNCG, size);
    return NULL;
}
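/*
 * The inline-cache patch logic below resolves each request in one of four
 * ways:
 *   1) cell still in its init state      -> patch it in place (fast path)
 *   2) predicted class keeps changing    -> only stage it in the filter
 *   3) same method, different class      -> lock-free class-only update
 *   4) the branch itself must change     -> queue for the next safe point
 */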
/*
 * Attempt to enqueue a work order to patch an inline cache for a predicted
 * chaining cell used by virtual/interface calls.
 */
static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
                                    PredictedChainingCell *newContent)
{
    bool result = true;

    /*
     * Make sure only one thread gets here since updating the cell (i.e. the
     * fast path) and queueing the request (i.e. the queued path) have to be
     * done in an atomic fashion.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    /* Fast path for uninitialized chaining cell */
    if (cellAddr->clazz == NULL &&
        cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->method = newContent->method;
        cellAddr->branch = newContent->branch;
        cellAddr->branch2 = newContent->branch2;

        /*
         * The update order matters - make sure clazz is updated last since it
         * will bring the uninitialized chaining cell to life.
         */
        android_atomic_release_store((int32_t)newContent->clazz,
            (volatile int32_t *)(void*) &cellAddr->clazz);
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if 0
        MEM_BARRIER();
        cellAddr->clazz = newContent->clazz;
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
#endif
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchInit++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
                  cellAddr, newContent->clazz->descriptor,
                  newContent->method->name, newContent->method));
    /* Check if this is a frequently missed clazz */
    } else if (cellAddr->stagedClazz != newContent->clazz) {
        /* Not proven to be frequent yet - build up the filter cache */
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->stagedClazz = newContent->clazz;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchRejected++;
#endif
    /*
     * Different classes but same method implementation - it is safe to just
     * patch the class value without the need to stop the world.
     */
    } else if (cellAddr->method == newContent->method) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->clazz = newContent->clazz;
        /* No need to flush the cache here since the branch is not patched */
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchLockFree++;
#endif
    /*
     * Cannot patch the chaining cell inline - queue it until the next safe
     * point.
     */
    } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) {
        int index = gDvmJit.compilerICPatchIndex++;
        const ClassObject *clazz = newContent->clazz;

        gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
        gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
        gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
        gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
        /* For verification purposes only */
        gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchQueued++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor,
                  newContent->method->name));
    } else {
        /* Queue is full - just drop this patch request */
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchDropped++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor,
                  newContent->method->name));
    }

    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
    return result;
}
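/*
 * Usage: dvmJitToPatchPredictedChain() below is the producer for this queue;
 * queued work orders are drained by dvmCompilerPatchInlineCache() above once
 * all mutator threads have been suspended.
 */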
/*
 * This method is called from the invoke templates for virtual and interface
 * methods to speculatively set up a chain to the callee. The templates are
 * written in assembly and have set up method, cell, and clazz in r0, r2, and
 * r3 respectively, so there is an unused argument in the list. Upon return,
 * one of the following three results may happen:
 *   1) Chain is not set up because the callee is native. Reset the rechain
 *      count to a big number so that it will take a long time before the
 *      next rechain attempt happens.
 *   2) Chain is not set up because the callee has not been compiled yet.
 *      Reset the rechain count to a small number and retry in the near
 *      future.
 *   3) Ask all other threads to stop before patching this chaining cell.
 *      This is required because another thread may have passed the class
 *      check but not yet reached the chaining cell to follow the chain. If
 *      we patch the content before halting the other thread, there is a
 *      small window for a race in which it follows the new but wrong chain
 *      and invokes a different method.
 */
const Method *dvmJitToPatchPredictedChain(const Method *method,
                                          Thread *self,
                                          PredictedChainingCell *cell,
                                          const ClassObject *clazz)
{
    int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
    /* Don't come back here for a long time if the method is native */
    if (dvmIsNativeMethod(method)) {
        UNPROTECT_CODE_CACHE(cell, sizeof(*cell));

        /*
         * Put a non-zero/bogus value in the clazz field so that it won't
         * trigger immediate patching and will continue to fail to match with
         * a real clazz pointer.
         */
        cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cell, sizeof(*cell));
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
                  cell, method->name));
        goto done;
    }
    {
        int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);

        /*
         * The callee has not been compiled yet. Reset the counter to a small
         * value and come back to check soon.
         */
        if ((tgtAddr == 0) ||
            ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
                      cell, method->clazz->descriptor, method->name));
            goto done;
        }

        PredictedChainingCell newCell;

        if (cell->clazz == NULL) {
            newRechainCount = self->icRechainCount;
        }

        int relOffset = (int) tgtAddr - (int)cell;
        OpndSize immSize = estOpndSizeFromImm(relOffset);
        int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
        relOffset -= jumpSize;
        COMPILER_TRACE_CHAINING(
            ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
                  cell, method->clazz->descriptor, method->name, jumpSize));
        //can't use stream here since it is used by the compilation thread
        dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset,
                               (char*) (&newCell)); //update newCell.branch

        newCell.clazz = clazz;
        newCell.method = method;

        /*
         * Enter the work order into the queue; the chaining cell will be
         * patched the next time a safe point is entered.
         *
         * If the enqueuing fails, reset the rechain count to a normal value
         * so that it won't get indefinitely delayed.
         */
        inlineCachePatchEnqueue(cell, &newCell);
    }
done:
    self->icRechainCount = newRechainCount;
    return method;
}
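/*
 * Layout of a translation in the code cache, as written at the end of
 * dvmCompilerMIR2LIR below and consumed by dvmJitUnchain():
 *
 *     codeAddr - 4:  u2 offset (from codeAddr) of the chain cell counts
 *     codeAddr - 2:  u2 offset (from codeAddr) of the first chaining cell
 *     codeAddr    :  translated code, 16-byte aligned
 *                    ... chaining cells, grouped by type ...
 *                    ... chain cell counts (4-byte aligned) ...
 */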
/*
 * Unchain a trace given the starting address of the translation
 * in the code cache. Refer to the diagram in dvmCompilerAssembleLIR.
 * For ARM, it returns the address following the last cell unchained.
 * For IA32, it returns NULL since a cache flush is not required.
 */
u4* dvmJitUnchain(void* codeAddr)
{
    /* codeAddr is 4-byte aligned, so is the chain cell count offset */
    u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
    u2 chainCellCountOffset = *pChainCellCountOffset;
    /* the chain cell counts information is 4-byte aligned */
    ChainCellCounts *pChainCellCounts =
        (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
    u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
    u2 chainCellOffset = *pChainCellOffset;
    u1* pChainCells;
    int i, j;
    PredictedChainingCell *predChainCell;
    int padding;

    /* Locate the beginning of the chain cell region */
    pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);

    /* The cells are sorted in order - walk through them and reset */
    for (i = 0; i < kChainingCellGap; i++) {
        /* For hot, normal, and singleton chaining:
               nop              //padding
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after chaining:
               nop
               jmp imm
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after unchaining:
               nop
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           The nops pad the cell so that the jmp's 4-byte target operand is
           4-byte aligned; counted from that operand, the cell occupies
           4+5+5+2 bytes (jmp operand, two movl imm32, call reg).
           Space for a predicted chaining cell: 5 words = 20 bytes.
        */
        int elemSize = 0;
        if (i == kChainingCellInvokePredicted) {
            elemSize = 20;
        }
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: unchaining type %d count %d", i,
                  pChainCellCounts->u.count[i]));

        for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
            switch(i) {
                case kChainingCellNormal:
                case kChainingCellHot:
                case kChainingCellInvokeSingleton:
                case kChainingCellBackwardBranch:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
                    /* Step to the jmp's 4-byte-aligned target operand */
                    pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
                    elemSize = 4+5+5+2;
                    /* Zero the jmp operand, i.e. restore "jmp 0" */
                    memset(pChainCells, 0, 4);
                    break;
                case kChainingCellInvokePredicted:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of predicted"));
                    /* 4-byte aligned */
                    padding = (4 - ((u4)pChainCells & 3)) & 3;
                    pChainCells += padding;
                    predChainCell = (PredictedChainingCell *) pChainCells;
                    /*
                     * Another mutator thread could be racing to use this
                     * particular predicted cell, having already passed the
                     * clazz comparison but not yet followed the chain. So we
                     * cannot safely wipe the method and branch, but it is
                     * safe to clear the clazz, which serves as the key.
                     */
                    predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
                    break;
                default:
                    ALOGE("Unexpected chaining type: %d", i);
                    dvmAbort();  // dvmAbort OK here - can't safely recover
            }
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
            pChainCells += elemSize;  /* Advance by a fixed number of bytes */
        }
    }
    return NULL;
}
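/*
 * After unchaining, a normal/hot/singleton cell is back to "jmp 0" and can
 * be re-chained lazily through dvmJitChain(); a predicted cell, whose clazz
 * key was cleared above, gets re-chained on a later invocation through
 * dvmJitToPatchPredictedChain().
 */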
/* Unchain all translations in the cache */
void dvmJitUnchainAll()
{
    ALOGV("Jit Runtime: unchaining all");
    if (gDvmJit.pJitEntryTable != NULL) {
        COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
        dvmLockMutex(&gDvmJit.tableLock);

        UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

        for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
            if (gDvmJit.pJitEntryTable[i].dPC &&
                !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
                gDvmJit.pJitEntryTable[i].codeAddress) {
                dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
            }
        }

        PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

        dvmUnlockMutex(&gDvmJit.tableLock);
        gDvmJit.translationChains = 0;
    }
    gDvmJit.hasNewChain = false;
}

#define P_GPR_1 PhysicalReg_EBX
/* Add an additional jump instruction, keeping the jump target 4-byte aligned */
static void insertJumpHelp()
{
    int rem = (uint)stream % 4;
    int nop_size = 3 - rem;
    dump_nop(nop_size);
    unconditional_jump_int(0, OpndSize_32);
    return;
}

/* Chaining cell for code that may need warmup. */
/* ARM assembly:
       ldr  r0, [r6, #76]   (why a single instruction to access a member of
                             the glue structure?)
       blx  r0
       data 0xb23a          //bytecode address: 0x5115b23a
       data 0x5115
   IA32 assembly:
       jmp  0               //5 bytes
       movl address, %ebx
       movl dvmJitToInterpNormal, %eax
       call %eax
       <-- return address
*/
static void handleNormalChainingCell(CompilationUnit *cUnit,
                                     unsigned int offset, int blockId,
                                     LowOpBlockLabel* labelList)
{
    ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if (dump_x86_inst)
        ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jmp 0" instruction; it may be modified during JIT
     * chaining. This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}
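/*
 * Lifecycle of the cell emitted above: while unchained, the "jmp 0" falls
 * through to the movl/call pair, which bails out to the interpreter via
 * dvmJitToInterpNormal; once the trace is chained, dvmJitChain() rewrites
 * the jmp operand so execution branches directly to the chained translation.
 */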
/*
 * Chaining cell for instructions that immediately follow already translated
 * code.
 */
static void handleHotChainingCell(CompilationUnit *cUnit,
                                  unsigned int offset, int blockId,
                                  LowOpBlockLabel* labelList)
{
    ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if (dump_x86_inst)
        ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jmp 0" instruction; it may be modified during JIT
     * chaining. This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for branches that branch back into the same basic block */
static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
                                             unsigned int offset, int blockId,
                                             LowOpBlockLabel* labelList)
{
    ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if (dump_x86_inst)
        ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jmp 0" instruction; it may be modified during JIT
     * chaining. This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for monomorphic method invocations. */
static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
                                              const Method *callee, int blockId,
                                              LowOpBlockLabel* labelList)
{
    ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
          cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
    if (dump_x86_inst)
        ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
    /* Add one additional "jmp 0" instruction; it may be modified during JIT
     * chaining. This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
}
#undef P_GPR_1
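/*
 * The predicted chaining cell emitted below occupies five 32-bit words.
 * Mapping them onto the PredictedChainingCell fields referenced elsewhere in
 * this file (mapping inferred from the stores below and from
 * inlineCachePatchEnqueue):
 *
 *     word 0: branch   (init: PREDICTED_CHAIN_BX_PAIR_INIT)
 *     word 1: branch2
 *     word 2: clazz    (the key compared at invoke time)
 *     word 3: method
 *     word 4: rechain counter
 */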
/* Chaining cell for polymorphic (virtual/interface) method invocations with
 * a predicted target. */
static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
{
    if (dump_x86_inst)
        ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
#ifndef PREDICTED_CHAINING
    //assume rPC for callee->insns in %ebx
    scratchRegs[0] = PhysicalReg_EAX;
#if defined(WITH_JIT_TUNING)
    /* Predicted chaining is not enabled. Fall back to the interpreter and
     * indicate that predicted chaining was not done.
     */
    move_imm_to_reg(OpndSize_32, kInlineCacheMiss, PhysicalReg_EDX, true);
#endif
    call_dvmJitToInterpTraceSelectNoChain();
#else
    /* Make sure the section for the predicted chaining cell is 4-byte aligned */
    //int padding = (4 - ((u4)stream & 3)) & 3;
    //stream += padding;
    int* streamData = (int*)stream;
    /* Should not be executed in the initial state */
    streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
    streamData[1] = 0;
    /* To be filled: class */
    streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
    /* To be filled: method */
    streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
    /*
     * Rechain count. The initial value of 0 here will trigger chaining upon
     * the first invocation of this callsite.
     */
    streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
#if 0
    ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream),
          *((int*)(stream+4)), *((int*)(stream+8)), *((int*)(stream+12)));
#endif
    stream += 20;  //5 * 4 bytes
#endif
}

/* Load the Dalvik PC into r0 and jump to the specified target */
static void handlePCReconstruction(CompilationUnit *cUnit,
                                   LowOpBlockLabel *targetLabel)
{
#if 0
    LowOp **pcrLabel =
        (LowOp **) cUnit->pcReconstructionList.elemList;
    int numElems = cUnit->pcReconstructionList.numUsed;
    int i;
    for (i = 0; i < numElems; i++) {
        dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
        /* r0 = dalvik PC */
        loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
        genUnconditionalBranch(cUnit, targetLabel);
    }
#endif
}

//use the O0 code generator for hoisted checks outside of the loop
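/*
 * Rough illustration of the count-up case (assumed loop shape): for
 *
 *     for (i = lo; i < end; i++) { ... a[i + maxC] ... }
 *
 * the checks below are hoisted out of the loop body: a null test on the
 * array, then one unsigned compare of end + maxC (adjusted by one when the
 * loop condition is ">=") against a.length, punting to the exception block
 * whenever an out-of-bounds access could occur.
 */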
/*
 * vA = arrayReg;
 * vB = idxReg;
 * vC = endConditionReg;
 * arg[0] = maxC
 * arg[1] = minC
 * arg[2] = loopBranchConditionCode
 */
#define P_GPR_1 PhysicalReg_EBX
#define P_GPR_2 PhysicalReg_ECX
static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
{
    /*
     * NOTE: these synthesized blocks don't have ssa names assigned
     * for Dalvik registers. However, because they dominate the following
     * blocks we can simply use the Dalvik name w/ subscript 0 as the
     * ssa name.
     */
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign the array in the virtual register to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign the end condition in the virtual register to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
    export_pc();
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);

    int delta = maxC;
    /*
     * If the loop end condition is ">=" instead of ">", then the largest value
     * of the index is "endCondition - 1".
     */
    if (dInsn->arg[2] == OP_IF_GE) {
        delta--;
    }

    if (delta < 0) { //+delta
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
    } else if (delta > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
    }
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
}

/*
 * vA = arrayReg;
 * vB = idxReg;
 * vC = endConditionReg;
 * arg[0] = maxC
 * arg[1] = minC
 * arg[2] = loopBranchConditionCode
 */
static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
{
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign the array in the virtual register to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign the index in the virtual register to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
    export_pc();
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);

    if (maxC < 0) {
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
    } else if (maxC > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
    }
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
}
#undef P_GPR_1
#undef P_GPR_2

/*
 * vA = idxReg;
 * vB = minC;
 */
#define P_GPR_1 PhysicalReg_ECX
static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
{
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int minC = dInsn->vB;
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //index
    export_pc();
    compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
}
#undef P_GPR_1
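/*
 * Worked example for the check above: with minC == -2 (the most negative
 * constant offset in the loop body is a[i - 2]), the compare becomes
 * "cmp $2, idx", and Condition_C (unsigned below) fires exactly when
 * idx < 2, i.e. when idx - 2 would go negative.
 */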
#ifdef WITH_JIT_INLINING
static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
{
    CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
    if (gDvm.executionMode == kExecutionModeNcgO0) {
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
        compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
        export_pc(); //use %edx
        conditional_jump_global_API(Condition_E, "common_errNullObject", false);
        move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
        compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
    } else {
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
        nullCheck(5, false, 1, mir->dalvikInsn.vC);
        move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
        compare_reg_reg(4, false, 6, false);
    }

    //the immediate will be updated later in genLandingPadForMispredictedCallee
    streamMisPred = stream;
    callsiteInfo->misPredBranchOver =
        (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
}
#endif

/* Extended MIR instructions like PHI */
void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
{
    ExecutionMode origMode = gDvm.executionMode;
    gDvm.executionMode = kExecutionModeNcgO0;
    switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
        case kMirOpPhi: {
            break;
        }
        case kMirOpNullNRangeUpCheck: {
            genHoistedChecksForCountUpLoop(cUnit, mir);
            break;
        }
        case kMirOpNullNRangeDownCheck: {
            genHoistedChecksForCountDownLoop(cUnit, mir);
            break;
        }
        case kMirOpLowerBound: {
            genHoistedLowerBoundCheck(cUnit, mir);
            break;
        }
        case kMirOpPunt: {
            break;
        }
#ifdef WITH_JIT_INLINING
        case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
            genValidationForPredictedInline(cUnit, mir);
            break;
        }
#endif
        default:
            break;
    }
    gDvm.executionMode = origMode;
}

static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
                                int bodyId)
{
    /*
     * Next, create two branches - one branch over to the loop body and the
     * other branch to the PCR cell to punt.
     */
    //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
    //setupResourceMasks(branchToBody);
    //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);

#if 0
    LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
    branchToPCR->opCode = kThumbBUncond;
    branchToPCR->generic.target = (LIR *) pcrLabel;
    setupResourceMasks(branchToPCR);
    cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
#endif
}

/* Check whether we can merge a block ending in an unconditional goto with
 * its taken target */
bool mergeBlock(BasicBlock *bb) {
    if (bb->blockType == kDalvikByteCode &&
        bb->firstMIRInsn != NULL &&
        (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
         bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
         bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
        bb->fallThrough == NULL) {// &&
        //cUnit->hasLoop) {
        //ALOGI("merge blocks ending with goto at index %d", i);
        MIR* prevInsn = bb->lastMIRInsn->prev;
        if (bb->taken == NULL) return false;
        MIR* mergeInsn = bb->taken->firstMIRInsn;
        if (mergeInsn == NULL) return false;
        if (prevInsn == NULL) { //the block has a single instruction
            bb->firstMIRInsn = mergeInsn;
        } else {
            prevInsn->next = mergeInsn; //remove the goto from the chain
        }
        mergeInsn->prev = prevInsn;
        bb->lastMIRInsn = bb->taken->lastMIRInsn;
        bb->taken->firstMIRInsn = NULL; //block being merged in
        bb->fallThrough = bb->taken->fallThrough;
        bb->taken = bb->taken->taken;
        return true;
    }
    return false;
}

static int genTraceProfileEntry(CompilationUnit *cUnit)
{
    cUnit->headerSize = 6;
    if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
        (gDvmJit.profileMode == kTraceProfilingDisabled)) {
        return 12;
    } else {
        return 4;
    }
}

#define PRINT_BUFFER_LEN 1024
/* Print the code block in the code cache in the range [startAddr, endAddr)
 * in readable format.
 */
void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
{
    char strbuf[PRINT_BUFFER_LEN];
    unsigned char *addr;
    unsigned char *next_addr;
    int n;

    if (gDvmJit.printBinary) {
        // print binary in bytes
        n = 0;
        for (addr = startAddr; addr < endAddr; addr++) {
            n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
            if (n > PRINT_BUFFER_LEN - 10) {
                ALOGD("## %s", strbuf);
                n = 0;
            }
        }
        if (n > 0)
            ALOGD("## %s", strbuf);
    }

    // print disassembled instructions
    addr = startAddr;
    while (addr < endAddr) {
        next_addr = reinterpret_cast<unsigned char*>
            (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
                                       strbuf, PRINT_BUFFER_LEN));
        if (addr != next_addr) {
            ALOGD("** %p: %s", addr, strbuf);
        } else { // check whether this is nop padding
            if (addr[0] == 0x90) {
                ALOGD("** %p: NOP (1 byte)", addr);
                next_addr += 1;
            } else if (addr[0] == 0x66 && addr[1] == 0x90) {
                ALOGD("** %p: NOP (2 bytes)", addr);
                next_addr += 2;
            } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
                ALOGD("** %p: NOP (3 bytes)", addr);
                next_addr += 3;
            } else {
                ALOGD("** unable to decode binary at %p", addr);
                break;
            }
        }
        addr = next_addr;
    }
}
/* 4 is the number of additional bytes needed for chaining information for a
 * trace: 2 bytes for the chaining cell count offset and 2 bytes for the
 * chaining cell offset */
#define EXTRA_BYTES_FOR_CHAINING 4
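/*
 * These four bytes sit immediately before the 16-byte-aligned code start.
 * They are filled in at the end of dvmCompilerMIR2LIR and read back by
 * dvmJitUnchain() as the two u2 offsets at codeAddr-4 and codeAddr-2.
 */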
/* Entry function to invoke the backend of the JIT compiler */
void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
{
    dump_x86_inst = cUnit->printMe;
    /* Used to hold the labels of each block */
    LowOpBlockLabel *labelList =
        (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
    LowOp *headLIR = NULL;
    GrowableList chainingListByType[kChainingCellLast];
    unsigned int i, padding;

    /*
     * Initialize the chaining lists, one per chaining cell type.
     */
    for (i = 0; i < kChainingCellLast; i++) {
        dvmInitGrowableList(&chainingListByType[i], 2);
    }

    /* Clear the visited flag for each block */
    dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
                                          kAllNodes, false /* isIterative */);

    GrowableListIterator iterator;
    dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);

    /* Traces start with a profiling entry point. Generate it here. */
    cUnit->profileCodeSize = genTraceProfileEntry(cUnit);

    //BasicBlock **blockList = cUnit->blockList;
    GrowableList *blockList = &cUnit->blockList;
    BasicBlock *bb;

    info->codeAddress = NULL;
    stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;

    // TODO: compile into a temporary buffer and then copy into the code cache.
    // That would let us leave the code cache unprotected for a shorter time.
    size_t unprotected_code_cache_bytes =
        gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING;
    UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);

    streamStart = stream; /* trace start before alignment */
    stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add
                                           the bytes before the alignment. */
    stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align the trace
                                                              to 16 bytes */
    streamMethodStart = stream; /* code start */
    for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
        labelList[i].lop.generic.offset = -1;
    }
    cUnit->exceptionBlockId = -1;
    for (i = 0; i < blockList->numUsed; i++) {
        bb = (BasicBlock *) blockList->elemList[i];
        if (bb->blockType == kExceptionHandling)
            cUnit->exceptionBlockId = i;
    }
    startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
    if (gDvm.executionMode == kExecutionModeNcgO1) {
        //merge blocks ending with "goto" with the fall through block
        if (cUnit->jitMode != kJitLoop)
            for (i = 0; i < blockList->numUsed; i++) {
                bb = (BasicBlock *) blockList->elemList[i];
                bool merged = mergeBlock(bb);
                while (merged) merged = mergeBlock(bb);
            }
        for (i = 0; i < blockList->numUsed; i++) {
            bb = (BasicBlock *) blockList->elemList[i];
            if (bb->blockType == kDalvikByteCode &&
                bb->firstMIRInsn != NULL) {
                preprocessingBB(bb);
            }
        }
        preprocessingTrace();
    }
    /* Handle the content in each basic block */
    for (i = 0; ; i++) {
        MIR *mir;
        bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
        if (bb == NULL) break;
        if (bb->visited == true) continue;

        labelList[i].immOpnd.value = bb->startOffset;

        if (bb->blockType >= kChainingCellLast) {
            /*
             * Append the label pseudo LIR first. Chaining cells will be
             * handled separately afterwards.
             */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
        }

        if (bb->blockType == kEntryBlock) {
            labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
            if (bb->firstMIRInsn == NULL) {
                continue;
            } else {
                setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
                //&labelList[blockList[i]->fallThrough->id]);
            }
        } else if (bb->blockType == kExitBlock) {
            labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
            labelList[i].lop.generic.offset = (stream - streamMethodStart);
            goto gen_fallthrough;
        } else if (bb->blockType == kDalvikByteCode) {
            if (bb->hidden == true) continue;
            labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
            /* Reset the register state */
#if 0
            resetRegisterScoreboard(cUnit);
#endif
        } else {
            switch (bb->blockType) {
                case kChainingCellNormal:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellNormal], i);
                    break;
                case kChainingCellInvokeSingleton:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
                    labelList[i].immOpnd.value =
                        (int) bb->containingMethod;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellInvokeSingleton], i);
                    break;
                case kChainingCellInvokePredicted:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
                    /*
                     * Move the cached method pointer from operand 1 to 0.
                     * Operand 0 was clobbered earlier in this routine to store
                     * the block starting offset, which is not applicable to
                     * the predicted chaining cell.
                     */
                    //TODO
                    //labelList[i].operands[0] = labelList[i].operands[1];

                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellInvokePredicted], i);
                    break;
                case kChainingCellHot:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_HOT;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellHot], i);
                    break;
                case kPCReconstruction:
                    /* Make sure exception handling block is next */
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
                    //assert (i == cUnit->numBlocks - 2);
                    labelList[i].lop.generic.offset = (stream - streamMethodStart);
                    handlePCReconstruction(cUnit,
                                           &labelList[cUnit->puntBlock->id]);
                    break;
                case kExceptionHandling:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
                    labelList[i].lop.generic.offset = (stream - streamMethodStart);
                    //if (cUnit->pcReconstructionList.numUsed) {
                    scratchRegs[0] = PhysicalReg_EAX;
                    jumpToInterpPunt();
                    //call_dvmJitToInterpPunt();
                    //}
                    break;
                case kChainingCellBackwardBranch:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellBackwardBranch],
                        i);
                    break;
                default:
                    break;
            }
            continue;
        }
        {
            //LowOp *headLIR = NULL;
            const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
            const u2 *startCodePtr = dexCode->insns;
            const u2 *codePtr;
            labelList[i].lop.generic.offset = (stream - streamMethodStart);
            ALOGV("get ready to handle JIT bb %d type %d hidden %d",
                  bb->id, bb->blockType, bb->hidden);
            for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
                bb = nextBB;
                bb->visited = true;
                cUnit->nextCodegenBlock = NULL;

                if (gDvm.executionMode == kExecutionModeNcgO1 &&
                    bb->blockType != kEntryBlock &&
                    bb->firstMIRInsn != NULL) {
                    startOfBasicBlock(bb);
                    int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
                    endOfBasicBlock(bb);
                    if (cg_ret < 0) {
                        endOfTrace(true /*freeOnly*/);
                        cUnit->baseAddr = NULL;
                        PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                        return;
                    }
                } else {
                    for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
                        startOfBasicBlock(bb); //why here for O0
                        Opcode dalvikOpCode = mir->dalvikInsn.opcode;
                        if ((int)dalvikOpCode >= (int)kMirOpFirst) {
                            handleExtendedMIR(cUnit, mir);
                            continue;
                        }
                        InstructionFormat dalvikFormat =
                            dexGetFormatFromOpcode(dalvikOpCode);
                        ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
                              mir->offset, dalvikOpCode, dalvikFormat);
                        LowOpImm *boundaryLIR =
                            dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY,
                                         mir->offset);
                        /* Remember the first LIR for this block */
                        if (headLIR == NULL) {
                            headLIR = (LowOp*)boundaryLIR;
                        }
                        bool notHandled = true;
                        /*
                         * Debugging: screen the opcode first to see if it is
                         * in the do[-not]-compile list
                         */
                        bool singleStepMe =
                            gDvmJit.includeSelectedOp !=
                            ((gDvmJit.opList[dalvikOpCode >> 3] &
                              (1 << (dalvikOpCode & 0x7))) !=
                             0);
                        if (singleStepMe || cUnit->allSingleStep) {
                        } else {
                            codePtr = startCodePtr + mir->offset;
                            //lower each bytecode, update LIR
                            notHandled = lowerByteCodeJit(cUnit->method,
                                cUnit->method->insns + mir->offset, mir);
                            if (gDvmJit.codeCacheByteUsed + (stream - streamStart) +
                                CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                                ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)",
                                      (stream - streamStart));
                                gDvmJit.codeCacheFull = true;
                                cUnit->baseAddr = NULL;
                                endOfTrace(true /*freeOnly*/);
                                PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                                return;
                            }
                        }
                        if (notHandled) {
                            ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
                                  mir->offset,
                                  dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
                                  dalvikFormat);
                            dvmAbort();
                            break;
                        }
                    } // end for
                } // end else //JIT + O0 code generator
            }
        } // end for
        /* Eliminate redundant loads/stores and delay stores into later slots */
#if 0
        dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
                                           cUnit->lastLIRInsn);
#endif
        if (headLIR) headLIR = NULL;
gen_fallthrough:
        /*
         * Check if the block is terminated due to trace length constraint -
         * insert an unconditional branch to the chaining cell.
         */
        if (bb->needFallThroughBranch) {
            jumpToBasicBlock(stream, bb->fallThrough->id);
        }
    }

    char* streamChainingStart = (char*)stream;
    /* Handle the chaining cells in predefined order */
    for (i = 0; i < kChainingCellGap; i++) {
        size_t j;
        int *blockIdList = (int *) chainingListByType[i].elemList;

        cUnit->numChainingCells[i] = chainingListByType[i].numUsed;

        /* No chaining cells of this type */
        if (cUnit->numChainingCells[i] == 0)
            continue;

        /* Record the first LIR for a new type of chaining cell */
        cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
        for (j = 0; j < chainingListByType[i].numUsed; j++) {
            int blockId = blockIdList[j];
            BasicBlock *chainingBlock =
                (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
                                                         blockId);

            labelList[blockId].lop.generic.offset = (stream - streamMethodStart);

            /* Align this chaining cell first */
#if 0
            newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
#endif
            /* Insert the pseudo chaining instruction */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);

            switch (chainingBlock->blockType) {
                case kChainingCellNormal:
                    handleNormalChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellInvokeSingleton:
                    handleInvokeSingletonChainingCell(cUnit,
                        chainingBlock->containingMethod, blockId, labelList);
                    break;
                case kChainingCellInvokePredicted:
                    handleInvokePredictedChainingCell(cUnit, blockId);
                    break;
                case kChainingCellHot:
                    handleHotChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellBackwardBranch:
                    handleBackwardBranchChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                default:
                    ALOGE("Bad blocktype %d", chainingBlock->blockType);
                    dvmAbort();
                    break;
            }

            if (gDvmJit.codeCacheByteUsed + (stream - streamStart) +
                CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                ALOGI("JIT code cache full after ChainingCell (trace uses %uB)",
                      (stream - streamStart));
                gDvmJit.codeCacheFull = true;
                cUnit->baseAddr = NULL;
                endOfTrace(true); /* need to free structures */
                PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                return;
            }
        }
    }
#if 0
    dvmCompilerApplyGlobalOptimizations(cUnit);
#endif
    endOfTrace(false);

    if (gDvmJit.codeCacheFull) {
        /* We hit the code cache size limit inside endOfTrace(false).
         * Bail out for this trace!
         */
        ALOGI("JIT code cache full after endOfTrace (trace uses %uB)",
              (stream - streamStart));
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
        return;
    }

    /* Dump the section for the chaining cell counts; make sure it is
     * 4-byte aligned */
    padding = (4 - ((u4)stream & 3)) & 3;
    stream += padding;
    ChainCellCounts chainCellCounts;
    /* Install the chaining cell counts */
    for (i = 0; i < kChainingCellGap; i++) {
        chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
    }
    char* streamCountStart = (char*)stream;
    memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
    stream += sizeof(chainCellCounts);

    cUnit->baseAddr = streamMethodStart;
    cUnit->totalSize = (stream - streamStart);
    if (gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
        ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)",
              (stream - streamStart));
        gDvmJit.codeCacheFull = true;
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
        return;
    }

    /* Write the chaining cell count offset & chaining cell offset */
    u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
    *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
    pOffset[1] = streamChainingStart - streamMethodStart;

    PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);

    gDvmJit.codeCacheByteUsed += (stream - streamStart);
    if (cUnit->printMe) {
        unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
        unsigned char* codeBaseAddrNext =
            ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
        ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
              cUnit->method->clazz->descriptor, cUnit->method->name,
              codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
        ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
              cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
        printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
    }
    ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
          (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
          cUnit->totalSize, gDvmJit.codeCache);

    gDvmJit.numCompilations++;

    info->codeAddress = (char*)cUnit->baseAddr; // + cUnit->headerSize;
}
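/*
 * dvmJitChain() below performs the actual chaining for normal/hot cells: it
 * overwrites the 5-byte "jmp 0" placeholder emitted via insertJumpHelp()
 * with a direct jmp to the target translation. It only does so while the
 * profiling table is active, no thread suspension is pending, and the code
 * cache still has room.
 */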
/*
 * Perform the translation chain operation.
 */
void* dvmJitChain(void* tgtAddr, u4* branchAddr)
{
#ifdef JIT_CHAIN
    int relOffset = (int) tgtAddr - (int)branchAddr;

    if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
        (gDvmJit.codeCacheFull == false)) {

        gDvmJit.translationChains++;

        //OpndSize immSize = estOpndSizeFromImm(relOffset);
        //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
        /* The jump operand size is hard-coded to 32 bits. This instruction
         * will replace the "jmp 0" in the original code sequence.
         */
        OpndSize immSize = OpndSize_32;
        relOffset -= 5;
        //can't use stream here since it is used by the compilation thread
        UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
        dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset,
                               (char*)branchAddr); //dump to branchAddr
        PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));

        gDvmJit.hasNewChain = true;

        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
                  (int) branchAddr, tgtAddr, relOffset));
    }
#endif
    return tgtAddr;
}

/*
 * Accept the work and start compiling. Returns true if compilation
 * is attempted.
 */
bool dvmCompilerDoWork(CompilerWorkOrder *work)
{
    JitTraceDescription *desc;
    bool isCompile;
    bool success = true;

    if (gDvmJit.codeCacheFull) {
        return false;
    }

    switch (work->kind) {
        case kWorkOrderTrace:
            isCompile = true;
            /* Start compilation with the maximum allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                      work->bailPtr, 0 /* no hints */);
            break;
        case kWorkOrderTraceDebug: {
            bool oldPrintMe = gDvmJit.printMe;
            gDvmJit.printMe = true;
            isCompile = true;
            /* Start compilation with the maximum allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                      work->bailPtr, 0 /* no hints */);
            gDvmJit.printMe = oldPrintMe;
            break;
        }
        case kWorkOrderProfileMode:
            dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
            isCompile = false;
            break;
        default:
            isCompile = false;
            ALOGE("Jit: unknown work order type");
            assert(0);  // Bail if debug build, discard otherwise
    }
    if (!success)
        work->result.codeAddress = NULL;
    return isCompile;
}

void dvmCompilerCacheFlush(long start, long end, long flags) {
    /* A cache flush is needed on ARM, but not on IA32 (coherent icache) */
}

//#endif