/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <sys/mman.h>
#include "Dalvik.h"
#include "libdex/DexOpcodes.h"
#include "compiler/Compiler.h"
#include "compiler/CompilerIR.h"
#include "interp/Jit.h"
#include "libdex/DexFile.h"
#include "Lower.h"
#include "NcgAot.h"
#include "compiler/codegen/CompilerCodegen.h"

/* Init values when a predicted chain is initially assembled */
/* E7FE is branch to self */
#define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe

/* Target-specific save/restore */
extern "C" void dvmJitCalleeSave(double *saveArea);
extern "C" void dvmJitCalleeRestore(double *saveArea);

/*
 * Determine the initial instruction set to be used for this trace.
 * Later components may decide to change this.
 */
//JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
JitInstructionSetType dvmCompilerInstructionSet(void)
{
    return DALVIK_JIT_IA32;
}

JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
{
    return DALVIK_JIT_IA32;
}

/* We don't use an interpret template for IA32 */
void *dvmCompilerGetInterpretTemplate()
{
    return NULL;
}

/* Track the number of times that the code cache is patched */
#if defined(WITH_JIT_TUNING)
#define UPDATE_CODE_CACHE_PATCHES()    (gDvmJit.codeCachePatches++)
#else
#define UPDATE_CODE_CACHE_PATCHES()
#endif

bool dvmCompilerArchInit() {
    /* Target-specific configuration */
    gDvmJit.jitTableSize = 1 << 12;
    gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
    if (gDvmJit.threshold == 0) {
        gDvmJit.threshold = 255;
    }
    if (gDvmJit.codeCacheSize == DEFAULT_CODE_CACHE_SIZE) {
        gDvmJit.codeCacheSize = 512 * 1024;
    } else if ((gDvmJit.codeCacheSize == 0) &&
               (gDvm.executionMode == kExecutionModeJit)) {
        gDvm.executionMode = kExecutionModeInterpFast;
    }
    gDvmJit.optLevel = kJitOptLevelO1;

    //Disable Method-JIT
    gDvmJit.disableOpt |= (1 << kMethodJit);

#if defined(WITH_SELF_VERIFICATION)
    /* Force into blocking mode */
    gDvmJit.blockingMode = true;
    gDvm.nativeDebuggerActive = true;
#endif

    // Make sure all threads have current values
    dvmJitUpdateThreadStateAll();

    return true;
}

void dvmCompilerPatchInlineCache(void)
{
    int i;
    PredictedChainingCell *minAddr, *maxAddr;

    /* Nothing to be done */
    if (gDvmJit.compilerICPatchIndex == 0) return;

    /*
     * Since all threads are already stopped we don't really need to acquire
     * the lock. But a race condition could easily be introduced in the future
     * without paying attention, so we still acquire the lock here.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);

    /* Initialize the min/max address range */
    minAddr = (PredictedChainingCell *)
        ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
    maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;

    for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
        ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
        PredictedChainingCell *cellAddr = workOrder->cellAddr;
        PredictedChainingCell *cellContent = &workOrder->cellContent;
        ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
                                                workOrder->classLoader);

        assert(clazz->serialNumber == workOrder->serialNumber);

        /* Use the newly resolved clazz pointer */
        cellContent->clazz = clazz;

        if (cellAddr->clazz == NULL) {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
                      cellAddr,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        } else {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
                      "patched",
                      cellAddr,
                      cellAddr->clazz->descriptor,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        }

        /* Patch the chaining cell */
        *cellAddr = *cellContent;
        minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
        maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
    }

    PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    gDvmJit.compilerICPatchIndex = 0;
    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
}

/* Target-specific cache clearing */
void dvmCompilerCacheClear(char *start, size_t size)
{
    /* "0xFF 0xFF" is an invalid opcode for x86. */
    memset(start, 0xFF, size);
}

/* for JIT debugging, to be implemented */
void dvmJitCalleeSave(double *saveArea) {
}

void dvmJitCalleeRestore(double *saveArea) {
}

void dvmJitToInterpSingleStep() {
}

JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
                                            const JitEntry *knownEntry) {
    return NULL;
}

void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
{
}

void dvmCompilerArchDump(void)
{
}

char *getTraceBase(const JitEntry *p)
{
    return NULL;
}

void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
{
}

void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
{
}

void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
{
    // Method-based JIT not supported for x86.
}

void dvmJitScanAllClassPointers(void (*callback)(void *))
{
}

/* Handy function to retrieve the profile count */
static inline int getProfileCount(const JitEntry *entry)
{
    if (entry->dPC == 0 || entry->codeAddress == 0)
        return 0;
    u4 *pExecutionCount = (u4 *) getTraceBase(entry);

    return pExecutionCount ? *pExecutionCount : 0;
}
/* qsort callback function */
static int sortTraceProfileCount(const void *entry1, const void *entry2)
{
    const JitEntry *jitEntry1 = (const JitEntry *)entry1;
    const JitEntry *jitEntry2 = (const JitEntry *)entry2;

    JitTraceCounter_t count1 = getProfileCount(jitEntry1);
    JitTraceCounter_t count2 = getProfileCount(jitEntry2);
    return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
}

/* Sort the trace profile counts and dump them */
void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
{
    JitEntry *sortedEntries;
    int numTraces = 0;
    unsigned long counts = 0;
    unsigned int i;

    /* Make sure that the table is not changing */
    dvmLockMutex(&gDvmJit.tableLock);

    /* Sort the entries by descending order */
    sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
    if (sortedEntries == NULL)
        goto done;
    memcpy(sortedEntries, gDvmJit.pJitEntryTable,
           sizeof(JitEntry) * gDvmJit.jitTableSize);
    qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
          sortTraceProfileCount);

    /* Count the traces and report the average execution count */
    for (i = 0; i < gDvmJit.jitTableSize; i++) {
        if (sortedEntries[i].dPC != 0) {
            numTraces++;
        }
    }
    if (numTraces == 0)
        numTraces = 1;
    ALOGI("JIT: Average execution count -> %d", (int)(counts / numTraces));

    free(sortedEntries);
done:
    dvmUnlockMutex(&gDvmJit.tableLock);
    return;
}

void jumpWithRelOffset(char* instAddr, int relOffset) {
    stream = instAddr;
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
    dump_imm(Mnemonic_JMP, immSize, relOffset);
}

// works whether instructions for the target basic block are generated or not
LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
    stream = instAddr;
    bool unknown;
    OpndSize size;
    int relativeNCG = targetId;
    relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
    unconditional_jump_int(relativeNCG, size);
    return NULL;
}

LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
    stream = instAddr;
    bool unknown;
    OpndSize size;
    int relativeNCG = targetId;
    relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
    conditional_jump_int(cc, relativeNCG, size);
    return NULL;
}
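
/*
 * Illustrative sketch (not compiled): how the two helpers above are typically
 * used by the lowering code later in this file. The helpers point the global
 * "stream" at instAddr and emit the jump there, so callers normally pass the
 * current stream position; cUnit/bb below stand for whatever compilation unit
 * and block the caller is working on.
 */
#if 0
    /* branch to the exception block if the value in %ebx is zero */
    compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);

    /* unconditional branch to the fall-through block */
    jumpToBasicBlock(stream, bb->fallThrough->id);
#endif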
/*
 * Attempt to enqueue a work order to patch an inline cache for a predicted
 * chaining cell for virtual/interface calls.
 */
static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
                                    PredictedChainingCell *newContent)
{
    bool result = true;

    /*
     * Make sure only one thread gets here since updating the cell (ie the fast
     * path) and queueing the request (ie the queued path) have to be done
     * in an atomic fashion.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    /* Fast path for uninitialized chaining cell */
    if (cellAddr->clazz == NULL &&
        cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {

        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->method = newContent->method;
        cellAddr->branch = newContent->branch;
        cellAddr->branch2 = newContent->branch2;

        /*
         * The update order matters - make sure clazz is updated last since it
         * will bring the uninitialized chaining cell to life.
         */
        android_atomic_release_store((int32_t)newContent->clazz,
            (volatile int32_t *)(void*) &cellAddr->clazz);
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if 0
        MEM_BARRIER();
        cellAddr->clazz = newContent->clazz;
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
#endif
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchInit++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
                  cellAddr, newContent->clazz->descriptor,
                  newContent->method->name, newContent->method));
    /* Check if this is a frequently missed clazz */
    } else if (cellAddr->stagedClazz != newContent->clazz) {
        /* Not proven to be frequent yet - build up the filter cache */
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->stagedClazz = newContent->clazz;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchRejected++;
#endif
    /*
     * Different classes but same method implementation - it is safe to just
     * patch the class value without the need to stop the world.
     */
    } else if (cellAddr->method == newContent->method) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->clazz = newContent->clazz;
        /* No need to flush the cache here since the branch is not patched */
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchLockFree++;
#endif
    /*
     * Cannot patch the chaining cell inline - queue it until the next safe
     * point.
     */
    } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) {
        int index = gDvmJit.compilerICPatchIndex++;
        const ClassObject *clazz = newContent->clazz;

        gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
        gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
        gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
        gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
        /* For verification purposes only */
        gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchQueued++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor,
                  newContent->method->name));
    } else {
        /* Queue is full - just drop this patch request */
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchDropped++;
#endif

        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor,
                  newContent->method->name));
    }

    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
    return result;
}
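
/*
 * Summary of the cases handled by inlineCachePatchEnqueue above (descriptive
 * only, mirrors the code):
 *   1. Cell still uninitialized            -> patch it in place (FAST path).
 *   2. stagedClazz != incoming clazz       -> only record the class in the
 *      filter field; the miss is not yet proven to be frequent.
 *   3. Same method, different class        -> patch clazz only; the branch
 *      target is unchanged, so no safe point is needed.
 *   4. Otherwise                           -> enqueue a work order that
 *      dvmCompilerPatchInlineCache() applies at the next safe point, or drop
 *      it if the queue is full.
 */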
/*
 * This method is called from the invoke templates for virtual and interface
 * methods to speculatively set up a chain to the callee. The templates are
 * written in assembly and have set up method, cell, and clazz in r0, r2, and
 * r3 respectively, so there is an unused argument in the list. Upon return one
 * of the following three results may happen:
 *   1) Chain is not set up because the callee is native. Reset the rechain
 *      count to a big number so that it will take a long time before the next
 *      rechain attempt happens.
 *   2) Chain is not set up because the callee has not been compiled yet. Reset
 *      the rechain count to a small number and retry in the near future.
 *   3) Ask all other threads to stop before patching this chaining cell.
 *      This is required because another thread may have passed the class check
 *      but hasn't reached the chaining cell yet to follow the chain. If we
 *      patch the content before halting the other thread, there could be a
 *      small window for race conditions in which it may follow the new but
 *      wrong chain to invoke a different method.
 */
const Method *dvmJitToPatchPredictedChain(const Method *method,
                                          Thread *self,
                                          PredictedChainingCell *cell,
                                          const ClassObject *clazz)
{
    int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
    /* Don't come back here for a long time if the method is native */
    if (dvmIsNativeMethod(method)) {
        UNPROTECT_CODE_CACHE(cell, sizeof(*cell));

        /*
         * Put a non-zero/bogus value in the clazz field so that it won't
         * trigger immediate patching and will continue to fail to match with
         * a real clazz pointer.
         */
        cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cell, sizeof(*cell));
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
                  cell, method->name));
        goto done;
    }
    {
        int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);

        /*
         * The callee has not been compiled yet. Reset the counter to a small
         * value and come back to check soon.
         */
        if ((tgtAddr == 0) ||
            ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
                      cell, method->clazz->descriptor, method->name));
            goto done;
        }

        PredictedChainingCell newCell;

        if (cell->clazz == NULL) {
            newRechainCount = self->icRechainCount;
        }

        int relOffset = (int) tgtAddr - (int)cell;
        OpndSize immSize = estOpndSizeFromImm(relOffset);
        int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
        relOffset -= jumpSize;
        COMPILER_TRACE_CHAINING(
            ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
                  cell, method->clazz->descriptor, method->name, jumpSize));
        //can't use stream here since it is used by the compilation thread
        dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset,
                               (char*) (&newCell)); //update newCell.branch

        newCell.clazz = clazz;
        newCell.method = method;

        /*
         * Enter the work order to the queue and the chaining cell will be
         * patched the next time a safe point is entered.
         *
         * If the enqueuing fails, reset the rechain count to a normal value
         * so that it won't get indefinitely delayed.
         */
        inlineCachePatchEnqueue(cell, &newCell);
    }
done:
    self->icRechainCount = newRechainCount;
    return method;
}
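
/*
 * Layout assumed by dvmJitUnchain below (descriptive sketch only; the bytes
 * are written at the end of dvmCompilerMIR2LIR in this file):
 *
 *   codeAddr - 4 : u2  offset from codeAddr to the ChainCellCounts block
 *   codeAddr - 2 : u2  offset from codeAddr to the first chaining cell
 *   codeAddr     : 16-byte aligned start of the translated trace
 *   ...          : chaining cells, grouped by type
 *   ...          : 4-byte aligned ChainCellCounts
 */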
/*
 * Unchain a trace given the starting address of the translation
 * in the code cache. Refer to the diagram in dvmCompilerAssembleLIR.
 * For ARM, it returns the address following the last cell unchained.
 * For IA, it returns NULL since cacheflush is not required for IA.
 */
u4* dvmJitUnchain(void* codeAddr)
{
    /* codeAddr is 4-byte aligned, so is chain cell count offset */
    u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
    u2 chainCellCountOffset = *pChainCellCountOffset;
    /* chain cell counts information is 4-byte aligned */
    ChainCellCounts *pChainCellCounts =
        (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
    u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
    u2 chainCellOffset = *pChainCellOffset;
    u1* pChainCells;
    int i,j;
    PredictedChainingCell *predChainCell;
    int padding;

    /* Locate the beginning of the chain cell region */
    pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);

    /* The cells are sorted in order - walk through them and reset */
    for (i = 0; i < kChainingCellGap; i++) {
        /* for hot, normal, singleton chaining:
               nop  //padding
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after chaining:
               nop
               jmp imm
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after unchaining:
               nop
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           Space occupied by the chaining cell in bytes: the nop is padding so
           that the 4-byte displacement of "jmp 0" is 4-byte aligned.
           Space for a predicted chaining cell: 5 words = 20 bytes
        */
        int elemSize = 0;
        if (i == kChainingCellInvokePredicted) {
            elemSize = 20;
        }
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: unchaining type %d count %d", i,
                  pChainCellCounts->u.count[i]));

        for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
            switch(i) {
                case kChainingCellNormal:
                case kChainingCellHot:
                case kChainingCellInvokeSingleton:
                case kChainingCellBackwardBranch:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
                    pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
                    elemSize = 4+5+5+2;
                    memset(pChainCells, 0, 4);
                    break;
                case kChainingCellInvokePredicted:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of predicted"));
                    /* 4-byte aligned */
                    padding = (4 - ((u4)pChainCells & 3)) & 3;
                    pChainCells += padding;
                    predChainCell = (PredictedChainingCell *) pChainCells;
                    /*
                     * There could be a race where another mutator thread is
                     * about to use this particular predicted cell and its
                     * check has already passed the clazz comparison. So we
                     * cannot safely wipe the method and branch, but it is safe
                     * to clear the clazz, which serves as the key.
                     */
                    predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
                    break;
                default:
                    ALOGE("Unexpected chaining type: %d", i);
                    dvmAbort();  // dvmAbort OK here - can't safely recover
            }
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
            pChainCells += elemSize;  /* Advance by a fixed number of bytes */
        }
    }
    return NULL;
}

/* Unchain all translations in the cache. */
void dvmJitUnchainAll()
{
    ALOGV("Jit Runtime: unchaining all");
    if (gDvmJit.pJitEntryTable != NULL) {
        COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
        dvmLockMutex(&gDvmJit.tableLock);

        UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

        for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
            if (gDvmJit.pJitEntryTable[i].dPC &&
                !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
                gDvmJit.pJitEntryTable[i].codeAddress) {
                dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
            }
        }

        PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

        dvmUnlockMutex(&gDvmJit.tableLock);
        gDvmJit.translationChains = 0;
    }
    gDvmJit.hasNewChain = false;
}

#define P_GPR_1 PhysicalReg_EBX
/* Add an additional jump instruction, keeping the jump target 4-byte aligned. */
static void insertJumpHelp()
{
    int rem = (uint)stream % 4;
    int nop_size = 3 - rem;
    dump_nop(nop_size);
    unconditional_jump_int(0, OpndSize_32);
    return;
}

/* Chaining cell for code that may need warmup. */
/* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?)
                 blx r0
                 data 0xb23a //bytecode address: 0x5115b23a
                 data 0x5115
   IA32 assembly:
                 jmp  0 //5 bytes
                 movl address, %ebx
                 movl dvmJitToInterpNormal, %eax
                 call %eax
                 <-- return address
*/
static void handleNormalChainingCell(CompilationUnit *cUnit,
                                     unsigned int offset, int blockId,
                                     LowOpBlockLabel* labelList)
{
    ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during JIT
     * chaining. This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}
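
/*
 * Byte layout shared by the normal/hot/backward-branch/singleton chaining
 * cells emitted in this file (descriptive sketch; it follows the IA32
 * assembly sketch above and the sizes used by dvmJitUnchain):
 *
 *   0-3 bytes   nop padding from insertJumpHelp(), so the 4-byte displacement
 *               of the following jmp is 4-byte aligned
 *   5 bytes     jmp rel32        initially "jmp 0"; patched by dvmJitChain,
 *                                reset to 0 by dvmJitUnchain
 *   5 bytes     mov imm32, %ebx  Dalvik PC of the chaining target
 *   5 bytes     mov imm32, %eax  address of the dvmJitToInterp* handler
 *   2 bytes     call *%eax
 *
 * dvmJitUnchain() walks these cells with elemSize = 4+5+5+2, counting from
 * the aligned jmp displacement rather than from the start of the padding.
 */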
/*
 * Chaining cell for instructions that immediately follow already translated
 * code.
 */
static void handleHotChainingCell(CompilationUnit *cUnit,
                                  unsigned int offset, int blockId,
                                  LowOpBlockLabel* labelList)
{
    ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during JIT
     * chaining. This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for branches that branch back into the same basic block */
static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
                                             unsigned int offset, int blockId,
                                             LowOpBlockLabel* labelList)
{
    ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during JIT
     * chaining. This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for monomorphic method invocations. */
static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
                                              const Method *callee, int blockId,
                                              LowOpBlockLabel* labelList)
{
    ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
          cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during JIT
     * chaining. This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
}
#undef P_GPR_1
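
/*
 * A predicted chaining cell occupies 5 words (20 bytes). Descriptive sketch
 * of the words the next function writes, which are later read and patched by
 * inlineCachePatchEnqueue()/dvmJitUnchain():
 *
 *   word 0   PREDICTED_CHAIN_BX_PAIR_INIT   branch word, "to be patched"
 *   word 1   0                              second branch word
 *   word 2   PREDICTED_CHAIN_CLAZZ_INIT     class key, to be filled
 *   word 3   PREDICTED_CHAIN_METHOD_INIT    method, to be filled
 *   word 4   PREDICTED_CHAIN_COUNTER_INIT   rechain counter
 */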
/* Chaining cell for polymorphic (virtual/interface) method invocations. */
static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
{
    if(dump_x86_inst)
        ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
#ifndef PREDICTED_CHAINING
    //assume rPC for callee->insns in %ebx
    scratchRegs[0] = PhysicalReg_EAX;
#if defined(WITH_JIT_TUNING)
    /* Predicted chaining is not enabled. Fall back to interpreter and
     * indicate that predicted chaining was not done.
     */
    move_imm_to_reg(OpndSize_32, kInlineCacheMiss, PhysicalReg_EDX, true);
#endif
    call_dvmJitToInterpTraceSelectNoChain();
#else
    /* make sure the section for the predicted chaining cell is 4-byte aligned */
    //int padding = (4 - ((u4)stream & 3)) & 3;
    //stream += padding;
    int* streamData = (int*)stream;
    /* Should not be executed in the initial state */
    streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
    streamData[1] = 0;
    /* To be filled: class */
    streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
    /* To be filled: method */
    streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
    /*
     * Rechain count. The initial value of 0 here will trigger chaining upon
     * the first invocation of this callsite.
     */
    streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
#if 0
    ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
          *((int*)(stream+8)), *((int*)(stream+12)));
#endif
    stream += 20; //5 * 4 bytes
#endif
}

/* Load the Dalvik PC into r0 and jump to the specified target */
static void handlePCReconstruction(CompilationUnit *cUnit,
                                   LowOpBlockLabel *targetLabel)
{
#if 0
    LowOp **pcrLabel =
        (LowOp **) cUnit->pcReconstructionList.elemList;
    int numElems = cUnit->pcReconstructionList.numUsed;
    int i;
    for (i = 0; i < numElems; i++) {
        dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
        /* r0 = dalvik PC */
        loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
        genUnconditionalBranch(cUnit, targetLabel);
    }
#endif
}
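
/*
 * The hoisted-check generators below move the per-iteration null check and
 * array bounds check of a simple counted loop in front of the loop body.
 * Sketch of the generated checks (descriptive only):
 *
 *   if (array == NULL)                       -> exception block
 *   if ((unsigned)(bound + maxC) >= length)  -> exception block
 *
 * where "bound" is the loop end-condition register (count-up case) or the
 * starting index (count-down case), and maxC is the largest constant index
 * offset used by array accesses inside the loop.
 */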
//use O0 code generator for hoisted checks outside of the loop
/*
 * vA = arrayReg;
 * vB = idxReg;
 * vC = endConditionReg;
 * arg[0] = maxC
 * arg[1] = minC
 * arg[2] = loopBranchConditionCode
 */
#define P_GPR_1 PhysicalReg_EBX
#define P_GPR_2 PhysicalReg_ECX
static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
{
    /*
     * NOTE: these synthesized blocks don't have ssa names assigned
     * for Dalvik registers.  However, because they dominate the following
     * blocks we can simply use the Dalvik name w/ subscript 0 as the
     * ssa name.
     */
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign the array in the virtual register to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign the index in the virtual register to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
    export_pc();
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
    int delta = maxC;
    /*
     * If the loop end condition is ">=" instead of ">", then the largest value
     * of the index is "endCondition - 1".
     */
    if (dInsn->arg[2] == OP_IF_GE) {
        delta--;
    }

    if (delta < 0) { //+delta
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
    } else if(delta > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
    }
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
}

/*
 * vA = arrayReg;
 * vB = idxReg;
 * vC = endConditionReg;
 * arg[0] = maxC
 * arg[1] = minC
 * arg[2] = loopBranchConditionCode
 */
static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
{
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign the array in the virtual register to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign the index in the virtual register to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
    export_pc();
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);

    if (maxC < 0) {
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
    } else if(maxC > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
    }
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
}
#undef P_GPR_1
#undef P_GPR_2

/*
 * vA = idxReg;
 * vB = minC;
 */
#define P_GPR_1 PhysicalReg_ECX
static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
{
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int minC = dInsn->vB;
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //array
    export_pc();
    compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
}
#undef P_GPR_1

#ifdef WITH_JIT_INLINING
static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
{
    CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
    if(gDvm.executionMode == kExecutionModeNcgO0) {
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
        compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
        export_pc(); //use %edx
        conditional_jump_global_API(, Condition_E, "common_errNullObject", false);
        move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
        compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
    } else {
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
        nullCheck(5, false, 1, mir->dalvikInsn.vC);
        move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
        compare_reg_reg(4, false, 6, false);
    }

    //immediate will be updated later in genLandingPadForMispredictedCallee
    streamMisPred = stream;
    callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
}
#endif

/* Extended MIR instructions like PHI */
void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
{
    ExecutionMode origMode = gDvm.executionMode;
    gDvm.executionMode = kExecutionModeNcgO0;
    switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
        case kMirOpPhi: {
            break;
        }
        case kMirOpNullNRangeUpCheck: {
            genHoistedChecksForCountUpLoop(cUnit, mir);
            break;
        }
        case kMirOpNullNRangeDownCheck: {
            genHoistedChecksForCountDownLoop(cUnit, mir);
            break;
        }
        case kMirOpLowerBound: {
            genHoistedLowerBoundCheck(cUnit, mir);
            break;
        }
        case kMirOpPunt: {
            break;
        }
#ifdef WITH_JIT_INLINING
        case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
            genValidationForPredictedInline(cUnit, mir);
            break;
        }
#endif
        default:
            break;
    }
    gDvm.executionMode = origMode;
}

static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
                                int bodyId)
{
    /*
     * Next, create two branches - one branch over to the loop body and the
     * other branch to the PCR cell to punt.
     */
    //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
    //setupResourceMasks(branchToBody);
    //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);

#if 0
    LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
    branchToPCR->opCode = kThumbBUncond;
    branchToPCR->generic.target = (LIR *) pcrLabel;
    setupResourceMasks(branchToPCR);
    cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
#endif
}

/* Check whether we can merge a block with its taken (target) block */
bool mergeBlock(BasicBlock *bb) {
    if(bb->blockType == kDalvikByteCode &&
       bb->firstMIRInsn != NULL &&
       (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
        bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
        bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
       bb->fallThrough == NULL) {// &&
        //cUnit->hasLoop) {
        //ALOGI("merge blocks ending with goto at index %d", i);
        MIR* prevInsn = bb->lastMIRInsn->prev;
        if(bb->taken == NULL) return false;
        MIR* mergeInsn = bb->taken->firstMIRInsn;
        if(mergeInsn == NULL) return false;
        if(prevInsn == NULL) {//the block has a single instruction
            bb->firstMIRInsn = mergeInsn;
        } else {
            prevInsn->next = mergeInsn; //remove goto from the chain
        }
        mergeInsn->prev = prevInsn;
        bb->lastMIRInsn = bb->taken->lastMIRInsn;
        bb->taken->firstMIRInsn = NULL; //block being merged in
        bb->fallThrough = bb->taken->fallThrough;
        bb->taken = bb->taken->taken;
        return true;
    }
    return false;
}

static int genTraceProfileEntry(CompilationUnit *cUnit)
{
    cUnit->headerSize = 6;
    if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
        (gDvmJit.profileMode == kTraceProfilingDisabled)) {
        return 12;
    } else {
        return 4;
    }
}

#define PRINT_BUFFER_LEN 1024
/*
 * Print the code block in the code cache in the range of [startAddr, endAddr)
 * in readable format.
 */
void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
{
    char strbuf[PRINT_BUFFER_LEN];
    unsigned char *addr;
    unsigned char *next_addr;
    int n;

    if (gDvmJit.printBinary) {
        // print binary in bytes
        n = 0;
        for (addr = startAddr; addr < endAddr; addr++) {
            n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
            if (n > PRINT_BUFFER_LEN - 10) {
                ALOGD("## %s", strbuf);
                n = 0;
            }
        }
        if (n > 0)
            ALOGD("## %s", strbuf);
    }

    // print disassembled instructions
    addr = startAddr;
    while (addr < endAddr) {
        next_addr = reinterpret_cast<unsigned char*>
            (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
                                       strbuf, PRINT_BUFFER_LEN));
        if (addr != next_addr) {
            ALOGD("** %p: %s", addr, strbuf);
        } else { // check whether this is nop padding
            if (addr[0] == 0x90) {
                ALOGD("** %p: NOP (1 byte)", addr);
                next_addr += 1;
            } else if (addr[0] == 0x66 && addr[1] == 0x90) {
                ALOGD("** %p: NOP (2 bytes)", addr);
                next_addr += 2;
            } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
                ALOGD("** %p: NOP (3 bytes)", addr);
                next_addr += 3;
            } else {
                ALOGD("** unable to decode binary at %p", addr);
                break;
            }
        }
        addr = next_addr;
    }
}

/* 4 is the number of additional bytes needed for chaining information for a trace:
 * 2 bytes for the chaining cell count offset and 2 bytes for the chaining cell offset */
#define EXTRA_BYTES_FOR_CHAINING 4

/* Entry function to invoke the backend of the JIT compiler */
void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
{
    dump_x86_inst = cUnit->printMe;
    /* Used to hold the labels of each block */
    LowOpBlockLabel *labelList =
        (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
    LowOp *headLIR = NULL;
    GrowableList chainingListByType[kChainingCellLast];
    unsigned int i, padding;

    /*
     * Initialize the various types of chaining lists.
     */
    for (i = 0; i < kChainingCellLast; i++) {
        dvmInitGrowableList(&chainingListByType[i], 2);
    }

    /* Clear the visited flag for each block */
    dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
                                          kAllNodes, false /* isIterative */);

    GrowableListIterator iterator;
    dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);

    /* Traces start with a profiling entry point.  Generate it here */
    cUnit->profileCodeSize = genTraceProfileEntry(cUnit);

    //BasicBlock **blockList = cUnit->blockList;
    GrowableList *blockList = &cUnit->blockList;
    BasicBlock *bb;

    info->codeAddress = NULL;
    stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
    streamStart = stream; /* trace start before alignment */

    // TODO: compile into a temporary buffer and then copy into the code cache.
    // That would let us leave the code cache unprotected for a shorter time.
    size_t unprotected_code_cache_bytes =
            gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed;
    UNPROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);

    stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
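    /*
     * Resulting layout of a trace in the code cache (descriptive sketch; the
     * two offsets are written back near the end of this function):
     *
     *   streamStart                  start of this trace's allocation
     *   streamMethodStart - 4  (u2)  offset of ChainCellCounts, relative to
     *                                streamMethodStart
     *   streamMethodStart - 2  (u2)  offset of the first chaining cell
     *   streamMethodStart            16-byte aligned code start (baseAddr)
     *   streamChainingStart          chaining cells, grouped by type
     *   (4-byte aligned)             ChainCellCounts block
     */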
    stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align the trace to 16 bytes */
    streamMethodStart = stream; /* code start */
    for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
        labelList[i].lop.generic.offset = -1;
    }
    cUnit->exceptionBlockId = -1;
    for (i = 0; i < blockList->numUsed; i++) {
        bb = (BasicBlock *) blockList->elemList[i];
        if(bb->blockType == kExceptionHandling)
            cUnit->exceptionBlockId = i;
    }
    startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
    if(gDvm.executionMode == kExecutionModeNcgO1) {
        //merge blocks ending with "goto" with the fall through block
        if (cUnit->jitMode != kJitLoop)
            for (i = 0; i < blockList->numUsed; i++) {
                bb = (BasicBlock *) blockList->elemList[i];
                bool merged = mergeBlock(bb);
                while(merged) merged = mergeBlock(bb);
            }
        for (i = 0; i < blockList->numUsed; i++) {
            bb = (BasicBlock *) blockList->elemList[i];
            if(bb->blockType == kDalvikByteCode &&
               bb->firstMIRInsn != NULL) {
                preprocessingBB(bb);
            }
        }
        preprocessingTrace();
    }

    /* Handle the content in each basic block */
    for (i = 0; ; i++) {
        MIR *mir;
        bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
        if (bb == NULL) break;
        if (bb->visited == true) continue;

        labelList[i].immOpnd.value = bb->startOffset;

        if (bb->blockType >= kChainingCellLast) {
            /*
             * Append the label pseudo LIR first. Chaining cells will be handled
             * separately afterwards.
             */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
        }

        if (bb->blockType == kEntryBlock) {
            labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
            if (bb->firstMIRInsn == NULL) {
                continue;
            } else {
                setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
                //&labelList[blockList[i]->fallThrough->id]);
            }
        } else if (bb->blockType == kExitBlock) {
            labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
            labelList[i].lop.generic.offset = (stream - streamMethodStart);
            goto gen_fallthrough;
        } else if (bb->blockType == kDalvikByteCode) {
            if (bb->hidden == true) continue;
            labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
            /* Reset the register state */
#if 0
            resetRegisterScoreboard(cUnit);
#endif
        } else {
            switch (bb->blockType) {
                case kChainingCellNormal:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellNormal], i);
                    break;
                case kChainingCellInvokeSingleton:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
                    labelList[i].immOpnd.value =
                        (int) bb->containingMethod;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellInvokeSingleton], i);
                    break;
                case kChainingCellInvokePredicted:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
                    /*
                     * Move the cached method pointer from operand 1 to 0.
                     * Operand 0 was clobbered earlier in this routine to store
                     * the block starting offset, which is not applicable to
                     * the predicted chaining cell.
                     */
                    //TODO
                    //labelList[i].operands[0] = labelList[i].operands[1];

                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellInvokePredicted], i);
                    break;
                case kChainingCellHot:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_HOT;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellHot], i);
                    break;
                case kPCReconstruction:
                    /* Make sure exception handling block is next */
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
                    //assert (i == cUnit->numBlocks - 2);
                    labelList[i].lop.generic.offset = (stream - streamMethodStart);
                    handlePCReconstruction(cUnit,
                                           &labelList[cUnit->puntBlock->id]);
                    break;
                case kExceptionHandling:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
                    labelList[i].lop.generic.offset = (stream - streamMethodStart);
                    //if (cUnit->pcReconstructionList.numUsed) {
                    scratchRegs[0] = PhysicalReg_EAX;
                    jumpToInterpPunt();
                    //call_dvmJitToInterpPunt();
                    //}
                    break;
                case kChainingCellBackwardBranch:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellBackwardBranch],
                        i);
                    break;
                default:
                    break;
            }
            continue;
        }
        {
            //LowOp *headLIR = NULL;
            const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
            const u2 *startCodePtr = dexCode->insns;
            const u2 *codePtr;
            labelList[i].lop.generic.offset = (stream - streamMethodStart);
            ALOGV("get ready to handle JIT bb %d type %d hidden %d",
                  bb->id, bb->blockType, bb->hidden);
            for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
                bb = nextBB;
                bb->visited = true;
                cUnit->nextCodegenBlock = NULL;

                if(gDvm.executionMode == kExecutionModeNcgO1 &&
                   bb->blockType != kEntryBlock &&
                   bb->firstMIRInsn != NULL) {
                    startOfBasicBlock(bb);
                    int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
                    endOfBasicBlock(bb);
                    if(cg_ret < 0) {
                        endOfTrace(true/*freeOnly*/);
                        cUnit->baseAddr = NULL;
                        PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
                        return;
                    }
                } else {
                    for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
                        startOfBasicBlock(bb); //why here for O0
                        Opcode dalvikOpCode = mir->dalvikInsn.opcode;
                        if((int)dalvikOpCode >= (int)kMirOpFirst) {
                            handleExtendedMIR(cUnit, mir);
                            continue;
                        }
                        InstructionFormat dalvikFormat =
                            dexGetFormatFromOpcode(dalvikOpCode);
                        ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
                              mir->offset, dalvikOpCode, dalvikFormat);
                        LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
                        /* Remember the first LIR for this block */
                        if (headLIR == NULL) {
                            headLIR = (LowOp*)boundaryLIR;
                        }
                        bool notHandled = true;
                        /*
                         * Debugging: screen the opcode first to see if it is in the
                         * do[-not]-compile list
                         */
                        bool singleStepMe =
                            gDvmJit.includeSelectedOp !=
                            ((gDvmJit.opList[dalvikOpCode >> 3] &
                              (1 << (dalvikOpCode & 0x7))) !=
                             0);
                        if (singleStepMe || cUnit->allSingleStep) {
                        } else {
                            codePtr = startCodePtr + mir->offset;
                            //lower each bytecode, update LIR
                            notHandled = lowerByteCodeJit(cUnit->method,
                                    cUnit->method->insns + mir->offset, mir);
                            if(gDvmJit.codeCacheByteUsed + (stream - streamStart) +
                               CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                                ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart));
                                gDvmJit.codeCacheFull = true;
                                cUnit->baseAddr = NULL;
                                endOfTrace(true/*freeOnly*/);
                                PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
                                return;
                            }
                        }
                        if (notHandled) {
                            ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
                                  mir->offset,
                                  dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
                                  dalvikFormat);
                            dvmAbort();
                            break;
                        }
                    } // end for (each MIR)
                } // end else //JIT + O0 code generator
            }
        } // end of the bytecode-block section
        /* Eliminate redundant loads/stores and delay stores into later slots */
#if 0
        dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
                                           cUnit->lastLIRInsn);
#endif
        if (headLIR) headLIR = NULL;
gen_fallthrough:
        /*
         * Check if the block is terminated due to trace length constraint -
         * insert an unconditional branch to the chaining cell.
         */
        if (bb->needFallThroughBranch) {
            jumpToBasicBlock(stream, bb->fallThrough->id);
        }

    }

    char* streamChainingStart = (char*)stream;
    /* Handle the chaining cells in predefined order */
    for (i = 0; i < kChainingCellGap; i++) {
        size_t j;
        int *blockIdList = (int *) chainingListByType[i].elemList;

        cUnit->numChainingCells[i] = chainingListByType[i].numUsed;

        /* No chaining cells of this type */
        if (cUnit->numChainingCells[i] == 0)
            continue;

        /* Record the first LIR for a new type of chaining cell */
        cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
        for (j = 0; j < chainingListByType[i].numUsed; j++) {
            int blockId = blockIdList[j];
            BasicBlock *chainingBlock =
                (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
                                                         blockId);

            labelList[blockId].lop.generic.offset = (stream - streamMethodStart);

            /* Align this chaining cell first */
#if 0
            newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
#endif
            /* Insert the pseudo chaining instruction */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);

            switch (chainingBlock->blockType) {
                case kChainingCellNormal:
                    handleNormalChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellInvokeSingleton:
                    handleInvokeSingletonChainingCell(cUnit,
                        chainingBlock->containingMethod, blockId, labelList);
                    break;
                case kChainingCellInvokePredicted:
                    handleInvokePredictedChainingCell(cUnit, blockId);
                    break;
                case kChainingCellHot:
                    handleHotChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellBackwardBranch:
                    handleBackwardBranchChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                default:
                    ALOGE("Bad blocktype %d", chainingBlock->blockType);
                    dvmAbort();
                    break;
            }

            if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart));
                gDvmJit.codeCacheFull = true;
                cUnit->baseAddr = NULL;
                endOfTrace(true); /* need to free structures */
                PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
                return;
            }
        }
    }
#if 0
    dvmCompilerApplyGlobalOptimizations(cUnit);
#endif
    endOfTrace(false);

    if (gDvmJit.codeCacheFull) {
        /* We hit the code cache size limit inside endOfTrace(false).
         * Bail out for this trace!
         */
        ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart));
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
        return;
    }

    /* dump the section for chaining cell counts; make sure it is 4-byte aligned */
    padding = (4 - ((u4)stream & 3)) & 3;
    stream += padding;
    ChainCellCounts chainCellCounts;
    /* Install the chaining cell counts */
    for (i=0; i< kChainingCellGap; i++) {
        chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
    }
    char* streamCountStart = (char*)stream;
    memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
    stream += sizeof(chainCellCounts);

    cUnit->baseAddr = streamMethodStart;
    cUnit->totalSize = (stream - streamStart);
    if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
        ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart));
        gDvmJit.codeCacheFull = true;
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
        return;
    }

    /* write the chaining cell count offset & chaining cell offset */
    u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
    *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
    pOffset[1] = streamChainingStart - streamMethodStart;

    PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);

    gDvmJit.codeCacheByteUsed += (stream - streamStart);
    if (cUnit->printMe) {
        unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
        unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
        ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
              cUnit->method->clazz->descriptor, cUnit->method->name,
              codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
        ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
              cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
        printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
    }
    ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
          (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
          cUnit->totalSize, gDvmJit.codeCache);

    gDvmJit.numCompilations++;

    info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize;
}

/*
 * Perform the translation chain operation.
 */
void* dvmJitChain(void* tgtAddr, u4* branchAddr)
{
#ifdef JIT_CHAIN
    int relOffset = (int) tgtAddr - (int)branchAddr;

    if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
        (gDvmJit.codeCacheFull == false)) {

        gDvmJit.translationChains++;

        //OpndSize immSize = estOpndSizeFromImm(relOffset);
        //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
        /* The jump operand size is hard-coded to 32 bits. This instruction
         * will replace the "jump 0" in the original code sequence.
         */
        OpndSize immSize = OpndSize_32;
        relOffset -= 5;
        //can't use stream here since it is used by the compilation thread
        UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
        dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr
        PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));

        gDvmJit.hasNewChain = true;

        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
                  (int) branchAddr, tgtAddr, relOffset));
    }
#endif
    return tgtAddr;
}

/*
 * Accept the work and start compiling.  Returns true if compilation
 * is attempted.
 */
bool dvmCompilerDoWork(CompilerWorkOrder *work)
{
    JitTraceDescription *desc;
    bool isCompile;
    bool success = true;

    if (gDvmJit.codeCacheFull) {
        return false;
    }

    switch (work->kind) {
        case kWorkOrderTrace:
            isCompile = true;
            /* Start compilation with maximally allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                      work->bailPtr, 0 /* no hints */);
            break;
        case kWorkOrderTraceDebug: {
            bool oldPrintMe = gDvmJit.printMe;
            gDvmJit.printMe = true;
            isCompile = true;
            /* Start compilation with maximally allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                      work->bailPtr, 0 /* no hints */);
            gDvmJit.printMe = oldPrintMe;
            break;
        }
        case kWorkOrderProfileMode:
            dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
            isCompile = false;
            break;
        default:
            isCompile = false;
            ALOGE("Jit: unknown work order type");
            assert(0);  // Bail if debug build, discard otherwise
    }
    if (!success)
        work->result.codeAddress = NULL;
    return isCompile;
}

void dvmCompilerCacheFlush(long start, long end, long flags) {
    /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */
}

//#endif