1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include <sys/mman.h> 17 #include "Dalvik.h" 18 #include "libdex/DexOpcodes.h" 19 #include "compiler/Compiler.h" 20 #include "compiler/CompilerIR.h" 21 #include "interp/Jit.h" 22 #include "libdex/DexFile.h" 23 #include "Lower.h" 24 #include "NcgAot.h" 25 #include "compiler/codegen/CompilerCodegen.h" 26 27 /* Init values when a predicted chain is initially assembled */ 28 /* E7FE is branch to self */ 29 #define PREDICTED_CHAIN_BX_PAIR_INIT 0xe7fe 30 31 /* Target-specific save/restore */ 32 extern "C" void dvmJitCalleeSave(double *saveArea); 33 extern "C" void dvmJitCalleeRestore(double *saveArea); 34 35 /* 36 * Determine the initial instruction set to be used for this trace. 37 * Later components may decide to change this. 38 */ 39 //JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit) 40 JitInstructionSetType dvmCompilerInstructionSet(void) 41 { 42 return DALVIK_JIT_IA32; 43 } 44 45 JitInstructionSetType dvmCompilerGetInterpretTemplateSet() 46 { 47 return DALVIK_JIT_IA32; 48 } 49 50 /* we don't use template for IA32 */ 51 void *dvmCompilerGetInterpretTemplate() 52 { 53 return NULL; 54 } 55 56 /* Track the number of times that the code cache is patched */ 57 #if defined(WITH_JIT_TUNING) 58 #define UPDATE_CODE_CACHE_PATCHES() (gDvmJit.codeCachePatches++) 59 #else 60 #define UPDATE_CODE_CACHE_PATCHES() 61 #endif 62 63 bool dvmCompilerArchInit() { 64 /* Target-specific configuration */ 65 gDvmJit.jitTableSize = 1 << 12; 66 gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1; 67 gDvmJit.threshold = 255; 68 gDvmJit.codeCacheSize = 512*1024; 69 gDvmJit.optLevel = kJitOptLevelO1; 70 71 #if defined(WITH_SELF_VERIFICATION) 72 /* Force into blocking mode */ 73 gDvmJit.blockingMode = true; 74 gDvm.nativeDebuggerActive = true; 75 #endif 76 77 // Make sure all threads have current values 78 dvmJitUpdateThreadStateAll(); 79 80 return true; 81 } 82 83 void dvmCompilerPatchInlineCache(void) 84 { 85 int i; 86 PredictedChainingCell *minAddr, *maxAddr; 87 88 /* Nothing to be done */ 89 if (gDvmJit.compilerICPatchIndex == 0) return; 90 91 /* 92 * Since all threads are already stopped we don't really need to acquire 93 * the lock. But race condition can be easily introduced in the future w/o 94 * paying attention so we still acquire the lock here. 95 */ 96 dvmLockMutex(&gDvmJit.compilerICPatchLock); 97 98 UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); 99 100 //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex); 101 102 /* Initialize the min/max address range */ 103 minAddr = (PredictedChainingCell *) 104 ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize); 105 maxAddr = (PredictedChainingCell *) gDvmJit.codeCache; 106 107 for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) { 108 ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i]; 109 PredictedChainingCell *cellAddr = workOrder->cellAddr; 110 PredictedChainingCell *cellContent = &workOrder->cellContent; 111 ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor, 112 workOrder->classLoader); 113 114 assert(clazz->serialNumber == workOrder->serialNumber); 115 116 /* Use the newly resolved clazz pointer */ 117 cellContent->clazz = clazz; 118 119 if (cellAddr->clazz == NULL) { 120 COMPILER_TRACE_CHAINING( 121 ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized", 122 cellAddr, 123 cellContent->clazz->descriptor, 124 cellContent->method->name)); 125 } else { 126 COMPILER_TRACE_CHAINING( 127 ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) " 128 "patched", 129 cellAddr, 130 cellAddr->clazz->descriptor, 131 cellContent->clazz->descriptor, 132 cellContent->method->name)); 133 } 134 135 /* Patch the chaining cell */ 136 *cellAddr = *cellContent; 137 minAddr = (cellAddr < minAddr) ? cellAddr : minAddr; 138 maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr; 139 } 140 141 PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); 142 143 gDvmJit.compilerICPatchIndex = 0; 144 dvmUnlockMutex(&gDvmJit.compilerICPatchLock); 145 } 146 147 /* Target-specific cache clearing */ 148 void dvmCompilerCacheClear(char *start, size_t size) 149 { 150 /* "0xFF 0xFF" is an invalid opcode for x86. */ 151 memset(start, 0xFF, size); 152 } 153 154 /* for JIT debugging, to be implemented */ 155 void dvmJitCalleeSave(double *saveArea) { 156 } 157 158 void dvmJitCalleeRestore(double *saveArea) { 159 } 160 161 void dvmJitToInterpSingleStep() { 162 } 163 164 JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc, 165 const JitEntry *knownEntry) { 166 return NULL; 167 } 168 169 void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c 170 { 171 } 172 173 void dvmCompilerArchDump(void) 174 { 175 } 176 177 char *getTraceBase(const JitEntry *p) 178 { 179 return NULL; 180 } 181 182 void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info) 183 { 184 } 185 186 void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress) 187 { 188 } 189 190 void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit) 191 { 192 // Method-based JIT not supported for x86. 193 } 194 195 void dvmJitScanAllClassPointers(void (*callback)(void *)) 196 { 197 } 198 199 /* Handy function to retrieve the profile count */ 200 static inline int getProfileCount(const JitEntry *entry) 201 { 202 if (entry->dPC == 0 || entry->codeAddress == 0) 203 return 0; 204 u4 *pExecutionCount = (u4 *) getTraceBase(entry); 205 206 return pExecutionCount ? *pExecutionCount : 0; 207 } 208 209 /* qsort callback function */ 210 static int sortTraceProfileCount(const void *entry1, const void *entry2) 211 { 212 const JitEntry *jitEntry1 = (const JitEntry *)entry1; 213 const JitEntry *jitEntry2 = (const JitEntry *)entry2; 214 215 JitTraceCounter_t count1 = getProfileCount(jitEntry1); 216 JitTraceCounter_t count2 = getProfileCount(jitEntry2); 217 return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1); 218 } 219 220 /* Sort the trace profile counts and dump them */ 221 void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c 222 { 223 JitEntry *sortedEntries; 224 int numTraces = 0; 225 unsigned long counts = 0; 226 unsigned int i; 227 228 /* Make sure that the table is not changing */ 229 dvmLockMutex(&gDvmJit.tableLock); 230 231 /* Sort the entries by descending order */ 232 sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize); 233 if (sortedEntries == NULL) 234 goto done; 235 memcpy(sortedEntries, gDvmJit.pJitEntryTable, 236 sizeof(JitEntry) * gDvmJit.jitTableSize); 237 qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry), 238 sortTraceProfileCount); 239 240 /* Dump the sorted entries */ 241 for (i=0; i < gDvmJit.jitTableSize; i++) { 242 if (sortedEntries[i].dPC != 0) { 243 numTraces++; 244 } 245 } 246 if (numTraces == 0) 247 numTraces = 1; 248 ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces)); 249 250 free(sortedEntries); 251 done: 252 dvmUnlockMutex(&gDvmJit.tableLock); 253 return; 254 } 255 256 void jumpWithRelOffset(char* instAddr, int relOffset) { 257 stream = instAddr; 258 OpndSize immSize = estOpndSizeFromImm(relOffset); 259 relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond); 260 dump_imm(Mnemonic_JMP, immSize, relOffset); 261 } 262 263 // works whether instructions for target basic block are generated or not 264 LowOp* jumpToBasicBlock(char* instAddr, int targetId) { 265 stream = instAddr; 266 bool unknown; 267 OpndSize size; 268 int relativeNCG = targetId; 269 relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size); 270 unconditional_jump_int(relativeNCG, size); 271 return NULL; 272 } 273 274 LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) { 275 stream = instAddr; 276 bool unknown; 277 OpndSize size; 278 int relativeNCG = targetId; 279 relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size); 280 conditional_jump_int(cc, relativeNCG, size); 281 return NULL; 282 } 283 284 /* 285 * Attempt to enqueue a work order to patch an inline cache for a predicted 286 * chaining cell for virtual/interface calls. 287 */ 288 static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr, 289 PredictedChainingCell *newContent) 290 { 291 bool result = true; 292 293 /* 294 * Make sure only one thread gets here since updating the cell (ie fast 295 * path and queueing the request (ie the queued path) have to be done 296 * in an atomic fashion. 297 */ 298 dvmLockMutex(&gDvmJit.compilerICPatchLock); 299 300 /* Fast path for uninitialized chaining cell */ 301 if (cellAddr->clazz == NULL && 302 cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) { 303 UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); 304 305 cellAddr->method = newContent->method; 306 cellAddr->branch = newContent->branch; 307 cellAddr->branch2 = newContent->branch2; 308 309 /* 310 * The update order matters - make sure clazz is updated last since it 311 * will bring the uninitialized chaining cell to life. 312 */ 313 android_atomic_release_store((int32_t)newContent->clazz, 314 (volatile int32_t *)(void*) &cellAddr->clazz); 315 //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0); 316 UPDATE_CODE_CACHE_PATCHES(); 317 318 PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); 319 320 #if 0 321 MEM_BARRIER(); 322 cellAddr->clazz = newContent->clazz; 323 //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0); 324 #endif 325 #if defined(IA_JIT_TUNING) 326 gDvmJit.icPatchInit++; 327 #endif 328 COMPILER_TRACE_CHAINING( 329 ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p", 330 cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method)); 331 /* Check if this is a frequently missed clazz */ 332 } else if (cellAddr->stagedClazz != newContent->clazz) { 333 /* Not proven to be frequent yet - build up the filter cache */ 334 UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); 335 336 cellAddr->stagedClazz = newContent->clazz; 337 338 UPDATE_CODE_CACHE_PATCHES(); 339 PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); 340 341 #if defined(WITH_JIT_TUNING) 342 gDvmJit.icPatchRejected++; 343 #endif 344 /* 345 * Different classes but same method implementation - it is safe to just 346 * patch the class value without the need to stop the world. 347 */ 348 } else if (cellAddr->method == newContent->method) { 349 UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); 350 351 cellAddr->clazz = newContent->clazz; 352 /* No need to flush the cache here since the branch is not patched */ 353 UPDATE_CODE_CACHE_PATCHES(); 354 355 PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr)); 356 357 #if defined(WITH_JIT_TUNING) 358 gDvmJit.icPatchLockFree++; 359 #endif 360 /* 361 * Cannot patch the chaining cell inline - queue it until the next safe 362 * point. 363 */ 364 } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) { 365 int index = gDvmJit.compilerICPatchIndex++; 366 const ClassObject *clazz = newContent->clazz; 367 368 gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr; 369 gDvmJit.compilerICPatchQueue[index].cellContent = *newContent; 370 gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor; 371 gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader; 372 /* For verification purpose only */ 373 gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber; 374 375 #if defined(WITH_JIT_TUNING) 376 gDvmJit.icPatchQueued++; 377 #endif 378 COMPILER_TRACE_CHAINING( 379 ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s", 380 cellAddr, newContent->clazz->descriptor, newContent->method->name)); 381 } else { 382 /* Queue is full - just drop this patch request */ 383 #if defined(WITH_JIT_TUNING) 384 gDvmJit.icPatchDropped++; 385 #endif 386 387 COMPILER_TRACE_CHAINING( 388 ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s", 389 cellAddr, newContent->clazz->descriptor, newContent->method->name)); 390 } 391 392 dvmUnlockMutex(&gDvmJit.compilerICPatchLock); 393 return result; 394 } 395 396 /* 397 * This method is called from the invoke templates for virtual and interface 398 * methods to speculatively setup a chain to the callee. The templates are 399 * written in assembly and have setup method, cell, and clazz at r0, r2, and 400 * r3 respectively, so there is a unused argument in the list. Upon return one 401 * of the following three results may happen: 402 * 1) Chain is not setup because the callee is native. Reset the rechain 403 * count to a big number so that it will take a long time before the next 404 * rechain attempt to happen. 405 * 2) Chain is not setup because the callee has not been created yet. Reset 406 * the rechain count to a small number and retry in the near future. 407 * 3) Ask all other threads to stop before patching this chaining cell. 408 * This is required because another thread may have passed the class check 409 * but hasn't reached the chaining cell yet to follow the chain. If we 410 * patch the content before halting the other thread, there could be a 411 * small window for race conditions to happen that it may follow the new 412 * but wrong chain to invoke a different method. 413 */ 414 const Method *dvmJitToPatchPredictedChain(const Method *method, 415 Thread *self, 416 PredictedChainingCell *cell, 417 const ClassObject *clazz) 418 { 419 int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN; 420 /* Don't come back here for a long time if the method is native */ 421 if (dvmIsNativeMethod(method)) { 422 UNPROTECT_CODE_CACHE(cell, sizeof(*cell)); 423 424 /* 425 * Put a non-zero/bogus value in the clazz field so that it won't 426 * trigger immediate patching and will continue to fail to match with 427 * a real clazz pointer. 428 */ 429 cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ; 430 431 UPDATE_CODE_CACHE_PATCHES(); 432 PROTECT_CODE_CACHE(cell, sizeof(*cell)); 433 COMPILER_TRACE_CHAINING( 434 ALOGI("Jit Runtime: predicted chain %p to native method %s ignored", 435 cell, method->name)); 436 goto done; 437 } 438 { 439 int tgtAddr = (int) dvmJitGetTraceAddr(method->insns); 440 441 /* 442 * Compilation not made yet for the callee. Reset the counter to a small 443 * value and come back to check soon. 444 */ 445 if ((tgtAddr == 0) || 446 ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) { 447 COMPILER_TRACE_CHAINING( 448 ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed", 449 cell, method->clazz->descriptor, method->name)); 450 goto done; 451 } 452 453 PredictedChainingCell newCell; 454 455 if (cell->clazz == NULL) { 456 newRechainCount = self->icRechainCount; 457 } 458 459 int relOffset = (int) tgtAddr - (int)cell; 460 OpndSize immSize = estOpndSizeFromImm(relOffset); 461 int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond); 462 relOffset -= jumpSize; 463 COMPILER_TRACE_CHAINING( 464 ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d", 465 cell, method->clazz->descriptor, method->name, jumpSize)); 466 //can't use stream here since it is used by the compilation thread 467 dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch 468 469 newCell.clazz = clazz; 470 newCell.method = method; 471 472 /* 473 * Enter the work order to the queue and the chaining cell will be patched 474 * the next time a safe point is entered. 475 * 476 * If the enqueuing fails reset the rechain count to a normal value so that 477 * it won't get indefinitely delayed. 478 */ 479 inlineCachePatchEnqueue(cell, &newCell); 480 } 481 done: 482 self->icRechainCount = newRechainCount; 483 return method; 484 } 485 486 /* 487 * Unchain a trace given the starting address of the translation 488 * in the code cache. Refer to the diagram in dvmCompilerAssembleLIR. 489 * For ARM, it returns the address following the last cell unchained. 490 * For IA, it returns NULL since cacheflush is not required for IA. 491 */ 492 u4* dvmJitUnchain(void* codeAddr) 493 { 494 /* codeAddr is 4-byte aligned, so is chain cell count offset */ 495 u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4); 496 u2 chainCellCountOffset = *pChainCellCountOffset; 497 /* chain cell counts information is 4-byte aligned */ 498 ChainCellCounts *pChainCellCounts = 499 (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset); 500 u2* pChainCellOffset = (u2*)((char*)codeAddr - 2); 501 u2 chainCellOffset = *pChainCellOffset; 502 u1* pChainCells; 503 int i,j; 504 PredictedChainingCell *predChainCell; 505 int padding; 506 507 /* Locate the beginning of the chain cell region */ 508 pChainCells = (u1 *)((char*)codeAddr + chainCellOffset); 509 510 /* The cells are sorted in order - walk through them and reset */ 511 for (i = 0; i < kChainingCellGap; i++) { 512 /* for hot, normal, singleton chaining: 513 nop //padding. 514 jmp 0 515 mov imm32, reg1 516 mov imm32, reg2 517 call reg2 518 after chaining: 519 nop 520 jmp imm 521 mov imm32, reg1 522 mov imm32, reg2 523 call reg2 524 after unchaining: 525 nop 526 jmp 0 527 mov imm32, reg1 528 mov imm32, reg2 529 call reg2 530 Space occupied by the chaining cell in bytes: nop is for padding, 531 jump 0, the target 0 is 4 bytes aligned. 532 Space for predicted chaining: 5 words = 20 bytes 533 */ 534 int elemSize = 0; 535 if (i == kChainingCellInvokePredicted) { 536 elemSize = 20; 537 } 538 COMPILER_TRACE_CHAINING( 539 ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i])); 540 541 for (j = 0; j < pChainCellCounts->u.count[i]; j++) { 542 switch(i) { 543 case kChainingCellNormal: 544 case kChainingCellHot: 545 case kChainingCellInvokeSingleton: 546 case kChainingCellBackwardBranch: 547 COMPILER_TRACE_CHAINING( 548 ALOGI("Jit Runtime: unchaining of normal, hot, or singleton")); 549 pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03)); 550 elemSize = 4+5+5+2; 551 memset(pChainCells, 0, 4); 552 break; 553 case kChainingCellInvokePredicted: 554 COMPILER_TRACE_CHAINING( 555 ALOGI("Jit Runtime: unchaining of predicted")); 556 /* 4-byte aligned */ 557 padding = (4 - ((u4)pChainCells & 3)) & 3; 558 pChainCells += padding; 559 predChainCell = (PredictedChainingCell *) pChainCells; 560 /* 561 * There could be a race on another mutator thread to use 562 * this particular predicted cell and the check has passed 563 * the clazz comparison. So we cannot safely wipe the 564 * method and branch but it is safe to clear the clazz, 565 * which serves as the key. 566 */ 567 predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT; 568 break; 569 default: 570 ALOGE("Unexpected chaining type: %d", i); 571 dvmAbort(); // dvmAbort OK here - can't safely recover 572 } 573 COMPILER_TRACE_CHAINING( 574 ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells)); 575 pChainCells += elemSize; /* Advance by a fixed number of bytes */ 576 } 577 } 578 return NULL; 579 } 580 581 /* Unchain all translation in the cache. */ 582 void dvmJitUnchainAll() 583 { 584 ALOGV("Jit Runtime: unchaining all"); 585 if (gDvmJit.pJitEntryTable != NULL) { 586 COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all")); 587 dvmLockMutex(&gDvmJit.tableLock); 588 589 UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); 590 591 for (size_t i = 0; i < gDvmJit.jitTableSize; i++) { 592 if (gDvmJit.pJitEntryTable[i].dPC && 593 !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry && 594 gDvmJit.pJitEntryTable[i].codeAddress) { 595 dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress); 596 } 597 } 598 599 PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed); 600 601 dvmUnlockMutex(&gDvmJit.tableLock); 602 gDvmJit.translationChains = 0; 603 } 604 gDvmJit.hasNewChain = false; 605 } 606 607 #define P_GPR_1 PhysicalReg_EBX 608 /* Add an additional jump instruction, keep jump target 4 bytes aligned.*/ 609 static void insertJumpHelp() 610 { 611 int rem = (uint)stream % 4; 612 int nop_size = 3 - rem; 613 dump_nop(nop_size); 614 unconditional_jump_int(0, OpndSize_32); 615 return; 616 } 617 618 /* Chaining cell for code that may need warmup. */ 619 /* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?) 620 blx r0 621 data 0xb23a //bytecode address: 0x5115b23a 622 data 0x5115 623 IA32 assembly: 624 jmp 0 //5 bytes 625 movl address, %ebx 626 movl dvmJitToInterpNormal, %eax 627 call %eax 628 <-- return address 629 */ 630 static void handleNormalChainingCell(CompilationUnit *cUnit, 631 unsigned int offset, int blockId, LowOpBlockLabel* labelList) 632 { 633 ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x", 634 cUnit->method->name, blockId, offset, stream - streamMethodStart); 635 if(dump_x86_inst) 636 ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p", 637 offset, stream - streamMethodStart, stream); 638 /* Add one additional "jump 0" instruction, it may be modified during jit chaining. This helps 639 * reslove the multithreading issue. 640 */ 641 insertJumpHelp(); 642 move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); 643 scratchRegs[0] = PhysicalReg_EAX; 644 call_dvmJitToInterpNormal(); 645 //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */ 646 } 647 648 /* 649 * Chaining cell for instructions that immediately following already translated 650 * code. 651 */ 652 static void handleHotChainingCell(CompilationUnit *cUnit, 653 unsigned int offset, int blockId, LowOpBlockLabel* labelList) 654 { 655 ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x", 656 cUnit->method->name, blockId, offset, stream - streamMethodStart); 657 if(dump_x86_inst) 658 ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p", 659 offset, stream - streamMethodStart, stream); 660 /* Add one additional "jump 0" instruction, it may be modified during jit chaining. This helps 661 * reslove the multithreading issue. 662 */ 663 insertJumpHelp(); 664 move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); 665 scratchRegs[0] = PhysicalReg_EAX; 666 call_dvmJitToInterpTraceSelect(); 667 //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */ 668 } 669 670 /* Chaining cell for branches that branch back into the same basic block */ 671 static void handleBackwardBranchChainingCell(CompilationUnit *cUnit, 672 unsigned int offset, int blockId, LowOpBlockLabel* labelList) 673 { 674 ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x", 675 cUnit->method->name, blockId, offset, stream - streamMethodStart); 676 if(dump_x86_inst) 677 ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p", 678 offset, stream - streamMethodStart, stream); 679 /* Add one additional "jump 0" instruction, it may be modified during jit chaining. This helps 680 * reslove the multithreading issue. 681 */ 682 insertJumpHelp(); 683 move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); 684 scratchRegs[0] = PhysicalReg_EAX; 685 call_dvmJitToInterpNormal(); 686 //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */ 687 } 688 689 /* Chaining cell for monomorphic method invocations. */ 690 static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit, 691 const Method *callee, int blockId, LowOpBlockLabel* labelList) 692 { 693 ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x", 694 cUnit->method->name, blockId, callee->name, stream - streamMethodStart); 695 if(dump_x86_inst) 696 ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p", 697 blockId, stream - streamMethodStart, stream); 698 /* Add one additional "jump 0" instruction, it may be modified during jit chaining. This helps 699 * reslove the multithreading issue. 700 */ 701 insertJumpHelp(); 702 move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); 703 scratchRegs[0] = PhysicalReg_EAX; 704 call_dvmJitToInterpTraceSelect(); 705 //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */ 706 } 707 #undef P_GPR_1 708 709 /* Chaining cell for monomorphic method invocations. */ 710 static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId) 711 { 712 if(dump_x86_inst) 713 ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p", 714 blockId, stream - streamMethodStart, stream); 715 #ifndef PREDICTED_CHAINING 716 //assume rPC for callee->insns in %ebx 717 scratchRegs[0] = PhysicalReg_EAX; 718 call_dvmJitToInterpTraceSelectNoChain(); 719 #else 720 /* make sure section for predicited chaining cell is 4-byte aligned */ 721 //int padding = (4 - ((u4)stream & 3)) & 3; 722 //stream += padding; 723 int* streamData = (int*)stream; 724 /* Should not be executed in the initial state */ 725 streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT; 726 streamData[1] = 0; 727 /* To be filled: class */ 728 streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT; 729 /* To be filled: method */ 730 streamData[3] = PREDICTED_CHAIN_METHOD_INIT; 731 /* 732 * Rechain count. The initial value of 0 here will trigger chaining upon 733 * the first invocation of this callsite. 734 */ 735 streamData[4] = PREDICTED_CHAIN_COUNTER_INIT; 736 #if 0 737 ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)), 738 *((int*)(stream+8)), *((int*)(stream+12))); 739 #endif 740 stream += 20; //5 *4 741 #endif 742 } 743 744 /* Load the Dalvik PC into r0 and jump to the specified target */ 745 static void handlePCReconstruction(CompilationUnit *cUnit, 746 LowOpBlockLabel *targetLabel) 747 { 748 #if 0 749 LowOp **pcrLabel = 750 (LowOp **) cUnit->pcReconstructionList.elemList; 751 int numElems = cUnit->pcReconstructionList.numUsed; 752 int i; 753 for (i = 0; i < numElems; i++) { 754 dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]); 755 /* r0 = dalvik PC */ 756 loadConstant(cUnit, r0, pcrLabel[i]->operands[0]); 757 genUnconditionalBranch(cUnit, targetLabel); 758 } 759 #endif 760 } 761 762 //use O0 code generator for hoisted checks outside of the loop 763 /* 764 * vA = arrayReg; 765 * vB = idxReg; 766 * vC = endConditionReg; 767 * arg[0] = maxC 768 * arg[1] = minC 769 * arg[2] = loopBranchConditionCode 770 */ 771 #define P_GPR_1 PhysicalReg_EBX 772 #define P_GPR_2 PhysicalReg_ECX 773 static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir) 774 { 775 /* 776 * NOTE: these synthesized blocks don't have ssa names assigned 777 * for Dalvik registers. However, because they dominate the following 778 * blocks we can simply use the Dalvik name w/ subscript 0 as the 779 * ssa name. 780 */ 781 DecodedInstruction *dInsn = &mir->dalvikInsn; 782 const int maxC = dInsn->arg[0]; 783 784 /* assign array in virtual register to P_GPR_1 */ 785 get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); 786 /* assign index in virtual register to P_GPR_2 */ 787 get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true); 788 export_pc(); 789 compare_imm_reg(OpndSize_32, 0, P_GPR_1, true); 790 condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId); 791 int delta = maxC; 792 /* 793 * If the loop end condition is ">=" instead of ">", then the largest value 794 * of the index is "endCondition - 1". 795 */ 796 if (dInsn->arg[2] == OP_IF_GE) { 797 delta--; 798 } 799 800 if (delta < 0) { //+delta 801 //if P_GPR_2 is mapped to a VR, we can't do this 802 alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true); 803 } else if(delta > 0) { 804 alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true); 805 } 806 compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true); 807 condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId); 808 } 809 810 /* 811 * vA = arrayReg; 812 * vB = idxReg; 813 * vC = endConditionReg; 814 * arg[0] = maxC 815 * arg[1] = minC 816 * arg[2] = loopBranchConditionCode 817 */ 818 static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir) 819 { 820 DecodedInstruction *dInsn = &mir->dalvikInsn; 821 const int maxC = dInsn->arg[0]; 822 823 /* assign array in virtual register to P_GPR_1 */ 824 get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); 825 /* assign index in virtual register to P_GPR_2 */ 826 get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true); 827 export_pc(); 828 compare_imm_reg(OpndSize_32, 0, P_GPR_1, true); 829 condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId); 830 831 if (maxC < 0) { 832 //if P_GPR_2 is mapped to a VR, we can't do this 833 alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true); 834 } else if(maxC > 0) { 835 alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true); 836 } 837 compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true); 838 condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId); 839 840 } 841 #undef P_GPR_1 842 #undef P_GPR_2 843 844 /* 845 * vA = idxReg; 846 * vB = minC; 847 */ 848 #define P_GPR_1 PhysicalReg_ECX 849 static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir) 850 { 851 DecodedInstruction *dInsn = &mir->dalvikInsn; 852 const int minC = dInsn->vB; 853 get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //array 854 export_pc(); 855 compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true); 856 condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId); 857 } 858 #undef P_GPR_1 859 860 #ifdef WITH_JIT_INLINING 861 static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir) 862 { 863 CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo; 864 if(gDvm.executionMode == kExecutionModeNcgO0) { 865 get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true); 866 move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true); 867 compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true); 868 export_pc(); //use %edx 869 conditional_jump_global_API(, Condition_E, "common_errNullObject", false); 870 move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true); 871 compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true); 872 } else { 873 get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false); 874 move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false); 875 nullCheck(5, false, 1, mir->dalvikInsn.vC); 876 move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false); 877 compare_reg_reg(4, false, 6, false); 878 } 879 880 //immdiate will be updated later in genLandingPadForMispredictedCallee 881 streamMisPred = stream; 882 callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8); 883 } 884 #endif 885 886 /* Extended MIR instructions like PHI */ 887 void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir) 888 { 889 ExecutionMode origMode = gDvm.executionMode; 890 gDvm.executionMode = kExecutionModeNcgO0; 891 switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) { 892 case kMirOpPhi: { 893 break; 894 } 895 case kMirOpNullNRangeUpCheck: { 896 genHoistedChecksForCountUpLoop(cUnit, mir); 897 break; 898 } 899 case kMirOpNullNRangeDownCheck: { 900 genHoistedChecksForCountDownLoop(cUnit, mir); 901 break; 902 } 903 case kMirOpLowerBound: { 904 genHoistedLowerBoundCheck(cUnit, mir); 905 break; 906 } 907 case kMirOpPunt: { 908 break; 909 } 910 #ifdef WITH_JIT_INLINING 911 case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c 912 genValidationForPredictedInline(cUnit, mir); 913 break; 914 } 915 #endif 916 default: 917 break; 918 } 919 gDvm.executionMode = origMode; 920 } 921 922 static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry, 923 int bodyId) 924 { 925 /* 926 * Next, create two branches - one branch over to the loop body and the 927 * other branch to the PCR cell to punt. 928 */ 929 //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId); 930 //setupResourceMasks(branchToBody); 931 //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody); 932 933 #if 0 934 LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true); 935 branchToPCR->opCode = kThumbBUncond; 936 branchToPCR->generic.target = (LIR *) pcrLabel; 937 setupResourceMasks(branchToPCR); 938 cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR; 939 #endif 940 } 941 942 /* check whether we can merge the block at index i with its target block */ 943 bool mergeBlock(BasicBlock *bb) { 944 if(bb->blockType == kDalvikByteCode && 945 bb->firstMIRInsn != NULL && 946 (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 || 947 bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO || 948 bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) && 949 bb->fallThrough == NULL) {// && 950 //cUnit->hasLoop) { 951 //ALOGI("merge blocks ending with goto at index %d", i); 952 MIR* prevInsn = bb->lastMIRInsn->prev; 953 if(bb->taken == NULL) return false; 954 MIR* mergeInsn = bb->taken->firstMIRInsn; 955 if(mergeInsn == NULL) return false; 956 if(prevInsn == NULL) {//the block has a single instruction 957 bb->firstMIRInsn = mergeInsn; 958 } else { 959 prevInsn->next = mergeInsn; //remove goto from the chain 960 } 961 mergeInsn->prev = prevInsn; 962 bb->lastMIRInsn = bb->taken->lastMIRInsn; 963 bb->taken->firstMIRInsn = NULL; //block being merged in 964 bb->fallThrough = bb->taken->fallThrough; 965 bb->taken = bb->taken->taken; 966 return true; 967 } 968 return false; 969 } 970 971 static int genTraceProfileEntry(CompilationUnit *cUnit) 972 { 973 cUnit->headerSize = 6; 974 if ((gDvmJit.profileMode == kTraceProfilingContinuous) || 975 (gDvmJit.profileMode == kTraceProfilingDisabled)) { 976 return 12; 977 } else { 978 return 4; 979 } 980 981 } 982 983 #define PRINT_BUFFER_LEN 1024 984 /* Print the code block in code cache in the range of [startAddr, endAddr) 985 * in readable format. 986 */ 987 void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr) 988 { 989 char strbuf[PRINT_BUFFER_LEN]; 990 unsigned char *addr; 991 unsigned char *next_addr; 992 int n; 993 994 if (gDvmJit.printBinary) { 995 // print binary in bytes 996 n = 0; 997 for (addr = startAddr; addr < endAddr; addr++) { 998 n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr); 999 if (n > PRINT_BUFFER_LEN - 10) { 1000 ALOGD("## %s", strbuf); 1001 n = 0; 1002 } 1003 } 1004 if (n > 0) 1005 ALOGD("## %s", strbuf); 1006 } 1007 1008 // print disassembled instructions 1009 addr = startAddr; 1010 while (addr < endAddr) { 1011 next_addr = reinterpret_cast<unsigned char*> 1012 (decoder_disassemble_instr(reinterpret_cast<char*>(addr), 1013 strbuf, PRINT_BUFFER_LEN)); 1014 if (addr != next_addr) { 1015 ALOGD("** %p: %s", addr, strbuf); 1016 } else { // check whether this is nop padding 1017 if (addr[0] == 0x90) { 1018 ALOGD("** %p: NOP (1 byte)", addr); 1019 next_addr += 1; 1020 } else if (addr[0] == 0x66 && addr[1] == 0x90) { 1021 ALOGD("** %p: NOP (2 bytes)", addr); 1022 next_addr += 2; 1023 } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) { 1024 ALOGD("** %p: NOP (3 bytes)", addr); 1025 next_addr += 3; 1026 } else { 1027 ALOGD("** unable to decode binary at %p", addr); 1028 break; 1029 } 1030 } 1031 addr = next_addr; 1032 } 1033 } 1034 1035 /* 4 is the number of additional bytes needed for chaining information for trace: 1036 * 2 bytes for chaining cell count offset and 2 bytes for chaining cell offset */ 1037 #define EXTRA_BYTES_FOR_CHAINING 4 1038 1039 /* Entry function to invoke the backend of the JIT compiler */ 1040 void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info) 1041 { 1042 dump_x86_inst = cUnit->printMe; 1043 /* Used to hold the labels of each block */ 1044 LowOpBlockLabel *labelList = 1045 (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c 1046 LowOp *headLIR = NULL; 1047 GrowableList chainingListByType[kChainingCellLast]; 1048 unsigned int i, padding; 1049 1050 /* 1051 * Initialize various types chaining lists. 1052 */ 1053 for (i = 0; i < kChainingCellLast; i++) { 1054 dvmInitGrowableList(&chainingListByType[i], 2); 1055 } 1056 1057 /* Clear the visited flag for each block */ 1058 dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag, 1059 kAllNodes, false /* isIterative */); 1060 1061 GrowableListIterator iterator; 1062 dvmGrowableListIteratorInit(&cUnit->blockList, &iterator); 1063 1064 /* Traces start with a profiling entry point. Generate it here */ 1065 cUnit->profileCodeSize = genTraceProfileEntry(cUnit); 1066 1067 //BasicBlock **blockList = cUnit->blockList; 1068 GrowableList *blockList = &cUnit->blockList; 1069 BasicBlock *bb; 1070 1071 info->codeAddress = NULL; 1072 stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed; 1073 1074 // TODO: compile into a temporary buffer and then copy into the code cache. 1075 // That would let us leave the code cache unprotected for a shorter time. 1076 size_t unprotected_code_cache_bytes = 1077 gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING; 1078 UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); 1079 1080 streamStart = stream; /* trace start before alignment */ 1081 stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */ 1082 stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16-bytes */ 1083 streamMethodStart = stream; /* code start */ 1084 for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) { 1085 labelList[i].lop.generic.offset = -1; 1086 } 1087 cUnit->exceptionBlockId = -1; 1088 for (i = 0; i < blockList->numUsed; i++) { 1089 bb = (BasicBlock *) blockList->elemList[i]; 1090 if(bb->blockType == kExceptionHandling) 1091 cUnit->exceptionBlockId = i; 1092 } 1093 startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit); 1094 if(gDvm.executionMode == kExecutionModeNcgO1) { 1095 //merge blocks ending with "goto" with the fall through block 1096 if (cUnit->jitMode != kJitLoop) 1097 for (i = 0; i < blockList->numUsed; i++) { 1098 bb = (BasicBlock *) blockList->elemList[i]; 1099 bool merged = mergeBlock(bb); 1100 while(merged) merged = mergeBlock(bb); 1101 } 1102 for (i = 0; i < blockList->numUsed; i++) { 1103 bb = (BasicBlock *) blockList->elemList[i]; 1104 if(bb->blockType == kDalvikByteCode && 1105 bb->firstMIRInsn != NULL) { 1106 preprocessingBB(bb); 1107 } 1108 } 1109 preprocessingTrace(); 1110 } 1111 1112 /* Handle the content in each basic block */ 1113 for (i = 0; ; i++) { 1114 MIR *mir; 1115 bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator); 1116 if (bb == NULL) break; 1117 if (bb->visited == true) continue; 1118 1119 labelList[i].immOpnd.value = bb->startOffset; 1120 1121 if (bb->blockType >= kChainingCellLast) { 1122 /* 1123 * Append the label pseudo LIR first. Chaining cells will be handled 1124 * separately afterwards. 1125 */ 1126 dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]); 1127 } 1128 1129 if (bb->blockType == kEntryBlock) { 1130 labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK; 1131 if (bb->firstMIRInsn == NULL) { 1132 continue; 1133 } else { 1134 setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id); 1135 //&labelList[blockList[i]->fallThrough->id]); 1136 } 1137 } else if (bb->blockType == kExitBlock) { 1138 labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK; 1139 labelList[i].lop.generic.offset = (stream - streamMethodStart); 1140 goto gen_fallthrough; 1141 } else if (bb->blockType == kDalvikByteCode) { 1142 if (bb->hidden == true) continue; 1143 labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL; 1144 /* Reset the register state */ 1145 #if 0 1146 resetRegisterScoreboard(cUnit); 1147 #endif 1148 } else { 1149 switch (bb->blockType) { 1150 case kChainingCellNormal: 1151 labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL; 1152 /* handle the codegen later */ 1153 dvmInsertGrowableList( 1154 &chainingListByType[kChainingCellNormal], i); 1155 break; 1156 case kChainingCellInvokeSingleton: 1157 labelList[i].lop.opCode2 = 1158 ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON; 1159 labelList[i].immOpnd.value = 1160 (int) bb->containingMethod; 1161 /* handle the codegen later */ 1162 dvmInsertGrowableList( 1163 &chainingListByType[kChainingCellInvokeSingleton], i); 1164 break; 1165 case kChainingCellInvokePredicted: 1166 labelList[i].lop.opCode2 = 1167 ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED; 1168 /* 1169 * Move the cached method pointer from operand 1 to 0. 1170 * Operand 0 was clobbered earlier in this routine to store 1171 * the block starting offset, which is not applicable to 1172 * predicted chaining cell. 1173 */ 1174 //TODO 1175 //labelList[i].operands[0] = labelList[i].operands[1]; 1176 1177 /* handle the codegen later */ 1178 dvmInsertGrowableList( 1179 &chainingListByType[kChainingCellInvokePredicted], i); 1180 break; 1181 case kChainingCellHot: 1182 labelList[i].lop.opCode2 = 1183 ATOM_PSEUDO_CHAINING_CELL_HOT; 1184 /* handle the codegen later */ 1185 dvmInsertGrowableList( 1186 &chainingListByType[kChainingCellHot], i); 1187 break; 1188 case kPCReconstruction: 1189 /* Make sure exception handling block is next */ 1190 labelList[i].lop.opCode2 = 1191 ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL; 1192 //assert (i == cUnit->numBlocks - 2); 1193 labelList[i].lop.generic.offset = (stream - streamMethodStart); 1194 handlePCReconstruction(cUnit, 1195 &labelList[cUnit->puntBlock->id]); 1196 break; 1197 case kExceptionHandling: 1198 labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL; 1199 labelList[i].lop.generic.offset = (stream - streamMethodStart); 1200 //if (cUnit->pcReconstructionList.numUsed) { 1201 scratchRegs[0] = PhysicalReg_EAX; 1202 jumpToInterpPunt(); 1203 //call_dvmJitToInterpPunt(); 1204 //} 1205 break; 1206 case kChainingCellBackwardBranch: 1207 labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH; 1208 /* handle the codegen later */ 1209 dvmInsertGrowableList( 1210 &chainingListByType[kChainingCellBackwardBranch], 1211 i); 1212 break; 1213 default: 1214 break; 1215 } 1216 continue; 1217 } 1218 { 1219 //LowOp *headLIR = NULL; 1220 const DexCode *dexCode = dvmGetMethodCode(cUnit->method); 1221 const u2 *startCodePtr = dexCode->insns; 1222 const u2 *codePtr; 1223 labelList[i].lop.generic.offset = (stream - streamMethodStart); 1224 ALOGV("get ready to handle JIT bb %d type %d hidden %d", 1225 bb->id, bb->blockType, bb->hidden); 1226 for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) { 1227 bb = nextBB; 1228 bb->visited = true; 1229 cUnit->nextCodegenBlock = NULL; 1230 1231 if(gDvm.executionMode == kExecutionModeNcgO1 && 1232 bb->blockType != kEntryBlock && 1233 bb->firstMIRInsn != NULL) { 1234 startOfBasicBlock(bb); 1235 int cg_ret = codeGenBasicBlockJit(cUnit->method, bb); 1236 endOfBasicBlock(bb); 1237 if(cg_ret < 0) { 1238 endOfTrace(true/*freeOnly*/); 1239 cUnit->baseAddr = NULL; 1240 ALOGI("codeGenBasicBlockJit returns negative number"); 1241 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); 1242 return; 1243 } 1244 } else { 1245 for (mir = bb->firstMIRInsn; mir; mir = mir->next) { 1246 startOfBasicBlock(bb); //why here for O0 1247 Opcode dalvikOpCode = mir->dalvikInsn.opcode; 1248 if((int)dalvikOpCode >= (int)kMirOpFirst) { 1249 handleExtendedMIR(cUnit, mir); 1250 continue; 1251 } 1252 InstructionFormat dalvikFormat = 1253 dexGetFormatFromOpcode(dalvikOpCode); 1254 ALOGV("ready to handle bytecode at offset %x: opcode %d format %d", 1255 mir->offset, dalvikOpCode, dalvikFormat); 1256 LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset); 1257 /* Remember the first LIR for this block */ 1258 if (headLIR == NULL) { 1259 headLIR = (LowOp*)boundaryLIR; 1260 } 1261 bool notHandled = true; 1262 /* 1263 * Debugging: screen the opcode first to see if it is in the 1264 * do[-not]-compile list 1265 */ 1266 bool singleStepMe = 1267 gDvmJit.includeSelectedOp != 1268 ((gDvmJit.opList[dalvikOpCode >> 3] & 1269 (1 << (dalvikOpCode & 0x7))) != 1270 0); 1271 if (singleStepMe || cUnit->allSingleStep) { 1272 } else { 1273 codePtr = startCodePtr + mir->offset; 1274 //lower each byte code, update LIR 1275 notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir); 1276 if(gDvmJit.codeCacheByteUsed + (stream - streamStart) + 1277 CODE_CACHE_PADDING > gDvmJit.codeCacheSize) { 1278 ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart)); 1279 gDvmJit.codeCacheFull = true; 1280 cUnit->baseAddr = NULL; 1281 endOfTrace(true/*freeOnly*/); 1282 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); 1283 return; 1284 } 1285 } 1286 if (notHandled) { 1287 ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled", 1288 mir->offset, 1289 dalvikOpCode, dexGetOpcodeName(dalvikOpCode), 1290 dalvikFormat); 1291 dvmAbort(); 1292 break; 1293 } 1294 } // end for 1295 } // end else //JIT + O0 code generator 1296 } 1297 } // end for 1298 /* Eliminate redundant loads/stores and delay stores into later slots */ 1299 #if 0 1300 dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR, 1301 cUnit->lastLIRInsn); 1302 #endif 1303 if (headLIR) headLIR = NULL; 1304 gen_fallthrough: 1305 /* 1306 * Check if the block is terminated due to trace length constraint - 1307 * insert an unconditional branch to the chaining cell. 1308 */ 1309 if (bb->needFallThroughBranch) { 1310 jumpToBasicBlock(stream, bb->fallThrough->id); 1311 } 1312 1313 } 1314 1315 char* streamChainingStart = (char*)stream; 1316 /* Handle the chaining cells in predefined order */ 1317 for (i = 0; i < kChainingCellGap; i++) { 1318 size_t j; 1319 int *blockIdList = (int *) chainingListByType[i].elemList; 1320 1321 cUnit->numChainingCells[i] = chainingListByType[i].numUsed; 1322 1323 /* No chaining cells of this type */ 1324 if (cUnit->numChainingCells[i] == 0) 1325 continue; 1326 1327 /* Record the first LIR for a new type of chaining cell */ 1328 cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]]; 1329 for (j = 0; j < chainingListByType[i].numUsed; j++) { 1330 int blockId = blockIdList[j]; 1331 BasicBlock *chainingBlock = 1332 (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList, 1333 blockId); 1334 1335 labelList[blockId].lop.generic.offset = (stream - streamMethodStart); 1336 1337 /* Align this chaining cell first */ 1338 #if 0 1339 newLIR0(cUnit, ATOM_PSEUDO_ALIGN4); 1340 #endif 1341 /* Insert the pseudo chaining instruction */ 1342 dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]); 1343 1344 1345 switch (chainingBlock->blockType) { 1346 case kChainingCellNormal: 1347 handleNormalChainingCell(cUnit, 1348 chainingBlock->startOffset, blockId, labelList); 1349 break; 1350 case kChainingCellInvokeSingleton: 1351 handleInvokeSingletonChainingCell(cUnit, 1352 chainingBlock->containingMethod, blockId, labelList); 1353 break; 1354 case kChainingCellInvokePredicted: 1355 handleInvokePredictedChainingCell(cUnit, blockId); 1356 break; 1357 case kChainingCellHot: 1358 handleHotChainingCell(cUnit, 1359 chainingBlock->startOffset, blockId, labelList); 1360 break; 1361 case kChainingCellBackwardBranch: 1362 handleBackwardBranchChainingCell(cUnit, 1363 chainingBlock->startOffset, blockId, labelList); 1364 break; 1365 default: 1366 ALOGE("Bad blocktype %d", chainingBlock->blockType); 1367 dvmAbort(); 1368 break; 1369 } 1370 1371 if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) { 1372 ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart)); 1373 gDvmJit.codeCacheFull = true; 1374 cUnit->baseAddr = NULL; 1375 endOfTrace(true); /* need to free structures */ 1376 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); 1377 return; 1378 } 1379 } 1380 } 1381 #if 0 1382 dvmCompilerApplyGlobalOptimizations(cUnit); 1383 #endif 1384 endOfTrace(false); 1385 1386 if (gDvmJit.codeCacheFull) { 1387 /* We hit code cache size limit inside endofTrace(false). 1388 * Bail out for this trace! 1389 */ 1390 ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart)); 1391 cUnit->baseAddr = NULL; 1392 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); 1393 return; 1394 } 1395 1396 /* dump section for chaining cell counts, make sure it is 4-byte aligned */ 1397 padding = (4 - ((u4)stream & 3)) & 3; 1398 stream += padding; 1399 ChainCellCounts chainCellCounts; 1400 /* Install the chaining cell counts */ 1401 for (i=0; i< kChainingCellGap; i++) { 1402 chainCellCounts.u.count[i] = cUnit->numChainingCells[i]; 1403 } 1404 char* streamCountStart = (char*)stream; 1405 memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts)); 1406 stream += sizeof(chainCellCounts); 1407 1408 cUnit->baseAddr = streamMethodStart; 1409 cUnit->totalSize = (stream - streamStart); 1410 if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) { 1411 ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart)); 1412 gDvmJit.codeCacheFull = true; 1413 cUnit->baseAddr = NULL; 1414 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); 1415 return; 1416 } 1417 1418 /* write chaining cell count offset & chaining cell offset */ 1419 u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */ 1420 *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */ 1421 pOffset[1] = streamChainingStart - streamMethodStart; 1422 1423 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes); 1424 1425 gDvmJit.codeCacheByteUsed += (stream - streamStart); 1426 if (cUnit->printMe) { 1427 unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr; 1428 unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed; 1429 ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p", 1430 cUnit->method->clazz->descriptor, cUnit->method->name, 1431 codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache); 1432 ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor, 1433 cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset); 1434 printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext); 1435 } 1436 ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr, 1437 (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed, 1438 cUnit->totalSize, gDvmJit.codeCache); 1439 1440 gDvmJit.numCompilations++; 1441 1442 info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize; 1443 } 1444 1445 /* 1446 * Perform translation chain operation. 1447 */ 1448 void* dvmJitChain(void* tgtAddr, u4* branchAddr) 1449 { 1450 #ifdef JIT_CHAIN 1451 int relOffset = (int) tgtAddr - (int)branchAddr; 1452 1453 if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) && 1454 (gDvmJit.codeCacheFull == false)) { 1455 1456 gDvmJit.translationChains++; 1457 1458 //OpndSize immSize = estOpndSizeFromImm(relOffset); 1459 //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond); 1460 /* Hard coded the jump opnd size to 32 bits, This instruction will replace the "jump 0" in 1461 * the original code sequence. 1462 */ 1463 OpndSize immSize = OpndSize_32; 1464 relOffset -= 5; 1465 //can't use stream here since it is used by the compilation thread 1466 UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr)); 1467 dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr 1468 PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr)); 1469 1470 gDvmJit.hasNewChain = true; 1471 1472 COMPILER_TRACE_CHAINING( 1473 ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x", 1474 (int) branchAddr, tgtAddr, relOffset)); 1475 } 1476 #endif 1477 return tgtAddr; 1478 } 1479 1480 /* 1481 * Accept the work and start compiling. Returns true if compilation 1482 * is attempted. 1483 */ 1484 bool dvmCompilerDoWork(CompilerWorkOrder *work) 1485 { 1486 JitTraceDescription *desc; 1487 bool isCompile; 1488 bool success = true; 1489 1490 if (gDvmJit.codeCacheFull) { 1491 return false; 1492 } 1493 1494 switch (work->kind) { 1495 case kWorkOrderTrace: 1496 isCompile = true; 1497 /* Start compilation with maximally allowed trace length */ 1498 desc = (JitTraceDescription *)work->info; 1499 success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result, 1500 work->bailPtr, 0 /* no hints */); 1501 break; 1502 case kWorkOrderTraceDebug: { 1503 bool oldPrintMe = gDvmJit.printMe; 1504 gDvmJit.printMe = true; 1505 isCompile = true; 1506 /* Start compilation with maximally allowed trace length */ 1507 desc = (JitTraceDescription *)work->info; 1508 success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result, 1509 work->bailPtr, 0 /* no hints */); 1510 gDvmJit.printMe = oldPrintMe; 1511 break; 1512 } 1513 case kWorkOrderProfileMode: 1514 dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info); 1515 isCompile = false; 1516 break; 1517 default: 1518 isCompile = false; 1519 ALOGE("Jit: unknown work order type"); 1520 assert(0); // Bail if debug build, discard otherwise 1521 } 1522 if (!success) 1523 work->result.codeAddress = NULL; 1524 return isCompile; 1525 } 1526 1527 void dvmCompilerCacheFlush(long start, long end, long flags) { 1528 /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */ 1529 } 1530 1531 //#endif 1532