/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <sys/mman.h>
#include "Dalvik.h"
#include "libdex/DexOpcodes.h"
#include "compiler/Compiler.h"
#include "compiler/CompilerIR.h"
#include "interp/Jit.h"
#include "libdex/DexFile.h"
#include "Lower.h"
#include "NcgAot.h"
#include "compiler/codegen/CompilerCodegen.h"

/* Init values when a predicted chain is initially assembled */
/* E7FE is branch to self */
#define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe
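
/*
 * 0xE7FE is the ARM Thumb encoding of an unconditional branch to self. On
 * x86 the value is never executed; it only serves as the sentinel that
 * inlineCachePatchEnqueue() compares against cellAddr->branch to recognize
 * a chaining cell that is still in its initial, unchained state.
 */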

/* Target-specific save/restore */
extern "C" void dvmJitCalleeSave(double *saveArea);
extern "C" void dvmJitCalleeRestore(double *saveArea);

/*
 * Determine the initial instruction set to be used for this trace.
 * Later components may decide to change this.
 */
//JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
JitInstructionSetType dvmCompilerInstructionSet(void)
{
    return DALVIK_JIT_IA32;
}

JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
{
    return DALVIK_JIT_IA32;
}

/* We don't use an interpret template for IA32 */
void *dvmCompilerGetInterpretTemplate()
{
    return NULL;
}

/* Track the number of times that the code cache is patched */
#if defined(WITH_JIT_TUNING)
#define UPDATE_CODE_CACHE_PATCHES()    (gDvmJit.codeCachePatches++)
#else
#define UPDATE_CODE_CACHE_PATCHES()
#endif

bool dvmCompilerArchInit() {
    /* Target-specific configuration */
    gDvmJit.jitTableSize = 1 << 12;
    gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
    gDvmJit.threshold = 255;
    gDvmJit.codeCacheSize = 512*1024;
    gDvmJit.optLevel = kJitOptLevelO1;

#if defined(WITH_SELF_VERIFICATION)
    /* Force into blocking mode */
    gDvmJit.blockingMode = true;
    gDvm.nativeDebuggerActive = true;
#endif

    // Make sure all threads have current values
    dvmJitUpdateThreadStateAll();

    return true;
}
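
/*
 * The table size is kept at a power of two so that a hash of the Dalvik PC
 * can be reduced to a table index with a single AND against jitTableMask
 * instead of a modulo. A minimal sketch (hashDalvikPC is a stand-in for
 * the real hash function in interp/Jit.h):
 *
 *     u4 idx = hashDalvikPC(dPC) & gDvmJit.jitTableMask; // == hash % jitTableSize
 *     JitEntry *entry = &gDvmJit.pJitEntryTable[idx];
 */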

void dvmCompilerPatchInlineCache(void)
{
    int i;
    PredictedChainingCell *minAddr, *maxAddr;

    /* Nothing to be done */
    if (gDvmJit.compilerICPatchIndex == 0) return;

    /*
     * Since all threads are already stopped we don't really need to acquire
     * the lock. But a race condition could easily be introduced in the
     * future without anyone paying attention, so we still acquire the lock
     * here.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);

    /* Initialize the min/max address range */
    minAddr = (PredictedChainingCell *)
        ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
    maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;

    for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
        ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
        PredictedChainingCell *cellAddr = workOrder->cellAddr;
        PredictedChainingCell *cellContent = &workOrder->cellContent;
        ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
                                                workOrder->classLoader);

        assert(clazz->serialNumber == workOrder->serialNumber);

        /* Use the newly resolved clazz pointer */
        cellContent->clazz = clazz;

        if (cellAddr->clazz == NULL) {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
                      cellAddr,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        } else {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
                      "patched",
                      cellAddr,
                      cellAddr->clazz->descriptor,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        }

        /* Patch the chaining cell */
        *cellAddr = *cellContent;
        minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
        maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
    }

    PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    gDvmJit.compilerICPatchIndex = 0;
    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
}

/* Target-specific cache clearing */
void dvmCompilerCacheClear(char *start, size_t size)
{
    /*
     * "0xFF 0xFF" is an invalid opcode for x86: 0xFF opens the group-5
     * encoding and a ModRM reg field of 7 (as in the second 0xFF byte) is
     * undefined, so stale code that is accidentally executed raises #UD
     * instead of running.
     */
    memset(start, 0xFF, size);
}

/* for JIT debugging, to be implemented */
void dvmJitCalleeSave(double *saveArea) {
}

void dvmJitCalleeRestore(double *saveArea) {
}

void dvmJitToInterpSingleStep() {
}

JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
                                            const JitEntry *knownEntry) {
    return NULL;
}

void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
{
}

void dvmCompilerArchDump(void)
{
}

char *getTraceBase(const JitEntry *p)
{
    return NULL;
}

void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
{
}

void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
{
}

void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
{
    // Method-based JIT not supported for x86.
}

void dvmJitScanAllClassPointers(void (*callback)(void *))
{
}

/* Handy function to retrieve the profile count */
static inline int getProfileCount(const JitEntry *entry)
{
    if (entry->dPC == 0 || entry->codeAddress == 0)
        return 0;
    u4 *pExecutionCount = (u4 *) getTraceBase(entry);

    return pExecutionCount ? *pExecutionCount : 0;
}
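
/*
 * Note: getTraceBase() above is stubbed to return NULL for x86, so
 * getProfileCount() currently always returns 0 here; the sort and the
 * average below therefore see all-zero counts until per-trace profiling
 * is wired up for this target.
 */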

/* qsort callback function */
static int sortTraceProfileCount(const void *entry1, const void *entry2)
{
    const JitEntry *jitEntry1 = (const JitEntry *)entry1;
    const JitEntry *jitEntry2 = (const JitEntry *)entry2;

    JitTraceCounter_t count1 = getProfileCount(jitEntry1);
    JitTraceCounter_t count2 = getProfileCount(jitEntry2);
    return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
}

/* Sort the trace profile counts and dump them */
void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
{
    JitEntry *sortedEntries;
    int numTraces = 0;
    unsigned long counts = 0;
    unsigned int i;

    /* Make sure that the table is not changing */
    dvmLockMutex(&gDvmJit.tableLock);

    /* Sort the entries by descending order */
    sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
    if (sortedEntries == NULL)
        goto done;
    memcpy(sortedEntries, gDvmJit.pJitEntryTable,
           sizeof(JitEntry) * gDvmJit.jitTableSize);
    qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
          sortTraceProfileCount);

    /* Tally the valid traces and accumulate their execution counts */
    for (i=0; i < gDvmJit.jitTableSize; i++) {
        if (sortedEntries[i].dPC != 0) {
            numTraces++;
            counts += getProfileCount(&sortedEntries[i]);
        }
    }
    if (numTraces == 0)
        numTraces = 1;
    ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces));

    free(sortedEntries);
done:
    dvmUnlockMutex(&gDvmJit.tableLock);
    return;
}

void jumpWithRelOffset(char* instAddr, int relOffset) {
    stream = instAddr;
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
    dump_imm(Mnemonic_JMP, immSize, relOffset);
}
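
/*
 * A worked example of the displacement adjustment above (addresses purely
 * illustrative): x86 encodes jump targets relative to the end of the jump
 * instruction, while relOffset arrives measured from its start. For a jump
 * at 0x1000 targeting 0x1080, relOffset starts as 0x80; that needs a
 * 32-bit displacement, so the 5-byte "jmp rel32" form is chosen and the
 * encoded displacement becomes 0x80 - 5 = 0x7b (0x1005 + 0x7b == 0x1080).
 */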

// works whether instructions for target basic block are generated or not
LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
    stream = instAddr;
    bool unknown;
    OpndSize size;
    int relativeNCG = targetId;
    relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
    unconditional_jump_int(relativeNCG, size);
    return NULL;
}

LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
    stream = instAddr;
    bool unknown;
    OpndSize size;
    int relativeNCG = targetId;
    relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
    conditional_jump_int(cc, relativeNCG, size);
    return NULL;
}

/*
 * Attempt to enqueue a work order to patch an inline cache for a predicted
 * chaining cell for virtual/interface calls.
 */
static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
                                    PredictedChainingCell *newContent)
{
    bool result = true;

    /*
     * Make sure only one thread gets here since updating the cell (ie the
     * fast path) and queueing the request (ie the queued path) have to be
     * done in an atomic fashion.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    /* Fast path for uninitialized chaining cell */
    if (cellAddr->clazz == NULL &&
        cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->method = newContent->method;
        cellAddr->branch = newContent->branch;
        cellAddr->branch2 = newContent->branch2;

        /*
         * The update order matters - make sure clazz is updated last since it
         * will bring the uninitialized chaining cell to life.
         */
        android_atomic_release_store((int32_t)newContent->clazz,
            (volatile int32_t *)(void*) &cellAddr->clazz);
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if 0
        MEM_BARRIER();
        cellAddr->clazz = newContent->clazz;
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
#endif
#if defined(IA_JIT_TUNING)
        gDvmJit.icPatchInit++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method));
    /* Check if this is a frequently missed clazz */
    } else if (cellAddr->stagedClazz != newContent->clazz) {
        /* Not proven to be frequent yet - build up the filter cache */
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->stagedClazz = newContent->clazz;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchRejected++;
#endif
    /*
     * Different classes but same method implementation - it is safe to just
     * patch the class value without the need to stop the world.
     */
    } else if (cellAddr->method == newContent->method) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->clazz = newContent->clazz;
        /* No need to flush the cache here since the branch is not patched */
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchLockFree++;
#endif
    /*
     * Cannot patch the chaining cell inline - queue it until the next safe
     * point.
     */
    } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) {
        int index = gDvmJit.compilerICPatchIndex++;
        const ClassObject *clazz = newContent->clazz;

        gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
        gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
        gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
        gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
        /* For verification purpose only */
        gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchQueued++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name));
    } else {
        /* Queue is full - just drop this patch request */
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchDropped++;
#endif

        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name));
    }

    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
    return result;
}

/*
 * This method is called from the invoke templates for virtual and interface
 * methods to speculatively set up a chain to the callee. The templates are
 * written in assembly and have set up method, cell, and clazz at r0, r2, and
 * r3 respectively, so there is an unused argument in the list. Upon return one
 * of the following three results may happen:
 *   1) Chain is not set up because the callee is native. Reset the rechain
 *      count to a big number so that it will take a long time before the next
 *      rechain attempt happens.
 *   2) Chain is not set up because the callee has not been created yet. Reset
 *      the rechain count to a small number and retry in the near future.
 *   3) Ask all other threads to stop before patching this chaining cell.
 *      This is required because another thread may have passed the class check
 *      but hasn't reached the chaining cell yet to follow the chain. If we
 *      patch the content before halting the other thread, there could be a
 *      small window for a race in which it follows the new but wrong chain
 *      and invokes a different method.
 */
const Method *dvmJitToPatchPredictedChain(const Method *method,
                                          Thread *self,
                                          PredictedChainingCell *cell,
                                          const ClassObject *clazz)
{
    int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
    /* Don't come back here for a long time if the method is native */
    if (dvmIsNativeMethod(method)) {
        UNPROTECT_CODE_CACHE(cell, sizeof(*cell));

        /*
         * Put a non-zero/bogus value in the clazz field so that it won't
         * trigger immediate patching and will continue to fail to match with
         * a real clazz pointer.
         */
        cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cell, sizeof(*cell));
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
                  cell, method->name));
        goto done;
    }
    {
    int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);

    /*
     * Compilation not made yet for the callee. Reset the counter to a small
     * value and come back to check soon.
     */
    if ((tgtAddr == 0) ||
        ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
                  cell, method->clazz->descriptor, method->name));
        goto done;
    }

    PredictedChainingCell newCell;

    if (cell->clazz == NULL) {
        newRechainCount = self->icRechainCount;
    }

    int relOffset = (int) tgtAddr - (int)cell;
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
    relOffset -= jumpSize;
    COMPILER_TRACE_CHAINING(
            ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
                  cell, method->clazz->descriptor, method->name, jumpSize));
    //can't use stream here since it is used by the compilation thread
    dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch

    newCell.clazz = clazz;
    newCell.method = method;

    /*
     * Enter the work order to the queue and the chaining cell will be patched
     * the next time a safe point is entered.
     *
     * If the enqueuing fails reset the rechain count to a normal value so that
     * it won't get indefinitely delayed.
     */
    inlineCachePatchEnqueue(cell, &newCell);
    }
done:
    self->icRechainCount = newRechainCount;
    return method;
}

/*
 * Unchain a trace given the starting address of the translation
 * in the code cache.  Refer to the diagram in dvmCompilerAssembleLIR.
 * For ARM, it returns the address following the last cell unchained.
 * For IA, it returns NULL since cacheflush is not required for IA.
 */
u4* dvmJitUnchain(void* codeAddr)
{
    /* codeAddr is 4-byte aligned, so is chain cell count offset */
    u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
    u2 chainCellCountOffset = *pChainCellCountOffset;
    /* chain cell counts information is 4-byte aligned */
    ChainCellCounts *pChainCellCounts =
          (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
    u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
    u2 chainCellOffset = *pChainCellOffset;
    u1* pChainCells;
    int i,j;
    PredictedChainingCell *predChainCell;
    int padding;

    /* Locate the beginning of the chain cell region */
    pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);

    /* The cells are sorted in order - walk through them and reset */
    for (i = 0; i < kChainingCellGap; i++) {
        /* for hot, normal, singleton chaining:
               nop  //padding.
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after chaining:
               nop
               jmp imm
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after unchaining:
               nop
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           Space occupied by the chaining cell in bytes: the nop is padding
                so that the 4-byte target of "jmp 0" is 4-byte aligned;
                counted from that aligned target, the cell occupies
                4 (jmp target) + 5 (mov) + 5 (mov) + 2 (call) = 16 bytes,
                matching elemSize below.
           Space for predicted chaining: 5 words = 20 bytes
        */
        int elemSize = 0;
        if (i == kChainingCellInvokePredicted) {
            elemSize = 20;
        }
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i]));

        for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
            switch(i) {
                case kChainingCellNormal:
                case kChainingCellHot:
                case kChainingCellInvokeSingleton:
                case kChainingCellBackwardBranch:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
                    pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
                    elemSize = 4+5+5+2;
                    memset(pChainCells, 0, 4);
                    break;
                case kChainingCellInvokePredicted:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of predicted"));
                    /* 4-byte aligned */
                    padding = (4 - ((u4)pChainCells & 3)) & 3;
                    pChainCells += padding;
                    predChainCell = (PredictedChainingCell *) pChainCells;
                    /*
                     * Another mutator thread could be racing to use this
                     * particular predicted cell, having already passed the
                     * clazz comparison. So we cannot safely wipe the method
                     * and branch, but it is safe to clear the clazz, which
                     * serves as the key.
                     */
                    predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
                    break;
                default:
                    ALOGE("Unexpected chaining type: %d", i);
                    dvmAbort();  // dvmAbort OK here - can't safely recover
            }
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
            pChainCells += elemSize;  /* Advance by a fixed number of bytes */
        }
    }
    return NULL;
}

/* Unchain all translations in the cache. */
void dvmJitUnchainAll()
{
    ALOGV("Jit Runtime: unchaining all");
    if (gDvmJit.pJitEntryTable != NULL) {
        COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
        dvmLockMutex(&gDvmJit.tableLock);

        UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

        for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
            if (gDvmJit.pJitEntryTable[i].dPC &&
                !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
                gDvmJit.pJitEntryTable[i].codeAddress) {
                      dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
            }
        }

        PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

        dvmUnlockMutex(&gDvmJit.tableLock);
        gDvmJit.translationChains = 0;
    }
    gDvmJit.hasNewChain = false;
}

#define P_GPR_1 PhysicalReg_EBX
/* Add an additional jump instruction, keeping the jump target 4-byte aligned. */
static void insertJumpHelp()
{
    int rem = (uint)stream % 4;
    int nop_size = 3 - rem;
    dump_nop(nop_size);
    unconditional_jump_int(0, OpndSize_32);
    return;
}
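
/*
 * Why nop_size is 3 - rem: the "jmp 0" emitted above is 5 bytes -- 1 opcode
 * byte followed by a 4-byte displacement -- so the displacement begins
 * 1 byte after the jump starts. Padding with 3 - (stream % 4) nops places
 * that displacement on a 4-byte boundary, letting the chainer patch it
 * later with a single aligned 4-byte store. Example with an illustrative
 * address: if stream is at ...0x06, rem = 2, so one nop moves the opcode
 * to ...0x07 and the displacement to ...0x08.
 */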

/* Chaining cell for code that may need warmup. */
/* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?)
                 blx r0
                 data 0xb23a //bytecode address: 0x5115b23a
                 data 0x5115
   IA32 assembly:
                  jmp  0 //5 bytes
                  movl address, %ebx
                  movl dvmJitToInterpNormal, %eax
                  call %eax
                  <-- return address
*/
static void handleNormalChainingCell(CompilationUnit *cUnit,
                                     unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
     * resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/*
 * Chaining cell for instructions that immediately follow already translated
 * code.
 */
static void handleHotChainingCell(CompilationUnit *cUnit,
                                  unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
     * resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for branches that branch back into the same basic block */
static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
                                     unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
     * resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for monomorphic method invocations. */
static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
                                              const Method *callee, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
          cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
     * resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
}
#undef P_GPR_1

/* Chaining cell for polymorphic (virtual/interface) method invocations. */
static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
{
    if(dump_x86_inst)
        ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
#ifndef PREDICTED_CHAINING
    //assume rPC for callee->insns in %ebx
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelectNoChain();
#else
    /* make sure section for predicted chaining cell is 4-byte aligned */
    //int padding = (4 - ((u4)stream & 3)) & 3;
    //stream += padding;
    int* streamData = (int*)stream;
    /* Should not be executed in the initial state */
    streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
    streamData[1] = 0;
    /* To be filled: class */
    streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
    /* To be filled: method */
    streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
    /*
     * Rechain count. The initial value of 0 here will trigger chaining upon
     * the first invocation of this callsite.
     */
    streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
#if 0
    ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
          *((int*)(stream+8)), *((int*)(stream+12)));
#endif
    stream += 20; //5 *4
#endif
}
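
/*
 * The 20 bytes emitted above correspond to the five 32-bit slots of a
 * PredictedChainingCell (field names as used elsewhere in this file --
 * branch, branch2, clazz, method -- plus the rechain counter):
 *
 *     word 0: branch   - patched with a "jmp callee" by the chainer
 *     word 1: branch2  - spillover for the branch encoding
 *     word 2: clazz    - predicted class, the key that is compared
 *     word 3: method   - predicted callee
 *     word 4: counter  - rechain counter
 *
 * dvmJitToPatchPredictedChain() fills in branch/clazz/method, while
 * dvmJitUnchain() resets only clazz, for the race reason documented there.
 */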

/* Load the Dalvik PC into r0 and jump to the specified target */
static void handlePCReconstruction(CompilationUnit *cUnit,
                                   LowOpBlockLabel *targetLabel)
{
#if 0
    LowOp **pcrLabel =
        (LowOp **) cUnit->pcReconstructionList.elemList;
    int numElems = cUnit->pcReconstructionList.numUsed;
    int i;
    for (i = 0; i < numElems; i++) {
        dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
        /* r0 = dalvik PC */
        loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
        genUnconditionalBranch(cUnit, targetLabel);
    }
#endif
}

//use O0 code generator for hoisted checks outside of the loop
/*
 * vA = arrayReg;
 * vB = idxReg;
 * vC = endConditionReg;
 * arg[0] = maxC
 * arg[1] = minC
 * arg[2] = loopBranchConditionCode
 */
#define P_GPR_1 PhysicalReg_EBX
#define P_GPR_2 PhysicalReg_ECX
static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
{
    /*
     * NOTE: these synthesized blocks don't have ssa names assigned
     * for Dalvik registers.  However, because they dominate the following
     * blocks we can simply use the Dalvik name w/ subscript 0 as the
     * ssa name.
     */
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign the array in virtual register vA to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign the loop end condition in virtual register vC to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
    export_pc();
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
    int delta = maxC;
    /*
     * If the loop end condition is ">=" instead of ">", then the largest value
     * of the index is "endCondition - 1".
     */
    if (dInsn->arg[2] == OP_IF_GE) {
        delta--;
    }

    if (delta < 0) { //+delta
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
    } else if(delta > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
    }
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
}
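
/*
 * A sketch of the loop shape this hoisting targets (names illustrative):
 *
 *     for (int i = lo; i < end; i++) { ... a[i + maxC] ... }
 *
 * Rather than null- and bound-checking on every iteration, the checks run
 * once before the loop: "a" is compared against null, and the largest
 * index the loop can touch is compared against a.length. For the
 * "i < end" loop above the loop-end condition is "i >= end" (OP_IF_GE),
 * so the largest index is end - 1 + maxC, matching the delta-- adjustment
 * above; either failing check jumps to the trace's exception block.
 */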

/*
 * vA = arrayReg;
 * vB = idxReg;
 * vC = endConditionReg;
 * arg[0] = maxC
 * arg[1] = minC
 * arg[2] = loopBranchConditionCode
 */
static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
{
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign the array in virtual register vA to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign the initial index in virtual register vB to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
    export_pc();
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);

    if (maxC < 0) {
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
    } else if(maxC > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
    }
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);

}
#undef P_GPR_1
#undef P_GPR_2

/*
 * vA = idxReg;
 * vB = minC;
 */
#define P_GPR_1 PhysicalReg_ECX
static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
{
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int minC = dInsn->vB;
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //index
    export_pc();
    compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
}
#undef P_GPR_1

#ifdef WITH_JIT_INLINING
static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
{
    CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
    if(gDvm.executionMode == kExecutionModeNcgO0) {
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
        compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
        export_pc(); //use %edx
        conditional_jump_global_API(Condition_E, "common_errNullObject", false);
        move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
        compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
    } else {
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
        nullCheck(5, false, 1, mir->dalvikInsn.vC);
        move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
        compare_reg_reg(4, false, 6, false);
    }

    //immediate will be updated later in genLandingPadForMispredictedCallee
    streamMisPred = stream;
    callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
}
#endif

/* Extended MIR instructions like PHI */
void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
{
    ExecutionMode origMode = gDvm.executionMode;
    gDvm.executionMode = kExecutionModeNcgO0;
    switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
        case kMirOpPhi: {
            break;
        }
        case kMirOpNullNRangeUpCheck: {
            genHoistedChecksForCountUpLoop(cUnit, mir);
            break;
        }
        case kMirOpNullNRangeDownCheck: {
            genHoistedChecksForCountDownLoop(cUnit, mir);
            break;
        }
        case kMirOpLowerBound: {
            genHoistedLowerBoundCheck(cUnit, mir);
            break;
        }
        case kMirOpPunt: {
            break;
        }
#ifdef WITH_JIT_INLINING
        case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
            genValidationForPredictedInline(cUnit, mir);
            break;
        }
#endif
        default:
            break;
    }
    gDvm.executionMode = origMode;
}

static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
                                int bodyId)
{
    /*
     * Next, create two branches - one branch over to the loop body and the
     * other branch to the PCR cell to punt.
     */
    //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
    //setupResourceMasks(branchToBody);
    //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);

#if 0
    LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
    branchToPCR->opCode = kThumbBUncond;
    branchToPCR->generic.target = (LIR *) pcrLabel;
    setupResourceMasks(branchToPCR);
    cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
#endif
}

/* Check whether we can merge a block ending with "goto" with its taken block */
bool mergeBlock(BasicBlock *bb) {
    if(bb->blockType == kDalvikByteCode &&
       bb->firstMIRInsn != NULL &&
       (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
        bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
        bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
       bb->fallThrough == NULL) {// &&
       //cUnit->hasLoop) {
        //ALOGI("merge blocks ending with goto at index %d", i);
        MIR* prevInsn = bb->lastMIRInsn->prev;
        if(bb->taken == NULL) return false;
        MIR* mergeInsn = bb->taken->firstMIRInsn;
        if(mergeInsn == NULL) return false;
        if(prevInsn == NULL) {//the block has a single instruction
            bb->firstMIRInsn = mergeInsn;
        } else {
            prevInsn->next = mergeInsn; //remove goto from the chain
        }
        mergeInsn->prev = prevInsn;
        bb->lastMIRInsn = bb->taken->lastMIRInsn;
        bb->taken->firstMIRInsn = NULL; //block being merged in
        bb->fallThrough = bb->taken->fallThrough;
        bb->taken = bb->taken->taken;
        return true;
    }
    return false;
}
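
/*
 * Before/after sketch of the merge (block contents illustrative):
 *
 *     before:  BB1: [insn1, insn2, goto] --taken--> BB2: [insn3, insn4]
 *     after:   BB1: [insn1, insn2, insn3, insn4]
 *              BB1.taken       = BB2.taken
 *              BB1.fallThrough = BB2.fallThrough
 *              BB2.firstMIRInsn = NULL   // BB2 now empty, will be skipped
 *
 * The goto MIR is unlinked rather than lowered, so the merged block falls
 * straight through. The caller in dvmCompilerMIR2LIR() keeps calling
 * mergeBlock() on the same block until it returns false, collapsing whole
 * chains of gotos.
 */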

static int genTraceProfileEntry(CompilationUnit *cUnit)
{
    cUnit->headerSize = 6;
    if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
        (gDvmJit.profileMode == kTraceProfilingDisabled)) {
        return 12;
    } else {
        return 4;
    }

}

#define PRINT_BUFFER_LEN 1024
/* Print the code block in the code cache in the range [startAddr, endAddr)
 * in readable format.
 */
void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
{
    char strbuf[PRINT_BUFFER_LEN];
    unsigned char *addr;
    unsigned char *next_addr;
    int n;

    if (gDvmJit.printBinary) {
        // print binary in bytes
        n = 0;
        for (addr = startAddr; addr < endAddr; addr++) {
            n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
            if (n > PRINT_BUFFER_LEN - 10) {
                ALOGD("## %s", strbuf);
                n = 0;
            }
        }
        if (n > 0)
            ALOGD("## %s", strbuf);
    }

    // print disassembled instructions
    addr = startAddr;
    while (addr < endAddr) {
        next_addr = reinterpret_cast<unsigned char*>
            (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
                                       strbuf, PRINT_BUFFER_LEN));
        if (addr != next_addr) {
            ALOGD("**  %p: %s", addr, strbuf);
        } else {                // check whether this is nop padding
            if (addr[0] == 0x90) {
                ALOGD("**  %p: NOP (1 byte)", addr);
                next_addr += 1;
            } else if (addr[0] == 0x66 && addr[1] == 0x90) {
                ALOGD("**  %p: NOP (2 bytes)", addr);
                next_addr += 2;
            } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
                ALOGD("**  %p: NOP (3 bytes)", addr);
                next_addr += 3;
            } else {
                ALOGD("** unable to decode binary at %p", addr);
                break;
            }
        }
        addr = next_addr;
    }
}

/* 4 is the number of additional bytes needed for chaining information for a trace:
 * 2 bytes for the chaining cell count offset and 2 bytes for the chaining cell offset */
#define EXTRA_BYTES_FOR_CHAINING 4
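
/*
 * Resulting trace layout in the code cache (a sketch; exact offsets depend
 * on the alignment padding added in dvmCompilerMIR2LIR below). The two u2
 * fields sit immediately before the aligned code start, which is why
 * dvmJitUnchain() above reads them at codeAddr - 4 and codeAddr - 2:
 *
 *     streamStart:  EXTRA_BYTES_FOR_CHAINING bytes + alignment padding
 *     codeAddr - 4: u2 offset from codeAddr to the chain cell counts
 *     codeAddr - 2: u2 offset from codeAddr to the first chaining cell
 *     codeAddr:     translated code, 16-byte aligned
 */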
   1038 
   1039 /* Entry function to invoke the backend of the JIT compiler */
   1040 void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
   1041 {
   1042     dump_x86_inst = cUnit->printMe;
   1043     /* Used to hold the labels of each block */
   1044     LowOpBlockLabel *labelList =
   1045         (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
   1046     LowOp *headLIR = NULL;
   1047     GrowableList chainingListByType[kChainingCellLast];
   1048     unsigned int i, padding;
   1049 
   1050     /*
   1051      * Initialize various types chaining lists.
   1052      */
   1053     for (i = 0; i < kChainingCellLast; i++) {
   1054         dvmInitGrowableList(&chainingListByType[i], 2);
   1055     }
   1056 
   1057     /* Clear the visited flag for each block */
   1058     dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
   1059                                           kAllNodes, false /* isIterative */);
   1060 
   1061     GrowableListIterator iterator;
   1062     dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
   1063 
   1064     /* Traces start with a profiling entry point.  Generate it here */
   1065     cUnit->profileCodeSize = genTraceProfileEntry(cUnit);
   1066 
   1067     //BasicBlock **blockList = cUnit->blockList;
   1068     GrowableList *blockList = &cUnit->blockList;
   1069     BasicBlock *bb;
   1070 
   1071     info->codeAddress = NULL;
   1072     stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
   1073 
   1074     // TODO: compile into a temporary buffer and then copy into the code cache.
   1075     // That would let us leave the code cache unprotected for a shorter time.
   1076     size_t unprotected_code_cache_bytes =
   1077             gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING;
   1078     UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
   1079 
   1080     streamStart = stream; /* trace start before alignment */
   1081     stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
   1082     stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16-bytes */
   1083     streamMethodStart = stream; /* code start */
   1084     for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
   1085         labelList[i].lop.generic.offset = -1;
   1086     }
   1087     cUnit->exceptionBlockId = -1;
   1088     for (i = 0; i < blockList->numUsed; i++) {
   1089         bb = (BasicBlock *) blockList->elemList[i];
   1090         if(bb->blockType == kExceptionHandling)
   1091             cUnit->exceptionBlockId = i;
   1092     }
   1093     startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
   1094     if(gDvm.executionMode == kExecutionModeNcgO1) {
   1095         //merge blocks ending with "goto" with the fall through block
   1096         if (cUnit->jitMode != kJitLoop)
   1097             for (i = 0; i < blockList->numUsed; i++) {
   1098                 bb = (BasicBlock *) blockList->elemList[i];
   1099                 bool merged = mergeBlock(bb);
   1100                 while(merged) merged = mergeBlock(bb);
   1101             }
   1102         for (i = 0; i < blockList->numUsed; i++) {
   1103             bb = (BasicBlock *) blockList->elemList[i];
   1104             if(bb->blockType == kDalvikByteCode &&
   1105                bb->firstMIRInsn != NULL) {
   1106                 preprocessingBB(bb);
   1107             }
   1108         }
   1109         preprocessingTrace();
   1110     }
   1111 
   1112     /* Handle the content in each basic block */
   1113     for (i = 0; ; i++) {
   1114         MIR *mir;
   1115         bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
   1116         if (bb == NULL) break;
   1117         if (bb->visited == true) continue;
   1118 
   1119         labelList[i].immOpnd.value = bb->startOffset;
   1120 
   1121         if (bb->blockType >= kChainingCellLast) {
   1122             /*
   1123              * Append the label pseudo LIR first. Chaining cells will be handled
   1124              * separately afterwards.
   1125              */
   1126             dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
   1127         }
   1128 
   1129         if (bb->blockType == kEntryBlock) {
   1130             labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
   1131             if (bb->firstMIRInsn == NULL) {
   1132                 continue;
   1133             } else {
   1134               setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
   1135                                   //&labelList[blockList[i]->fallThrough->id]);
   1136             }
   1137         } else if (bb->blockType == kExitBlock) {
   1138             labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
   1139             labelList[i].lop.generic.offset = (stream - streamMethodStart);
   1140             goto gen_fallthrough;
   1141         } else if (bb->blockType == kDalvikByteCode) {
   1142             if (bb->hidden == true) continue;
   1143             labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
   1144             /* Reset the register state */
   1145 #if 0
   1146             resetRegisterScoreboard(cUnit);
   1147 #endif
   1148         } else {
   1149             switch (bb->blockType) {
   1150                 case kChainingCellNormal:
   1151                     labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
   1152                     /* handle the codegen later */
   1153                     dvmInsertGrowableList(
   1154                         &chainingListByType[kChainingCellNormal], i);
   1155                     break;
   1156                 case kChainingCellInvokeSingleton:
   1157                     labelList[i].lop.opCode2 =
   1158                         ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
   1159                     labelList[i].immOpnd.value =
   1160                         (int) bb->containingMethod;
   1161                     /* handle the codegen later */
   1162                     dvmInsertGrowableList(
   1163                         &chainingListByType[kChainingCellInvokeSingleton], i);
   1164                     break;
   1165                 case kChainingCellInvokePredicted:
   1166                     labelList[i].lop.opCode2 =
   1167                         ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
   1168                    /*
   1169                      * Move the cached method pointer from operand 1 to 0.
   1170                      * Operand 0 was clobbered earlier in this routine to store
   1171                      * the block starting offset, which is not applicable to
   1172                      * predicted chaining cell.
   1173                      */
   1174                     //TODO
   1175                     //labelList[i].operands[0] = labelList[i].operands[1];
   1176 
   1177                     /* handle the codegen later */
   1178                     dvmInsertGrowableList(
   1179                         &chainingListByType[kChainingCellInvokePredicted], i);
   1180                     break;
   1181                 case kChainingCellHot:
   1182                     labelList[i].lop.opCode2 =
   1183                         ATOM_PSEUDO_CHAINING_CELL_HOT;
   1184                     /* handle the codegen later */
   1185                     dvmInsertGrowableList(
   1186                         &chainingListByType[kChainingCellHot], i);
   1187                     break;
   1188                 case kPCReconstruction:
   1189                     /* Make sure exception handling block is next */
   1190                     labelList[i].lop.opCode2 =
   1191                         ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
   1192                     //assert (i == cUnit->numBlocks - 2);
   1193                     labelList[i].lop.generic.offset = (stream - streamMethodStart);
   1194                     handlePCReconstruction(cUnit,
   1195                                            &labelList[cUnit->puntBlock->id]);
   1196                     break;
   1197                 case kExceptionHandling:
   1198                     labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
   1199                     labelList[i].lop.generic.offset = (stream - streamMethodStart);
   1200                     //if (cUnit->pcReconstructionList.numUsed) {
   1201                         scratchRegs[0] = PhysicalReg_EAX;
   1202                         jumpToInterpPunt();
   1203                         //call_dvmJitToInterpPunt();
   1204                     //}
   1205                     break;
   1206                 case kChainingCellBackwardBranch:
   1207                     labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
   1208                     /* handle the codegen later */
   1209                     dvmInsertGrowableList(
   1210                         &chainingListByType[kChainingCellBackwardBranch],
   1211                         i);
   1212                     break;
   1213                 default:
   1214                     break;
   1215             }
   1216             continue;
   1217         }
        {
        const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
        const u2 *startCodePtr = dexCode->insns;
        const u2 *codePtr;
        labelList[i].lop.generic.offset = (stream - streamMethodStart);
        ALOGV("handling JIT bb %d, type %d, hidden %d",
              bb->id, bb->blockType, bb->hidden);
        /* codegen for a block may queue a follow-on block through
         * cUnit->nextCodegenBlock */
        for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
            bb = nextBB;
            bb->visited = true;
            cUnit->nextCodegenBlock = NULL;

        if (gDvm.executionMode == kExecutionModeNcgO1 &&
            bb->blockType != kEntryBlock &&
            bb->firstMIRInsn != NULL) {
            /* O1: lower the whole basic block at once */
            startOfBasicBlock(bb);
            int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
            endOfBasicBlock(bb);
            if (cg_ret < 0) {
                endOfTrace(true/*freeOnly*/);
                cUnit->baseAddr = NULL;
                ALOGI("codeGenBasicBlockJit returned an error (%d)", cg_ret);
                PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                return;
            }
        } else {
        for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
            startOfBasicBlock(bb); /* note: called once per MIR in the O0 path */
            Opcode dalvikOpCode = mir->dalvikInsn.opcode;
            if ((int)dalvikOpCode >= (int)kMirOpFirst) {
                handleExtendedMIR(cUnit, mir);
                continue;
            }
            InstructionFormat dalvikFormat =
                dexGetFormatFromOpcode(dalvikOpCode);
            ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
                  mir->offset, dalvikOpCode, dalvikFormat);
            LowOpImm *boundaryLIR =
                dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
            /* Remember the first LIR for this block */
            if (headLIR == NULL) {
                headLIR = (LowOp*)boundaryLIR;
            }
            bool notHandled = true;
            /*
             * Debugging: screen the opcode first to see if it is in the
             * do[-not]-compile list
             */
            bool singleStepMe =
                gDvmJit.includeSelectedOp !=
                ((gDvmJit.opList[dalvikOpCode >> 3] &
                  (1 << (dalvikOpCode & 0x7))) !=
                 0);
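            /*
             * Worked example of the bitmap test above: for dalvikOpCode
             * 0x1A, the byte index is 0x1A >> 3 == 3 and the bit index is
             * 0x1A & 0x7 == 2 (mask 0x04); singleStepMe is true when that
             * bit's state disagrees with gDvmJit.includeSelectedOp.
             */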
            /* selected opcodes are not lowered; notHandled then stays true
             * and triggers the abort below */
            if (!singleStepMe && !cUnit->allSingleStep) {
                codePtr = startCodePtr + mir->offset;
                /* lower the bytecode into LIR and machine code */
                notHandled = lowerByteCodeJit(cUnit->method,
                                              cUnit->method->insns + mir->offset, mir);
                if (gDvmJit.codeCacheByteUsed + (stream - streamStart) +
                    CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                    ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)",
                          (unsigned)(stream - streamStart));
                    gDvmJit.codeCacheFull = true;
                    cUnit->baseAddr = NULL;
                    endOfTrace(true/*freeOnly*/);
                    PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                    return;
                }
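                /*
                 * The capacity check above, with made-up numbers: suppose
                 * codeCacheSize is 524288 (512KB), codeCacheByteUsed is
                 * 523000, and this trace has emitted 1400 bytes so far;
                 * 523000 + 1400 + CODE_CACHE_PADDING exceeds 524288, so the
                 * half-built trace is freed and compilation bails out.
                 */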
            }
            if (notHandled) {
                ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
                      mir->offset,
                      dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
                      dalvikFormat);
                dvmAbort();
                break;
            }
        } // end for (mir)
        } // end else: O0, one bytecode at a time
        } // end for (nextBB)
        } // end codegen scope
        /*
         * Eliminate redundant loads/stores and delay stores into later
         * slots -- currently disabled.
         */
#if 0
        dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
                                           cUnit->lastLIRInsn);
#endif
        headLIR = NULL;
gen_fallthrough:
        /*
         * If the block was terminated by the trace length constraint,
         * insert an unconditional branch to the fall-through chaining cell.
         */
        if (bb->needFallThroughBranch) {
            jumpToBasicBlock(stream, bb->fallThrough->id);
        }
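        /*
         * Illustration: when the trace builder cuts a block purely for
         * length, control still has to reach the untranslated fall-through
         * code, so an unconditional jmp to that block's chaining cell is
         * emitted; the cell starts out falling back to the interpreter and
         * can later be patched by dvmJitChain to jump straight to another
         * translation.
         */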

    }

    char* streamChainingStart = (char*)stream;
    /* Handle the chaining cells in predefined order */
    for (i = 0; i < kChainingCellGap; i++) {
        size_t j;
        int *blockIdList = (int *) chainingListByType[i].elemList;

        cUnit->numChainingCells[i] = chainingListByType[i].numUsed;

        /* No chaining cells of this type */
        if (cUnit->numChainingCells[i] == 0)
            continue;

        /* Record the first LIR for a new type of chaining cell */
        cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
        for (j = 0; j < chainingListByType[i].numUsed; j++) {
            int blockId = blockIdList[j];
            BasicBlock *chainingBlock =
                (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
                                                         blockId);

            labelList[blockId].lop.generic.offset = (stream - streamMethodStart);

            /* 4-byte alignment of each chaining cell -- currently disabled */
#if 0
            newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
#endif
            /* Insert the pseudo chaining instruction */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);

            switch (chainingBlock->blockType) {
                case kChainingCellNormal:
                    handleNormalChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellInvokeSingleton:
                    handleInvokeSingletonChainingCell(cUnit,
                        chainingBlock->containingMethod, blockId, labelList);
                    break;
                case kChainingCellInvokePredicted:
                    handleInvokePredictedChainingCell(cUnit, blockId);
                    break;
                case kChainingCellHot:
                    handleHotChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellBackwardBranch:
                    handleBackwardBranchChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                default:
                    ALOGE("Bad block type %d", chainingBlock->blockType);
                    dvmAbort();
                    break;
            }

            if (gDvmJit.codeCacheByteUsed + (stream - streamStart) +
                CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                ALOGI("JIT code cache full after ChainingCell (trace uses %uB)",
                      (unsigned)(stream - streamStart));
                gDvmJit.codeCacheFull = true;
                cUnit->baseAddr = NULL;
                endOfTrace(true); /* need to free structures */
                PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                return;
            }
        }
    }
#if 0
    dvmCompilerApplyGlobalOptimizations(cUnit);
#endif
    endOfTrace(false);

    if (gDvmJit.codeCacheFull) {
        /*
         * The code cache size limit was hit inside endOfTrace(false);
         * bail out on this trace.
         */
        ALOGI("JIT code cache full after endOfTrace (trace uses %uB)",
              (unsigned)(stream - streamStart));
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
        return;
    }

    /* Emit the section for chaining cell counts; make sure it is 4-byte aligned */
    padding = (4 - ((u4)stream & 3)) & 3;
    stream += padding;
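    /*
     * Worked example of the padding arithmetic above: if stream ends at an
     * address with (addr & 3) == 1, then padding = (4 - 1) & 3 == 3 and the
     * counts start on the next 4-byte boundary; if the address is already
     * aligned, (4 - 0) & 3 == 0 and nothing is skipped.
     */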
    ChainCellCounts chainCellCounts;
    /* Install the chaining cell counts */
    for (i = 0; i < kChainingCellGap; i++) {
        chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
    }
    char* streamCountStart = (char*)stream;
    memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
    stream += sizeof(chainCellCounts);

    cUnit->baseAddr = streamMethodStart;
    cUnit->totalSize = (stream - streamStart);
    if (gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING >
        gDvmJit.codeCacheSize) {
        ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)",
              (unsigned)(stream - streamStart));
        gDvmJit.codeCacheFull = true;
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
        return;
    }

    /* Write the chaining cell count offset and the chaining cell offset;
     * space was already reserved for them ahead of the method start. */
    u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING);
    *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
    pOffset[1] = streamChainingStart - streamMethodStart;
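
    /*
     * Resulting trace layout (a sketch reconstructed from the offsets
     * written above):
     *
     *   streamMethodStart - EXTRA_BYTES_FOR_CHAINING:
     *       u2 pOffset[0]: offset of streamCountStart    from the code start
     *       u2 pOffset[1]: offset of streamChainingStart from the code start
     *   streamMethodStart (== cUnit->baseAddr): translated trace code
     *   streamChainingStart: chaining cells, grouped by type
     *   streamCountStart: ChainCellCounts, 4-byte aligned
     */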

    PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);

    gDvmJit.codeCacheByteUsed += (stream - streamStart);
    if (cUnit->printMe) {
        unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
        unsigned char* codeBaseAddrNext =
            ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
        ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
              cUnit->method->clazz->descriptor, cUnit->method->name,
              codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
        ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
              cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
        printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
    }
    ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
          (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
          cUnit->totalSize, gDvmJit.codeCache);

    gDvmJit.numCompilations++;

    info->codeAddress = (char*)cUnit->baseAddr; /* + cUnit->headerSize */
}

/*
 * Perform the translation chaining operation: patch the branch at
 * branchAddr so that it jumps directly to tgtAddr.
 */
void* dvmJitChain(void* tgtAddr, u4* branchAddr)
{
#ifdef JIT_CHAIN
    int relOffset = (int) tgtAddr - (int) branchAddr;

    if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
        (gDvmJit.codeCacheFull == false)) {

        gDvmJit.translationChains++;

        /*
         * The jump operand size is hard-coded to 32 bits; this instruction
         * replaces the "jmp 0" placeholder in the original code sequence.
         */
        OpndSize immSize = OpndSize_32;
        relOffset -= 5;
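        /*
         * Worked example: the 32-bit relative JMP (opcode 0xE9 + imm32) is
         * 5 bytes long and its displacement is relative to the end of the
         * instruction.  With branchAddr = 0x40001000 and
         * tgtAddr = 0x40002000, relOffset = 0x1000 - 5 = 0xFFB, and the
         * bytes written are E9 FB 0F 00 00.
         */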
        /* the global stream pointer cannot be used here; it belongs to the
         * compilation thread */
        UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
        dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset,
                               (char*)branchAddr); /* emit at branchAddr */
        PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));

        gDvmJit.hasNewChain = true;

        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
                  (int) branchAddr, tgtAddr, relOffset));
    }
#endif
    return tgtAddr;
}

/*
 * Accept the work and start compiling.  Returns true if compilation
 * is attempted.
 */
bool dvmCompilerDoWork(CompilerWorkOrder *work)
{
    JitTraceDescription *desc;
    bool isCompile;
    bool success = true;

    if (gDvmJit.codeCacheFull) {
        return false;
    }

    switch (work->kind) {
        case kWorkOrderTrace:
            isCompile = true;
            /* Start compilation with the maximum allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                      work->bailPtr, 0 /* no hints */);
            break;
        case kWorkOrderTraceDebug: {
            bool oldPrintMe = gDvmJit.printMe;
            gDvmJit.printMe = true;
            isCompile = true;
            /* Start compilation with the maximum allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                      work->bailPtr, 0 /* no hints */);
            gDvmJit.printMe = oldPrintMe;
            break;
        }
        case kWorkOrderProfileMode:
            dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
            isCompile = false;
            break;
        default:
            isCompile = false;
            ALOGE("Jit: unknown work order type");
            assert(0);  /* bail on debug builds; ignore on release */
    }
    if (!success)
        work->result.codeAddress = NULL;
    return isCompile;
}

void dvmCompilerCacheFlush(long start, long end, long flags) {
  /* A cache flush is needed on ARM but not on IA32: the x86 instruction
   * cache is kept coherent with stores by hardware. */
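  /*
   * For comparison (an illustration, not this backend's behavior): an ARM
   * backend would have to invalidate the patched range here, e.g. through
   * the cacheflush() syscall wrapper, because ARM instruction and data
   * caches are not hardware-coherent after code is written or patched.
   */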
}
