/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <sys/mman.h>
#include "Dalvik.h"
#include "libdex/DexOpcodes.h"
#include "compiler/Compiler.h"
#include "compiler/CompilerIR.h"
#include "interp/Jit.h"
#include "libdex/DexFile.h"
#include "Lower.h"
#include "NcgAot.h"
#include "compiler/codegen/CompilerCodegen.h"

/* Init value when a predicted chain is initially assembled */
/* 0xE7FE is the (ARM Thumb) branch-to-self encoding */
#define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe

/* Target-specific save/restore */
extern "C" void dvmJitCalleeSave(double *saveArea);
extern "C" void dvmJitCalleeRestore(double *saveArea);

/*
 * Determine the initial instruction set to be used for this trace.
 * Later components may decide to change this.
 */
//JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
JitInstructionSetType dvmCompilerInstructionSet(void)
{
    return DALVIK_JIT_IA32;
}

JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
{
    return DALVIK_JIT_IA32;
}

/* We don't use a template for IA32 */
void *dvmCompilerGetInterpretTemplate()
{
    return NULL;
}

/* Track the number of times that the code cache is patched */
#if defined(WITH_JIT_TUNING)
#define UPDATE_CODE_CACHE_PATCHES()    (gDvmJit.codeCachePatches++)
#else
#define UPDATE_CODE_CACHE_PATCHES()
#endif

bool dvmCompilerArchInit() {
    /* Target-specific configuration */
    gDvmJit.jitTableSize = 1 << 12;
    gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
    if (gDvmJit.threshold == 0) {
        gDvmJit.threshold = 255;
    }
    if (gDvmJit.codeCacheSize == DEFAULT_CODE_CACHE_SIZE) {
        gDvmJit.codeCacheSize = 512 * 1024;
    } else if ((gDvmJit.codeCacheSize == 0) && (gDvm.executionMode == kExecutionModeJit)) {
        gDvm.executionMode = kExecutionModeInterpFast;
    }
    gDvmJit.optLevel = kJitOptLevelO1;

    //Disable Method-JIT
    gDvmJit.disableOpt |= (1 << kMethodJit);

#if defined(WITH_SELF_VERIFICATION)
    /* Force into blocking mode */
    gDvmJit.blockingMode = true;
    gDvm.nativeDebuggerActive = true;
#endif

    // Make sure all threads have current values
    dvmJitUpdateThreadStateAll();

    return true;
}

void dvmCompilerPatchInlineCache(void)
{
    int i;
    PredictedChainingCell *minAddr, *maxAddr;

    /* Nothing to be done */
    if (gDvmJit.compilerICPatchIndex == 0) return;

    /*
     * Since all threads are already stopped we don't really need to acquire
     * the lock. But a race condition could easily be introduced by future
     * changes, so we still acquire the lock here.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);

    /* Initialize the min/max address range */
    minAddr = (PredictedChainingCell *)
        ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
    maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;

    for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
        ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
        PredictedChainingCell *cellAddr = workOrder->cellAddr;
        PredictedChainingCell *cellContent = &workOrder->cellContent;
        ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
                                                workOrder->classLoader);

        assert(clazz->serialNumber == workOrder->serialNumber);

        /* Use the newly resolved clazz pointer */
        cellContent->clazz = clazz;

        if (cellAddr->clazz == NULL) {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
                      cellAddr,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        } else {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
                      "patched",
                      cellAddr,
                      cellAddr->clazz->descriptor,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        }

        /* Patch the chaining cell */
        *cellAddr = *cellContent;
        minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
        maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
    }

    PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    gDvmJit.compilerICPatchIndex = 0;
    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
}


/* Target-specific cache clearing */
void dvmCompilerCacheClear(char *start, size_t size)
{
    /* "0xFF 0xFF" is an invalid opcode for x86. */
    memset(start, 0xFF, size);
}

/* For JIT debugging, to be implemented */
void dvmJitCalleeSave(double *saveArea) {
}

void dvmJitCalleeRestore(double *saveArea) {
}

void dvmJitToInterpSingleStep() {
}

JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
                                            const JitEntry *knownEntry) {
    return NULL;
}

void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
{
}

void dvmCompilerArchDump(void)
{
}

char *getTraceBase(const JitEntry *p)
{
    return NULL;
}

void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
{
}

void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
{
}

void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
{
    // Method-based JIT not supported for x86.
}

void dvmJitScanAllClassPointers(void (*callback)(void *))
{
}

/* Handy function to retrieve the profile count */
static inline int getProfileCount(const JitEntry *entry)
{
    if (entry->dPC == 0 || entry->codeAddress == 0)
        return 0;
    u4 *pExecutionCount = (u4 *) getTraceBase(entry);

    return pExecutionCount ? *pExecutionCount : 0;
}

/* qsort callback function */
static int sortTraceProfileCount(const void *entry1, const void *entry2)
{
    const JitEntry *jitEntry1 = (const JitEntry *)entry1;
    const JitEntry *jitEntry2 = (const JitEntry *)entry2;

    JitTraceCounter_t count1 = getProfileCount(jitEntry1);
    JitTraceCounter_t count2 = getProfileCount(jitEntry2);
    return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
}

/* Sort the trace profile counts and dump them */
void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
{
    JitEntry *sortedEntries;
    int numTraces = 0;
    unsigned long counts = 0;
    unsigned int i;

    /* Make sure that the table is not changing */
    dvmLockMutex(&gDvmJit.tableLock);

    /* Sort the entries in descending order */
    sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
    if (sortedEntries == NULL)
        goto done;
    memcpy(sortedEntries, gDvmJit.pJitEntryTable,
           sizeof(JitEntry) * gDvmJit.jitTableSize);
    qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
          sortTraceProfileCount);

    /* Count the traces and accumulate their execution counts */
    for (i = 0; i < gDvmJit.jitTableSize; i++) {
        if (sortedEntries[i].dPC != 0) {
            counts += getProfileCount(&sortedEntries[i]);
            numTraces++;
        }
    }
    if (numTraces == 0)
        numTraces = 1;
    ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces));

    free(sortedEntries);
done:
    dvmUnlockMutex(&gDvmJit.tableLock);
    return;
}

void jumpWithRelOffset(char* instAddr, int relOffset) {
    stream = instAddr;
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
    dump_imm(Mnemonic_JMP, immSize, relOffset);
}
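
/*
 * Usage sketch: the caller passes target - instAddr, and the helper
 * subtracts the JMP instruction size because an x86 relative jump
 * displacement is counted from the end of the instruction:
 *
 *   char *instAddr = ...;   // where the JMP is emitted
 *   char *target = ...;     // desired landing address
 *   jumpWithRelOffset(instAddr, target - instAddr);
 */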

// works whether or not instructions for the target basic block have been generated
LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
    stream = instAddr;
    bool unknown;
    OpndSize size;
    int relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
    unconditional_jump_int(relativeNCG, size);
    return NULL;
}

LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
    stream = instAddr;
    bool unknown;
    OpndSize size;
    int relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
    conditional_jump_int(cc, relativeNCG, size);
    return NULL;
}

/*
 * Attempt to enqueue a work order to patch an inline cache for a predicted
 * chaining cell for virtual/interface calls.
 */
static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
                                    PredictedChainingCell *newContent)
{
    bool result = true;

    /*
     * Make sure only one thread gets here since updating the cell (ie the
     * fast path) and queueing the request (ie the queued path) have to be
     * done in an atomic fashion.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    /* Fast path for uninitialized chaining cell */
    if (cellAddr->clazz == NULL &&
        cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->method = newContent->method;
        cellAddr->branch = newContent->branch;
        cellAddr->branch2 = newContent->branch2;

        /*
         * The update order matters - make sure clazz is updated last since it
         * will bring the uninitialized chaining cell to life.
         */
        android_atomic_release_store((int32_t)newContent->clazz,
            (volatile int32_t *)(void*) &cellAddr->clazz);
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if 0
        MEM_BARRIER();
        cellAddr->clazz = newContent->clazz;
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
#endif
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchInit++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method));
    /* Check if this is a frequently missed clazz */
    } else if (cellAddr->stagedClazz != newContent->clazz) {
        /* Not proven to be frequent yet - build up the filter cache */
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->stagedClazz = newContent->clazz;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchRejected++;
#endif
    /*
     * Different classes but same method implementation - it is safe to just
     * patch the class value without the need to stop the world.
     */
    } else if (cellAddr->method == newContent->method) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->clazz = newContent->clazz;
        /* No need to flush the cache here since the branch is not patched */
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchLockFree++;
#endif
    /*
     * Cannot patch the chaining cell inline - queue it until the next safe
     * point.
     */
    } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) {
        int index = gDvmJit.compilerICPatchIndex++;
        const ClassObject *clazz = newContent->clazz;

        gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
        gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
        gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
        gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
        /* For verification purposes only */
        gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchQueued++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name));
    } else {
        /* Queue is full - just drop this patch request */
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchDropped++;
#endif

        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name));
    }

    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
    return result;
}


/*
 * This method is called from the invoke templates for virtual and interface
 * methods to speculatively set up a chain to the callee. The templates are
 * written in assembly and have set up method, cell, and clazz at r0, r2, and
 * r3 respectively, so there is an unused argument in the list. Upon return one
 * of the following three results may happen:
 *   1) The chain is not set up because the callee is native. Reset the rechain
 *      count to a big number so that it will take a long time before the next
 *      rechain attempt happens.
 *   2) The chain is not set up because the callee has not been compiled yet.
 *      Reset the rechain count to a small number and retry in the near future.
 *   3) Ask all other threads to stop before patching this chaining cell.
 *      This is required because another thread may have passed the class check
 *      but may not have reached the chaining cell yet to follow the chain. If
 *      we patch the content before halting the other thread, there is a small
 *      window in which it may follow the new but wrong chain to invoke a
 *      different method.
 */
const Method *dvmJitToPatchPredictedChain(const Method *method,
                                          Thread *self,
                                          PredictedChainingCell *cell,
                                          const ClassObject *clazz)
{
    int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
    /* Don't come back here for a long time if the method is native */
    if (dvmIsNativeMethod(method)) {
        UNPROTECT_CODE_CACHE(cell, sizeof(*cell));

        /*
         * Put a non-zero/bogus value in the clazz field so that it won't
         * trigger immediate patching and will continue to fail to match with
         * a real clazz pointer.
         */
        cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cell, sizeof(*cell));
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
                  cell, method->name));
        goto done;
    }
    {
    int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);

    /*
     * The callee has not been compiled yet. Reset the counter to a small
     * value and come back to check soon.
     */
    if ((tgtAddr == 0) ||
        ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
                  cell, method->clazz->descriptor, method->name));
        goto done;
    }

    PredictedChainingCell newCell;

    if (cell->clazz == NULL) {
        newRechainCount = self->icRechainCount;
    }

    int relOffset = (int) tgtAddr - (int)cell;
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
    relOffset -= jumpSize;
    COMPILER_TRACE_CHAINING(
            ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
                  cell, method->clazz->descriptor, method->name, jumpSize));
    // can't use stream here since it is used by the compilation thread
    dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch

    newCell.clazz = clazz;
    newCell.method = method;

    /*
     * Enter the work order into the queue and the chaining cell will be
     * patched the next time a safe point is entered.
     *
     * If the enqueuing fails, reset the rechain count to a normal value so
     * that it won't get indefinitely delayed.
     */
    inlineCachePatchEnqueue(cell, &newCell);
    }
done:
    self->icRechainCount = newRechainCount;
    return method;
}

/*
 * Unchain a trace given the starting address of the translation
 * in the code cache.  Refer to the diagram in dvmCompilerAssembleLIR.
 * For ARM, it returns the address following the last cell unchained.
 * For IA, it returns NULL since a cache flush is not required for IA.
 */
u4* dvmJitUnchain(void* codeAddr)
{
    /* codeAddr is 4-byte aligned, so is chain cell count offset */
    u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
    u2 chainCellCountOffset = *pChainCellCountOffset;
    /* chain cell counts information is 4-byte aligned */
    ChainCellCounts *pChainCellCounts =
          (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
    u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
    u2 chainCellOffset = *pChainCellOffset;
    u1* pChainCells;
    int i,j;
    PredictedChainingCell *predChainCell;
    int padding;

    /* Locate the beginning of the chain cell region */
    pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);

    /* The cells are sorted in order - walk through them and reset */
    for (i = 0; i < kChainingCellGap; i++) {
        /* for hot, normal, singleton chaining:
               nop  //padding
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after chaining:
               nop
               jmp imm
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after unchaining:
               nop
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           Space occupied by the chaining cell in bytes: the nop is for
                padding, and the 4-byte target of "jmp 0" is 4-byte aligned.
           Space for a predicted chaining cell: 5 words = 20 bytes
        */
        int elemSize = 0;
        if (i == kChainingCellInvokePredicted) {
            elemSize = 20;
        }
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i]));

        for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
            switch(i) {
                case kChainingCellNormal:
                case kChainingCellHot:
                case kChainingCellInvokeSingleton:
                case kChainingCellBackwardBranch:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
                    pChainCells = (u1*) (((uint)pChainCells + 4) & (~0x03));
                    elemSize = 4+5+5+2;
                    memset(pChainCells, 0, 4);
                    break;
                case kChainingCellInvokePredicted:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of predicted"));
                    /* 4-byte aligned */
                    padding = (4 - ((u4)pChainCells & 3)) & 3;
                    pChainCells += padding;
                    predChainCell = (PredictedChainingCell *) pChainCells;
                    /*
                     * There could be a race with another mutator thread that
                     * is about to use this particular predicted cell, having
                     * already passed the clazz comparison. So we cannot
                     * safely wipe the method and branch, but it is safe to
                     * clear the clazz, which serves as the key.
                     */
                    predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
                    break;
                default:
                    ALOGE("Unexpected chaining type: %d", i);
                    dvmAbort();  // dvmAbort OK here - can't safely recover
            }
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
            pChainCells += elemSize;  /* Advance by a fixed number of bytes */
        }
    }
    return NULL;
}

/* Unchain all translations in the cache. */
void dvmJitUnchainAll()
{
    ALOGV("Jit Runtime: unchaining all");
    if (gDvmJit.pJitEntryTable != NULL) {
        COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
        dvmLockMutex(&gDvmJit.tableLock);

        UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

        for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
            if (gDvmJit.pJitEntryTable[i].dPC &&
                !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
                gDvmJit.pJitEntryTable[i].codeAddress) {
                      dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
            }
        }

        PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

        dvmUnlockMutex(&gDvmJit.tableLock);
        gDvmJit.translationChains = 0;
    }
    gDvmJit.hasNewChain = false;
}

#define P_GPR_1 PhysicalReg_EBX
/* Add one additional jump instruction, keeping the jump target 4-byte aligned. */
static void insertJumpHelp()
{
    int rem = (uint)stream % 4;
    int nop_size = 3 - rem;
    dump_nop(nop_size);
    unconditional_jump_int(0, OpndSize_32);
    return;
}
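
/*
 * Alignment math for insertJumpHelp (illustrative): after emitting
 * 3 - (stream % 4) nops, stream % 4 == 3, so the one-byte JMP opcode sits
 * at offset 3 mod 4 and its 4-byte displacement starts on a 4-byte
 * boundary - which is what lets chaining/unchaining patch the jump target
 * with a single aligned 4-byte store.
 *
 *   stream % 4 == 0  ->  3 nops + jmp  ->  imm32 4-byte aligned
 *   stream % 4 == 3  ->  0 nops + jmp  ->  imm32 4-byte aligned
 */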

/* Chaining cell for code that may need warmup. */
/* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?)
                 blx r0
                 data 0xb23a //bytecode address: 0x5115b23a
                 data 0x5115
   IA32 assembly:
                  jmp  0 //5 bytes
                  movl address, %ebx
                  movl dvmJitToInterpNormal, %eax
                  call %eax
                  <-- return address
*/
static void handleNormalChainingCell(CompilationUnit *cUnit,
                                     unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during jit chaining.
     * This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/*
 * Chaining cell for instructions immediately following already-translated
 * code.
 */
static void handleHotChainingCell(CompilationUnit *cUnit,
                                  unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during jit chaining.
     * This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for branches that branch back into the same basic block */
static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
                                     unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during jit chaining.
     * This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for monomorphic method invocations. */
static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
                                              const Method *callee, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
          cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during jit chaining.
     * This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
}
#undef P_GPR_1

/* Chaining cell for polymorphic (predicted) method invocations. */
static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
{
    if(dump_x86_inst)
        ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
#ifndef PREDICTED_CHAINING
    //assume rPC for callee->insns in %ebx
    scratchRegs[0] = PhysicalReg_EAX;
#if defined(WITH_JIT_TUNING)
    /* Predicted chaining is not enabled. Fall back to interpreter and
     * indicate that predicted chaining was not done.
     */
    move_imm_to_reg(OpndSize_32, kInlineCacheMiss, PhysicalReg_EDX, true);
#endif
    call_dvmJitToInterpTraceSelectNoChain();
#else
    /* make sure the section for the predicted chaining cell is 4-byte aligned */
    //int padding = (4 - ((u4)stream & 3)) & 3;
    //stream += padding;
    int* streamData = (int*)stream;
    /* Should not be executed in the initial state */
    streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
    streamData[1] = 0;
    /* To be filled: class */
    streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
    /* To be filled: method */
    streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
    /*
     * Rechain count. The initial value of 0 here will trigger chaining upon
     * the first invocation of this callsite.
     */
    streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
#if 0
    ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
          *((int*)(stream+8)), *((int*)(stream+12)));
#endif
    stream += 20; // 5 * 4 bytes
#endif
}
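
/*
 * Initial predicted chaining cell layout as emitted above (5 words,
 * 20 bytes). Field names follow their use in inlineCachePatchEnqueue; the
 * exact struct layout is assumed to match these stores:
 *   word 0: branch   = PREDICTED_CHAIN_BX_PAIR_INIT
 *   word 1: branch2  = 0
 *   word 2: clazz    = PREDICTED_CHAIN_CLAZZ_INIT   (key; filled on patch)
 *   word 3: method   = PREDICTED_CHAIN_METHOD_INIT  (filled on patch)
 *   word 4: counter  = PREDICTED_CHAIN_COUNTER_INIT (0 chains on first use)
 */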

/* Load the Dalvik PC into r0 and jump to the specified target */
static void handlePCReconstruction(CompilationUnit *cUnit,
                                   LowOpBlockLabel *targetLabel)
{
#if 0
    LowOp **pcrLabel =
        (LowOp **) cUnit->pcReconstructionList.elemList;
    int numElems = cUnit->pcReconstructionList.numUsed;
    int i;
    for (i = 0; i < numElems; i++) {
        dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
        /* r0 = dalvik PC */
        loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
        genUnconditionalBranch(cUnit, targetLabel);
    }
#endif
}

//use O0 code generator for hoisted checks outside of the loop
/*
 * vA = arrayReg;
 * vB = idxReg;
 * vC = endConditionReg;
 * arg[0] = maxC
 * arg[1] = minC
 * arg[2] = loopBranchConditionCode
 */
#define P_GPR_1 PhysicalReg_EBX
#define P_GPR_2 PhysicalReg_ECX
static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
{
    /*
     * NOTE: these synthesized blocks don't have ssa names assigned
     * for Dalvik registers.  However, because they dominate the following
     * blocks we can simply use the Dalvik name w/ subscript 0 as the
     * ssa name.
     */
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign array in virtual register to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign index in virtual register to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
    export_pc();
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
    int delta = maxC;
    /*
     * If the loop end condition is ">=" instead of ">", then the largest value
     * of the index is "endCondition - 1".
     */
    if (dInsn->arg[2] == OP_IF_GE) {
        delta--;
    }

    if (delta < 0) { //+delta
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
    } else if (delta > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
    }
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
}
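
/*
 * Worked example for the count-up check above: with end condition vC and
 * constant offset maxC, the largest index the loop body can touch is
 * endCondition + delta, so the single hoisted check
 *     (endCondition + delta) unsigned>= array->length  ->  exception
 * (Condition_NC, i.e. no borrow from the cmp) subsumes the per-iteration
 * bounds checks, and the preceding Condition_E jump handles a null array.
 */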

/*
 * vA = arrayReg;
 * vB = idxReg;
 * vC = endConditionReg;
 * arg[0] = maxC
 * arg[1] = minC
 * arg[2] = loopBranchConditionCode
 */
static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
{
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign array in virtual register to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign index in virtual register to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
    export_pc();
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);

    if (maxC < 0) {
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
    } else if (maxC > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
    }
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
}
#undef P_GPR_1
#undef P_GPR_2

/*
 * vA = idxReg;
 * vB = minC;
 */
#define P_GPR_1 PhysicalReg_ECX
static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
{
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int minC = dInsn->vB;
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); // index
    export_pc();
    compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
}
#undef P_GPR_1
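
/*
 * Worked example for the lower-bound check above: with minC = -3 the
 * smallest index the loop can touch is idx - 3, so comparing idx against
 * -minC (= 3) and jumping on Condition_C (an unsigned borrow) raises the
 * exception exactly when idx < 3.
 */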

#ifdef WITH_JIT_INLINING
static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
{
    CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
    if(gDvm.executionMode == kExecutionModeNcgO0) {
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
        compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
        export_pc(); //use %edx
        conditional_jump_global_API(Condition_E, "common_errNullObject", false);
        move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
        compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
    } else {
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
        nullCheck(5, false, 1, mir->dalvikInsn.vC);
        move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
        compare_reg_reg(4, false, 6, false);
    }

    // immediate will be updated later in genLandingPadForMispredictedCallee
    streamMisPred = stream;
    callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
}
#endif

/* Extended MIR instructions like PHI */
void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
{
    ExecutionMode origMode = gDvm.executionMode;
    gDvm.executionMode = kExecutionModeNcgO0;
    switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
        case kMirOpPhi: {
            break;
        }
        case kMirOpNullNRangeUpCheck: {
            genHoistedChecksForCountUpLoop(cUnit, mir);
            break;
        }
        case kMirOpNullNRangeDownCheck: {
            genHoistedChecksForCountDownLoop(cUnit, mir);
            break;
        }
        case kMirOpLowerBound: {
            genHoistedLowerBoundCheck(cUnit, mir);
            break;
        }
        case kMirOpPunt: {
            break;
        }
#ifdef WITH_JIT_INLINING
        case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
            genValidationForPredictedInline(cUnit, mir);
            break;
        }
#endif
        default:
            break;
    }
    gDvm.executionMode = origMode;
}

static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
                                int bodyId)
{
    /*
     * Next, create two branches - one branch over to the loop body and the
     * other branch to the PCR cell to punt.
     */
    //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
    //setupResourceMasks(branchToBody);
    //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);

#if 0
    LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
    branchToPCR->opCode = kThumbBUncond;
    branchToPCR->generic.target = (LIR *) pcrLabel;
    setupResourceMasks(branchToPCR);
    cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
#endif
}

/* Check whether we can merge a block ending with "goto" into its taken branch */
bool mergeBlock(BasicBlock *bb) {
    if(bb->blockType == kDalvikByteCode &&
       bb->firstMIRInsn != NULL &&
       (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
        bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
        bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
       bb->fallThrough == NULL) {// &&
       //cUnit->hasLoop) {
        //ALOGI("merge blocks ending with goto at index %d", i);
        MIR* prevInsn = bb->lastMIRInsn->prev;
        if(bb->taken == NULL) return false;
        MIR* mergeInsn = bb->taken->firstMIRInsn;
        if(mergeInsn == NULL) return false;
        if(prevInsn == NULL) {//the block has a single instruction
            bb->firstMIRInsn = mergeInsn;
        } else {
            prevInsn->next = mergeInsn; //remove goto from the chain
        }
        mergeInsn->prev = prevInsn;
        bb->lastMIRInsn = bb->taken->lastMIRInsn;
        bb->taken->firstMIRInsn = NULL; //block being merged in
        bb->fallThrough = bb->taken->fallThrough;
        bb->taken = bb->taken->taken;
        return true;
    }
    return false;
}
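
/*
 * Example of the merge: given B1 = {..., goto L} with taken == B2 and no
 * fallThrough, and B2 = {first, ..., last}, the goto is spliced out so B1
 * becomes {..., first, ..., last}, B1 inherits B2's taken/fallThrough
 * edges, and B2's MIR list is emptied; the caller loops until no further
 * merge applies.
 */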

static int genTraceProfileEntry(CompilationUnit *cUnit)
{
    cUnit->headerSize = 6;
    if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
        (gDvmJit.profileMode == kTraceProfilingDisabled)) {
        return 12;
    } else {
        return 4;
    }
}

#define PRINT_BUFFER_LEN 1024
/* Print the code block in the code cache in the range [startAddr, endAddr)
 * in readable format.
 */
void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
{
    char strbuf[PRINT_BUFFER_LEN];
    unsigned char *addr;
    unsigned char *next_addr;
    int n;

    if (gDvmJit.printBinary) {
        // print binary in bytes
        n = 0;
        for (addr = startAddr; addr < endAddr; addr++) {
            n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
            if (n > PRINT_BUFFER_LEN - 10) {
                ALOGD("## %s", strbuf);
                n = 0;
            }
        }
        if (n > 0)
            ALOGD("## %s", strbuf);
    }

    // print disassembled instructions
    addr = startAddr;
    while (addr < endAddr) {
        next_addr = reinterpret_cast<unsigned char*>
            (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
                                       strbuf, PRINT_BUFFER_LEN));
        if (addr != next_addr) {
            ALOGD("**  %p: %s", addr, strbuf);
        } else {                // check whether this is nop padding
            if (addr[0] == 0x90) {
                ALOGD("**  %p: NOP (1 byte)", addr);
                next_addr += 1;
            } else if (addr[0] == 0x66 && addr[1] == 0x90) {
                ALOGD("**  %p: NOP (2 bytes)", addr);
                next_addr += 2;
            } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
                ALOGD("**  %p: NOP (3 bytes)", addr);
                next_addr += 3;
            } else {
                ALOGD("** unable to decode binary at %p", addr);
                break;
            }
        }
        addr = next_addr;
    }
}

/* 4 is the number of additional bytes needed for chaining information for a trace:
 * 2 bytes for the chain cell count offset and 2 bytes for the chain cell offset */
#define EXTRA_BYTES_FOR_CHAINING 4
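
/*
 * Resulting trace layout sketch (see dvmJitUnchain, which reads the two
 * u2 fields back at codeAddr - 4 and codeAddr - 2):
 *
 *   codeAddr - 4 : chain cell count offset (u2)
 *   codeAddr - 2 : chain cell offset (u2)
 *   codeAddr     : translated code, 16-byte aligned (streamMethodStart)
 *   codeAddr + chain cell offset       : chaining cells
 *   codeAddr + chain cell count offset : ChainCellCounts
 */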

/* Entry function to invoke the backend of the JIT compiler */
void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
{
    dump_x86_inst = cUnit->printMe;
    /* Used to hold the labels of each block */
    LowOpBlockLabel *labelList =
        (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
    LowOp *headLIR = NULL;
    GrowableList chainingListByType[kChainingCellLast];
    unsigned int i, padding;

    /*
     * Initialize the chaining lists, one per chaining cell type.
     */
    for (i = 0; i < kChainingCellLast; i++) {
        dvmInitGrowableList(&chainingListByType[i], 2);
    }

    /* Clear the visited flag for each block */
    dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
                                          kAllNodes, false /* isIterative */);

    GrowableListIterator iterator;
    dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);

    /* Traces start with a profiling entry point.  Generate it here */
    cUnit->profileCodeSize = genTraceProfileEntry(cUnit);

    //BasicBlock **blockList = cUnit->blockList;
    GrowableList *blockList = &cUnit->blockList;
    BasicBlock *bb;

    info->codeAddress = NULL;
    stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
    streamStart = stream; /* trace start before alignment */

    // TODO: compile into a temporary buffer and then copy into the code cache.
    // That would let us leave the code cache unprotected for a shorter time.
    size_t unprotected_code_cache_bytes =
            gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed;
    UNPROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);

    stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
    stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16 bytes */
    streamMethodStart = stream; /* code start */
    for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
        labelList[i].lop.generic.offset = -1;
    }
    cUnit->exceptionBlockId = -1;
    for (i = 0; i < blockList->numUsed; i++) {
        bb = (BasicBlock *) blockList->elemList[i];
        if(bb->blockType == kExceptionHandling)
            cUnit->exceptionBlockId = i;
    }
    startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
    if(gDvm.executionMode == kExecutionModeNcgO1) {
        //merge blocks ending with "goto" with the fall through block
        if (cUnit->jitMode != kJitLoop)
            for (i = 0; i < blockList->numUsed; i++) {
                bb = (BasicBlock *) blockList->elemList[i];
                bool merged = mergeBlock(bb);
                while(merged) merged = mergeBlock(bb);
            }
        for (i = 0; i < blockList->numUsed; i++) {
            bb = (BasicBlock *) blockList->elemList[i];
            if(bb->blockType == kDalvikByteCode &&
               bb->firstMIRInsn != NULL) {
                preprocessingBB(bb);
            }
        }
        preprocessingTrace();
    }

    /* Handle the content in each basic block */
    for (i = 0; ; i++) {
        MIR *mir;
        bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
        if (bb == NULL) break;
        if (bb->visited == true) continue;

        labelList[i].immOpnd.value = bb->startOffset;

        if (bb->blockType >= kChainingCellLast) {
            /*
             * Append the label pseudo LIR first. Chaining cells will be handled
             * separately afterwards.
             */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
        }

        if (bb->blockType == kEntryBlock) {
            labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
            if (bb->firstMIRInsn == NULL) {
                continue;
            } else {
              setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
                                  //&labelList[blockList[i]->fallThrough->id]);
            }
        } else if (bb->blockType == kExitBlock) {
            labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
            labelList[i].lop.generic.offset = (stream - streamMethodStart);
            goto gen_fallthrough;
        } else if (bb->blockType == kDalvikByteCode) {
            if (bb->hidden == true) continue;
            labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
            /* Reset the register state */
#if 0
            resetRegisterScoreboard(cUnit);
#endif
        } else {
            switch (bb->blockType) {
                case kChainingCellNormal:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellNormal], i);
                    break;
                case kChainingCellInvokeSingleton:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
                    labelList[i].immOpnd.value =
                        (int) bb->containingMethod;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellInvokeSingleton], i);
                    break;
                case kChainingCellInvokePredicted:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
                    /*
                     * Move the cached method pointer from operand 1 to 0.
                     * Operand 0 was clobbered earlier in this routine to store
                     * the block starting offset, which is not applicable to
                     * the predicted chaining cell.
                     */
                    //TODO
                    //labelList[i].operands[0] = labelList[i].operands[1];

                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellInvokePredicted], i);
                    break;
                case kChainingCellHot:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_HOT;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellHot], i);
                    break;
                case kPCReconstruction:
                    /* Make sure exception handling block is next */
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
                    //assert (i == cUnit->numBlocks - 2);
                    labelList[i].lop.generic.offset = (stream - streamMethodStart);
                    handlePCReconstruction(cUnit,
                                           &labelList[cUnit->puntBlock->id]);
                    break;
                case kExceptionHandling:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
                    labelList[i].lop.generic.offset = (stream - streamMethodStart);
                    //if (cUnit->pcReconstructionList.numUsed) {
                        scratchRegs[0] = PhysicalReg_EAX;
                        jumpToInterpPunt();
                        //call_dvmJitToInterpPunt();
                    //}
                    break;
                case kChainingCellBackwardBranch:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellBackwardBranch],
                        i);
                    break;
                default:
                    break;
            }
            continue;
        }
   1233         {
   1234         //LowOp *headLIR = NULL;
   1235         const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
   1236         const u2 *startCodePtr = dexCode->insns;
   1237         const u2 *codePtr;
   1238         labelList[i].lop.generic.offset = (stream - streamMethodStart);
   1239         ALOGV("get ready to handle JIT bb %d type %d hidden %d",
   1240               bb->id, bb->blockType, bb->hidden);
   1241         for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
   1242             bb = nextBB;
   1243             bb->visited = true;
   1244             cUnit->nextCodegenBlock = NULL;
   1245 
        if(gDvm.executionMode == kExecutionModeNcgO1 &&
           bb->blockType != kEntryBlock &&
           bb->firstMIRInsn != NULL) {
            startOfBasicBlock(bb);
            int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
            endOfBasicBlock(bb);
            if(cg_ret < 0) {
                endOfTrace(true/*freeOnly*/);
                cUnit->baseAddr = NULL;
                PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
                return;
            }
        } else {
        for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
            startOfBasicBlock(bb); //TODO: why is this called per MIR for O0?
            Opcode dalvikOpCode = mir->dalvikInsn.opcode;
            if((int)dalvikOpCode >= (int)kMirOpFirst) {
                handleExtendedMIR(cUnit, mir);
                continue;
            }
            InstructionFormat dalvikFormat =
                dexGetFormatFromOpcode(dalvikOpCode);
            ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
                  mir->offset, dalvikOpCode, dalvikFormat);
            LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
            /* Remember the first LIR for this block */
            if (headLIR == NULL) {
                headLIR = (LowOp*)boundaryLIR;
            }
            bool notHandled = true;
            /*
             * Debugging: screen the opcode first to see if it is in the
             * do[-not]-compile list
             */
            bool singleStepMe =
                gDvmJit.includeSelectedOp !=
                ((gDvmJit.opList[dalvikOpCode >> 3] &
                  (1 << (dalvikOpCode & 0x7))) !=
                 0);
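            /*
             * singleStepMe is true when the opcode's bit in the opList bitmap
             * disagrees with the includeSelectedOp polarity, i.e. the opcode
             * falls on the do-not-compile side of the filter.
             */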
            if (singleStepMe || cUnit->allSingleStep) {
            } else {
                codePtr = startCodePtr + mir->offset;
                //lower each bytecode and update the LIR
                notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir);
                if(gDvmJit.codeCacheByteUsed + (stream - streamStart) +
                   CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                    ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart));
                    gDvmJit.codeCacheFull = true;
                    cUnit->baseAddr = NULL;
                    endOfTrace(true/*freeOnly*/);
                    PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
                    return;
                }
            }
            if (notHandled) {
                ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
                     mir->offset,
                     dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
                     dalvikFormat);
                dvmAbort();
                break;
            }
        } // end for (each MIR)
        } // end else (JIT + O0 code generator)
        } // end for (nextCodegenBlock chain)
        } // end block
        /* Eliminate redundant loads/stores and delay stores into later slots */
#if 0
        dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
                                           cUnit->lastLIRInsn);
#endif
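        /* Reset headLIR so the next block records its own first boundary LIR */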
        if (headLIR) headLIR = NULL;
gen_fallthrough:
        /*
         * Check if the block is terminated due to trace length constraint -
         * insert an unconditional branch to the chaining cell.
         */
        if (bb->needFallThroughBranch) {
            jumpToBasicBlock(stream, bb->fallThrough->id);
        }

    }

    char* streamChainingStart = (char*)stream;
    /* Handle the chaining cells in predefined order */
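    /*
     * Cells of the same type must be laid out contiguously and in this fixed
     * type order: only the per-type counts are recorded below, so consumers
     * locate individual cells by walking the section type by type.
     */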
    for (i = 0; i < kChainingCellGap; i++) {
        size_t j;
        int *blockIdList = (int *) chainingListByType[i].elemList;

        cUnit->numChainingCells[i] = chainingListByType[i].numUsed;

        /* No chaining cells of this type */
        if (cUnit->numChainingCells[i] == 0)
            continue;

        /* Record the first LIR for a new type of chaining cell */
        cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
        for (j = 0; j < chainingListByType[i].numUsed; j++) {
            int blockId = blockIdList[j];
            BasicBlock *chainingBlock =
                (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
                                                         blockId);

            labelList[blockId].lop.generic.offset = (stream - streamMethodStart);

            /* Align this chaining cell first */
#if 0
            newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
#endif
            /* Insert the pseudo chaining instruction */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);

            switch (chainingBlock->blockType) {
                case kChainingCellNormal:
                    handleNormalChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellInvokeSingleton:
                    handleInvokeSingletonChainingCell(cUnit,
                        chainingBlock->containingMethod, blockId, labelList);
                    break;
                case kChainingCellInvokePredicted:
                    handleInvokePredictedChainingCell(cUnit, blockId);
                    break;
                case kChainingCellHot:
                    handleHotChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellBackwardBranch:
                    handleBackwardBranchChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                default:
                    ALOGE("Bad blocktype %d", chainingBlock->blockType);
                    dvmAbort();
                    break;
            }

            if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart));
                gDvmJit.codeCacheFull = true;
                cUnit->baseAddr = NULL;
                endOfTrace(true); /* need to free structures */
                PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
                return;
            }
        }
    }
#if 0
    dvmCompilerApplyGlobalOptimizations(cUnit);
#endif
    endOfTrace(false);

    if (gDvmJit.codeCacheFull) {
        /* We hit the code cache size limit inside endOfTrace(false).
         * Bail out for this trace!
         */
        ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart));
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
        return;
    }

    /* Dump the section for chaining cell counts; make sure it is 4-byte aligned */
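    /* (4 - (addr & 3)) & 3 yields the 0-3 padding bytes to the next 4-byte boundary */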
    padding = (4 - ((u4)stream & 3)) & 3;
    stream += padding;
    ChainCellCounts chainCellCounts;
    /* Install the chaining cell counts */
    for (i = 0; i < kChainingCellGap; i++) {
        chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
    }
    char* streamCountStart = (char*)stream;
    memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
    stream += sizeof(chainCellCounts);

    cUnit->baseAddr = streamMethodStart;
    cUnit->totalSize = (stream - streamStart);
    if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
        ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart));
        gDvmJit.codeCacheFull = true;
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
        return;
    }
    /* write chaining cell count offset & chaining cell offset */
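    /*
     * EXTRA_BYTES_FOR_CHAINING bytes were reserved immediately before the
     * method start: pOffset[0] holds the offset (from codeAddr) of the chain
     * cell counts section, pOffset[1] the offset of the first chaining cell.
     */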
    u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
    *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
    pOffset[1] = streamChainingStart - streamMethodStart;

    PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);

    gDvmJit.codeCacheByteUsed += (stream - streamStart);
    if (cUnit->printMe) {
        unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
        unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
        ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
              cUnit->method->clazz->descriptor, cUnit->method->name,
              codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
        ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
              cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
        printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
    }
    ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
          (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
          cUnit->totalSize, gDvmJit.codeCache);

    gDvmJit.numCompilations++;

    info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize;
}

/*
 * Perform a translation chain operation: patch the placeholder jump at
 * branchAddr so that it branches directly to the translation at tgtAddr.
 */
void* dvmJitChain(void* tgtAddr, u4* branchAddr)
{
#ifdef JIT_CHAIN
    int relOffset = (int) tgtAddr - (int)branchAddr;

    if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
        (gDvmJit.codeCacheFull == false)) {

        gDvmJit.translationChains++;

        //OpndSize immSize = estOpndSizeFromImm(relOffset);
        //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
        /* Hard-code the jump operand size to 32 bits. This instruction will
         * replace the "jump 0" in the original code sequence.
         */
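        /* A JMP rel32 encodes as 5 bytes (opcode 0xE9 + 32-bit displacement),
         * and the displacement is relative to the end of the instruction,
         * hence the -5 adjustment below.
         */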
        OpndSize immSize = OpndSize_32;
        relOffset -= 5;
        //can't use stream here since it is used by the compilation thread
        UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
        dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr
        PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));

        gDvmJit.hasNewChain = true;

        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
                  (int) branchAddr, tgtAddr, relOffset));
    }
#endif
    return tgtAddr;
}

/*
 * Accept the work and start compiling.  Returns true if compilation
 * is attempted.
 */
bool dvmCompilerDoWork(CompilerWorkOrder *work)
{
    JitTraceDescription *desc;
    bool isCompile;
    bool success = true;

    if (gDvmJit.codeCacheFull) {
        return false;
    }

    switch (work->kind) {
        case kWorkOrderTrace:
            isCompile = true;
            /* Start compilation with maximally allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                        work->bailPtr, 0 /* no hints */);
            break;
        case kWorkOrderTraceDebug: {
            bool oldPrintMe = gDvmJit.printMe;
            gDvmJit.printMe = true;
            isCompile = true;
            /* Start compilation with maximally allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                        work->bailPtr, 0 /* no hints */);
            gDvmJit.printMe = oldPrintMe;
            break;
        }
        case kWorkOrderProfileMode:
            dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
            isCompile = false;
            break;
        default:
            isCompile = false;
            ALOGE("Jit: unknown work order type");
            assert(0);  // Bail if debug build, discard otherwise
    }
    if (!success)
        work->result.codeAddress = NULL;
    return isCompile;
}

void dvmCompilerCacheFlush(long start, long end, long flags) {
    /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */
}

//#endif