/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <sys/mman.h>
#include "Dalvik.h"
#include "libdex/DexOpcodes.h"
#include "compiler/Compiler.h"
#include "compiler/CompilerIR.h"
#include "interp/Jit.h"
#include "libdex/DexFile.h"
#include "Lower.h"
#include "NcgAot.h"
#include "compiler/codegen/CompilerCodegen.h"

/* Init value used when a predicted chain is initially assembled */
/* 0xE7FE is the Thumb "branch to self" instruction, used here as a sentinel */
#define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe

/* Target-specific save/restore */
extern "C" void dvmJitCalleeSave(double *saveArea);
extern "C" void dvmJitCalleeRestore(double *saveArea);

/*
 * Determine the initial instruction set to be used for this trace.
 * Later components may decide to change this.
 */
//JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
JitInstructionSetType dvmCompilerInstructionSet(void)
{
    return DALVIK_JIT_IA32;
}

JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
{
    return DALVIK_JIT_IA32;
}

/* We don't use an interpret template for IA32 */
void *dvmCompilerGetInterpretTemplate()
{
    return NULL;
}

/* Track the number of times that the code cache is patched */
#if defined(WITH_JIT_TUNING)
#define UPDATE_CODE_CACHE_PATCHES()    (gDvmJit.codeCachePatches++)
#else
#define UPDATE_CODE_CACHE_PATCHES()
#endif

bool dvmCompilerArchInit() {
    /* Target-specific configuration */
    gDvmJit.jitTableSize = 1 << 12;
    gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
    if (gDvmJit.threshold == 0) {
        gDvmJit.threshold = 255;
    }
    gDvmJit.codeCacheSize = 512*1024;
    gDvmJit.optLevel = kJitOptLevelO1;

    //Disable Method-JIT
    gDvmJit.disableOpt |= (1 << kMethodJit);

#if defined(WITH_SELF_VERIFICATION)
    /* Force into blocking mode */
    gDvmJit.blockingMode = true;
    gDvm.nativeDebuggerActive = true;
#endif

    // Make sure all threads have current values
    dvmJitUpdateThreadStateAll();

    return true;
}

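/*
 * Note: the jitTableMask computation assumes jitTableSize is a power of two;
 * the values above (4K-entry table, threshold 255, 512KB code cache) are the
 * x86-specific tuning defaults.
 */
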
void dvmCompilerPatchInlineCache(void)
{
    int i;
    PredictedChainingCell *minAddr, *maxAddr;

    /* Nothing to be done */
    if (gDvmJit.compilerICPatchIndex == 0) return;

    /*
     * Since all threads are already stopped we don't really need to acquire
     * the lock. But a race condition could easily be introduced in the future
     * without anyone noticing, so we still acquire the lock here.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);

    /* Initialize the min/max address range */
    minAddr = (PredictedChainingCell *)
        ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
    maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;

    for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
        ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
        PredictedChainingCell *cellAddr = workOrder->cellAddr;
        PredictedChainingCell *cellContent = &workOrder->cellContent;
        ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
                                                workOrder->classLoader);

        assert(clazz->serialNumber == workOrder->serialNumber);

        /* Use the newly resolved clazz pointer */
        cellContent->clazz = clazz;

        if (cellAddr->clazz == NULL) {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
                      cellAddr,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        } else {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
                      "patched",
                      cellAddr,
                      cellAddr->clazz->descriptor,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        }

        /* Patch the chaining cell */
        *cellAddr = *cellContent;
        minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
        maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
    }

    PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    gDvmJit.compilerICPatchIndex = 0;
    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
}


/* Target-specific cache clearing */
void dvmCompilerCacheClear(char *start, size_t size)
{
    /* "0xFF 0xFF" is an invalid opcode for x86. */
    memset(start, 0xFF, size);
}

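/*
 * Note: filling with 0xFF means a stray branch into a cleared region hits
 * bytes that decode to an invalid instruction and faults immediately,
 * rather than silently executing stale translations.
 */
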
/* for JIT debugging, to be implemented */
void dvmJitCalleeSave(double *saveArea) {
}

void dvmJitCalleeRestore(double *saveArea) {
}

void dvmJitToInterpSingleStep() {
}

JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
                                            const JitEntry *knownEntry) {
    return NULL;
}

void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
{
}

void dvmCompilerArchDump(void)
{
}

char *getTraceBase(const JitEntry *p)
{
    return NULL;
}

void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
{
}

void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
{
}

void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
{
    // Method-based JIT not supported for x86.
}

void dvmJitScanAllClassPointers(void (*callback)(void *))
{
}

/* Handy function to retrieve the profile count */
static inline int getProfileCount(const JitEntry *entry)
{
    if (entry->dPC == 0 || entry->codeAddress == 0)
        return 0;
    u4 *pExecutionCount = (u4 *) getTraceBase(entry);

    /* getTraceBase() is a stub on x86, so this currently always yields 0 */
    return pExecutionCount ? *pExecutionCount : 0;
}

/* qsort callback function */
static int sortTraceProfileCount(const void *entry1, const void *entry2)
{
    const JitEntry *jitEntry1 = (const JitEntry *)entry1;
    const JitEntry *jitEntry2 = (const JitEntry *)entry2;

    JitTraceCounter_t count1 = getProfileCount(jitEntry1);
    JitTraceCounter_t count2 = getProfileCount(jitEntry2);
    return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
}

/* Sort the trace profile counts and dump them */
void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
{
    JitEntry *sortedEntries;
    int numTraces = 0;
    unsigned long counts = 0;
    unsigned int i;

    /* Make sure that the table is not changing */
    dvmLockMutex(&gDvmJit.tableLock);

    /* Sort the entries by descending order */
    sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
    if (sortedEntries == NULL)
        goto done;
    memcpy(sortedEntries, gDvmJit.pJitEntryTable,
           sizeof(JitEntry) * gDvmJit.jitTableSize);
    qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
          sortTraceProfileCount);

    /* Count the traces and accumulate their execution counts */
    for (i=0; i < gDvmJit.jitTableSize; i++) {
        if (sortedEntries[i].dPC != 0) {
            numTraces++;
            counts += getProfileCount(&sortedEntries[i]);
        }
    }
    if (numTraces == 0)
        numTraces = 1;
    ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces));

    free(sortedEntries);
done:
    dvmUnlockMutex(&gDvmJit.tableLock);
    return;
}

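/*
 * Note: "stream" is the lowering layer's global emission pointer; the
 * helpers below temporarily retarget it so that the jump is emitted in
 * place at instAddr.
 */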
void jumpWithRelOffset(char* instAddr, int relOffset) {
    stream = instAddr;
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
    dump_imm(Mnemonic_JMP, immSize, relOffset);
}

// works whether or not instructions for the target basic block have been generated
LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
    stream = instAddr;
    bool unknown;
    OpndSize size;
    int relativeNCG = targetId;
    relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
    unconditional_jump_int(relativeNCG, size);
    return NULL;
}

LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
    stream = instAddr;
    bool unknown;
    OpndSize size;
    int relativeNCG = targetId;
    relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
    conditional_jump_int(cc, relativeNCG, size);
    return NULL;
}

/*
 * Attempt to enqueue a work order to patch an inline cache for a predicted
 * chaining cell for virtual/interface calls.
 */
static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
                                    PredictedChainingCell *newContent)
{
    bool result = true;

    /*
     * Make sure only one thread gets here since updating the cell (i.e., the
     * fast path) and queueing the request (i.e., the queued path) have to be
     * done in an atomic fashion.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    /* Fast path for uninitialized chaining cell */
    if (cellAddr->clazz == NULL &&
        cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->method = newContent->method;
        cellAddr->branch = newContent->branch;
        cellAddr->branch2 = newContent->branch2;

        /*
         * The update order matters - make sure clazz is updated last since it
         * will bring the uninitialized chaining cell to life.
         */
        android_atomic_release_store((int32_t)newContent->clazz,
            (volatile int32_t *)(void*) &cellAddr->clazz);
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if 0
        MEM_BARRIER();
        cellAddr->clazz = newContent->clazz;
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
#endif
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchInit++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method));
    /* Check if this is a frequently missed clazz */
    } else if (cellAddr->stagedClazz != newContent->clazz) {
        /* Not proven to be frequent yet - build up the filter cache */
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->stagedClazz = newContent->clazz;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchRejected++;
#endif
    /*
     * Different classes but same method implementation - it is safe to just
     * patch the class value without the need to stop the world.
     */
    } else if (cellAddr->method == newContent->method) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->clazz = newContent->clazz;
        /* No need to flush the cache here since the branch is not patched */
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchLockFree++;
#endif
    /*
     * Cannot patch the chaining cell inline - queue it until the next safe
     * point.
     */
    } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) {
        int index = gDvmJit.compilerICPatchIndex++;
        const ClassObject *clazz = newContent->clazz;

        gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
        gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
        gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
        gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
        /* For verification purposes only */
        gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchQueued++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name));
    } else {
        /* Queue is full - just drop this patch request */
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchDropped++;
#endif

        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name));
    }

    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
    return result;
}

/*
 * This method is called from the invoke templates for virtual and interface
 * methods to speculatively set up a chain to the callee. The templates are
 * written in assembly and have set up method, cell, and clazz at r0, r2, and
 * r3 respectively, so there is an unused argument in the list. Upon return one
 * of the following three results may happen:
 *   1) Chain is not set up because the callee is native. Reset the rechain
 *      count to a big number so that it will take a long time before the next
 *      rechain attempt happens.
 *   2) Chain is not set up because the callee has not been created yet. Reset
 *      the rechain count to a small number and retry in the near future.
 *   3) Ask all other threads to stop before patching this chaining cell.
 *      This is required because another thread may have passed the class check
 *      but hasn't reached the chaining cell yet to follow the chain. If we
 *      patch the content before halting the other thread, there is a small
 *      window in which it may follow the new but wrong chain and invoke a
 *      different method.
 */
const Method *dvmJitToPatchPredictedChain(const Method *method,
                                          Thread *self,
                                          PredictedChainingCell *cell,
                                          const ClassObject *clazz)
{
    int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
    /* Don't come back here for a long time if the method is native */
    if (dvmIsNativeMethod(method)) {
        UNPROTECT_CODE_CACHE(cell, sizeof(*cell));

        /*
         * Put a non-zero/bogus value in the clazz field so that it won't
         * trigger immediate patching and will continue to fail to match with
         * a real clazz pointer.
         */
        cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cell, sizeof(*cell));
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
                  cell, method->name));
        goto done;
    }
    {
    int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);

    /*
     * Compilation not made yet for the callee. Reset the counter to a small
     * value and come back to check soon.
     */
    if ((tgtAddr == 0) ||
        ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
                  cell, method->clazz->descriptor, method->name));
        goto done;
    }

    PredictedChainingCell newCell;

    if (cell->clazz == NULL) {
        newRechainCount = self->icRechainCount;
    }

    int relOffset = (int) tgtAddr - (int)cell;
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
    relOffset -= jumpSize;
    COMPILER_TRACE_CHAINING(
            ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
                  cell, method->clazz->descriptor, method->name, jumpSize));
    //can't use stream here since it is used by the compilation thread
    dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch

    newCell.clazz = clazz;
    newCell.method = method;

    /*
     * Enter the work order to the queue and the chaining cell will be patched
     * the next time a safe point is entered.
     *
     * If the enqueuing fails, reset the rechain count to a normal value so that
     * it won't get indefinitely delayed.
     */
    inlineCachePatchEnqueue(cell, &newCell);
    }
done:
    self->icRechainCount = newRechainCount;
    return method;
}

/*
 * Unchain a trace given the starting address of the translation
 * in the code cache.  Refer to the diagram in dvmCompilerAssembleLIR.
 * For ARM, it returns the address following the last cell unchained.
 * For IA32, it returns NULL since a cacheflush is not required.
 */
u4* dvmJitUnchain(void* codeAddr)
{
    /* codeAddr is 4-byte aligned, and so is the chain cell count offset */
    u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
    u2 chainCellCountOffset = *pChainCellCountOffset;
    /* the chain cell counts information is 4-byte aligned */
    ChainCellCounts *pChainCellCounts =
          (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
    u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
    u2 chainCellOffset = *pChainCellOffset;
    u1* pChainCells;
    int i,j;
    PredictedChainingCell *predChainCell;
    int padding;

    /* Locate the beginning of the chain cell region */
    pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);

    /* The cells are sorted in order - walk through them and reset */
    for (i = 0; i < kChainingCellGap; i++) {
        /* for hot, normal, singleton chaining:
               nop  //padding
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after chaining:
               nop
               jmp imm
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after unchaining:
               nop
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           Space occupied by the chaining cell in bytes: the nop is padding
                so that the 4-byte target of "jmp 0" is 4-byte aligned.
           Space for a predicted chaining cell: 5 words = 20 bytes
        */
        int elemSize = 0;
        if (i == kChainingCellInvokePredicted) {
            elemSize = 20;
        }
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i]));

        for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
            switch(i) {
                case kChainingCellNormal:
                case kChainingCellHot:
                case kChainingCellInvokeSingleton:
                case kChainingCellBackwardBranch:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
                    pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
                    elemSize = 4+5+5+2;
                    memset(pChainCells, 0, 4);
                    break;
                case kChainingCellInvokePredicted:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of predicted"));
                    /* 4-byte aligned */
                    padding = (4 - ((u4)pChainCells & 3)) & 3;
                    pChainCells += padding;
                    predChainCell = (PredictedChainingCell *) pChainCells;
                    /*
                     * There could be a race with another mutator thread that
                     * is about to use this particular predicted cell and has
                     * already passed the clazz comparison. So we cannot safely
                     * wipe the method and branch, but it is safe to clear the
                     * clazz, which serves as the key.
                     */
                    predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
                    break;
                default:
                    ALOGE("Unexpected chaining type: %d", i);
                    dvmAbort();  // dvmAbort OK here - can't safely recover
            }
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
            pChainCells += elemSize;  /* Advance by a fixed number of bytes */
        }
    }
    return NULL;
}

/* Unchain all translations in the cache. */
void dvmJitUnchainAll()
{
    ALOGV("Jit Runtime: unchaining all");
    if (gDvmJit.pJitEntryTable != NULL) {
        COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
        dvmLockMutex(&gDvmJit.tableLock);

        UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

        for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
            if (gDvmJit.pJitEntryTable[i].dPC &&
                !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
                gDvmJit.pJitEntryTable[i].codeAddress) {
                      dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
            }
        }

        PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

        dvmUnlockMutex(&gDvmJit.tableLock);
        gDvmJit.translationChains = 0;
    }
    gDvmJit.hasNewChain = false;
}

#define P_GPR_1 PhysicalReg_EBX
/* Add an additional jump instruction, keeping the jump target 4-byte aligned */
static void insertJumpHelp()
{
    /* Pad with nops so that the imm32 operand of the following 5-byte "jmp"
       starts on a 4-byte boundary */
    int rem = (uint)stream % 4;
    int nop_size = 3 - rem;
    dump_nop(nop_size);
    unconditional_jump_int(0, OpndSize_32);
    return;
}

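/*
 * Worked example for insertJumpHelp(): if stream % 4 == 1 on entry, two nops
 * are emitted, the 1-byte jmp opcode lands at an address that is 3 (mod 4),
 * and its 4-byte immediate therefore starts on a 4-byte boundary, presumably
 * so that chaining can patch the jump target with a single aligned store.
 */
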
/* Chaining cell for code that may need warmup. */
/* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access a member of the glue structure?)
                 blx r0
                 data 0xb23a //bytecode address: 0x5115b23a
                 data 0x5115
   IA32 assembly:
                  jmp  0 //5 bytes
                  movl address, %ebx
                  movl dvmJitToInterpNormal, %eax
                  call %eax
                  <-- return address
*/
static void handleNormalChainingCell(CompilationUnit *cUnit,
                                     unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during JIT
     * chaining. This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/*
 * Chaining cell for instructions that immediately follow already translated
 * code.
 */
static void handleHotChainingCell(CompilationUnit *cUnit,
                                  unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during JIT
     * chaining. This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for branches that branch back into the same basic block */
static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
                                     unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during JIT
     * chaining. This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for monomorphic method invocations. */
static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
                                              const Method *callee, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
          cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during JIT
     * chaining. This helps resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
}
#undef P_GPR_1

/* Chaining cell for polymorphic (predicted) method invocations. */
static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
{
    if(dump_x86_inst)
        ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
#ifndef PREDICTED_CHAINING
    //assume rPC for callee->insns in %ebx
    scratchRegs[0] = PhysicalReg_EAX;
#if defined(WITH_JIT_TUNING)
    /* Predicted chaining is not enabled. Fall back to interpreter and
     * indicate that predicted chaining was not done.
     */
    move_imm_to_reg(OpndSize_32, kInlineCacheMiss, PhysicalReg_EDX, true);
#endif
    call_dvmJitToInterpTraceSelectNoChain();
#else
    /* make sure the section for the predicted chaining cell is 4-byte aligned */
    //int padding = (4 - ((u4)stream & 3)) & 3;
    //stream += padding;
    int* streamData = (int*)stream;
    /* Should not be executed in the initial state */
    streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
    streamData[1] = 0;
    /* To be filled: class */
    streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
    /* To be filled: method */
    streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
    /*
     * Rechain count. The initial value of 0 here will trigger chaining upon
     * the first invocation of this callsite.
     */
    streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
#if 0
    ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
          *((int*)(stream+8)), *((int*)(stream+12)));
#endif
    stream += 20; //5 *4
#endif
}

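/*
 * Note: the five words emitted above appear to mirror the
 * PredictedChainingCell fields referenced elsewhere in this file (branch,
 * branch2, clazz, method, rechain counter), hence the 20-byte cell size
 * assumed by dvmJitUnchain().
 */
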
/* Load the Dalvik PC into r0 and jump to the specified target */
static void handlePCReconstruction(CompilationUnit *cUnit,
                                   LowOpBlockLabel *targetLabel)
{
#if 0
    LowOp **pcrLabel =
        (LowOp **) cUnit->pcReconstructionList.elemList;
    int numElems = cUnit->pcReconstructionList.numUsed;
    int i;
    for (i = 0; i < numElems; i++) {
        dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
        /* r0 = dalvik PC */
        loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
        genUnconditionalBranch(cUnit, targetLabel);
    }
#endif
}

//use O0 code generator for hoisted checks outside of the loop
/*
 * vA = arrayReg;
 * vB = idxReg;
 * vC = endConditionReg;
 * arg[0] = maxC
 * arg[1] = minC
 * arg[2] = loopBranchConditionCode
 */
#define P_GPR_1 PhysicalReg_EBX
#define P_GPR_2 PhysicalReg_ECX
static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
{
    /*
     * NOTE: these synthesized blocks don't have ssa names assigned
     * for Dalvik registers.  However, because they dominate the following
     * blocks we can simply use the Dalvik name w/ subscript 0 as the
     * ssa name.
     */
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign the array in the virtual register to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign the loop end condition (the index bound) to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
    export_pc();
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
    int delta = maxC;
    /*
     * If the loop end condition is ">=" instead of ">", then the largest value
     * of the index is "endCondition - 1".
     */
    if (dInsn->arg[2] == OP_IF_GE) {
        delta--;
    }

    if (delta < 0) { //+delta
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
    } else if(delta > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
    }
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
}

/*
 * vA = arrayReg;
 * vB = idxReg;
 * vC = endConditionReg;
 * arg[0] = maxC
 * arg[1] = minC
 * arg[2] = loopBranchConditionCode
 */
static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
{
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign array in virtual register to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign index in virtual register to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
    export_pc();
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);

    if (maxC < 0) {
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
    } else if(maxC > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
    }
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
}
#undef P_GPR_1
#undef P_GPR_2

/*
 * vA = idxReg;
 * vB = minC;
 */
#define P_GPR_1 PhysicalReg_ECX
static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
{
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int minC = dInsn->vB;
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //index
    export_pc();
    compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
}
#undef P_GPR_1

#ifdef WITH_JIT_INLINING
static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
{
    CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
    if(gDvm.executionMode == kExecutionModeNcgO0) {
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
        compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
        export_pc(); //use %edx
        conditional_jump_global_API(Condition_E, "common_errNullObject", false);
        move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
        compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
    } else {
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
        nullCheck(5, false, 1, mir->dalvikInsn.vC);
        move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
        compare_reg_reg(4, false, 6, false);
    }

    //immediate will be updated later in genLandingPadForMispredictedCallee
    streamMisPred = stream;
    callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
}
#endif

/* Extended MIR instructions like PHI */
void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
{
    ExecutionMode origMode = gDvm.executionMode;
    gDvm.executionMode = kExecutionModeNcgO0;
    switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
        case kMirOpPhi: {
            break;
        }
        case kMirOpNullNRangeUpCheck: {
            genHoistedChecksForCountUpLoop(cUnit, mir);
            break;
        }
        case kMirOpNullNRangeDownCheck: {
            genHoistedChecksForCountDownLoop(cUnit, mir);
            break;
        }
        case kMirOpLowerBound: {
            genHoistedLowerBoundCheck(cUnit, mir);
            break;
        }
        case kMirOpPunt: {
            break;
        }
#ifdef WITH_JIT_INLINING
        case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
            genValidationForPredictedInline(cUnit, mir);
            break;
        }
#endif
        default:
            break;
    }
    gDvm.executionMode = origMode;
}

static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
                                int bodyId)
{
    /*
     * Next, create two branches - one branch over to the loop body and the
     * other branch to the PCR cell to punt.
     */
    //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
    //setupResourceMasks(branchToBody);
    //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);

#if 0
    LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
    branchToPCR->opCode = kThumbBUncond;
    branchToPCR->generic.target = (LIR *) pcrLabel;
    setupResourceMasks(branchToPCR);
    cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
#endif
}

/* Check whether we can merge a block ending with "goto" with its target block */
bool mergeBlock(BasicBlock *bb) {
    if(bb->blockType == kDalvikByteCode &&
       bb->firstMIRInsn != NULL &&
       (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
        bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
        bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
       bb->fallThrough == NULL) {// &&
       //cUnit->hasLoop) {
        //ALOGI("merge blocks ending with goto at index %d", i);
        MIR* prevInsn = bb->lastMIRInsn->prev;
        if(bb->taken == NULL) return false;
        MIR* mergeInsn = bb->taken->firstMIRInsn;
        if(mergeInsn == NULL) return false;
        if(prevInsn == NULL) {//the block has a single instruction
            bb->firstMIRInsn = mergeInsn;
        } else {
            prevInsn->next = mergeInsn; //remove goto from the chain
        }
        mergeInsn->prev = prevInsn;
        bb->lastMIRInsn = bb->taken->lastMIRInsn;
        bb->taken->firstMIRInsn = NULL; //block being merged in
        bb->fallThrough = bb->taken->fallThrough;
        bb->taken = bb->taken->taken;
        return true;
    }
    return false;
}

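/*
 * Example: a block ending in "goto L" whose taken edge is L absorbs L's MIRs
 * once the goto is unlinked; dvmCompilerMIR2LIR() below calls mergeBlock()
 * repeatedly so whole chains of gotos collapse into one block.
 */
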
static int genTraceProfileEntry(CompilationUnit *cUnit)
{
    cUnit->headerSize = 6;
    if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
        (gDvmJit.profileMode == kTraceProfilingDisabled)) {
        return 12;
    } else {
        return 4;
    }
}

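/*
 * The return value appears to be the number of bytes reserved at the trace
 * head for profiling support; the caller records it as
 * cUnit->profileCodeSize in dvmCompilerMIR2LIR() below.
 */
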
#define PRINT_BUFFER_LEN 1024
/* Print the code block in the code cache in the range [startAddr, endAddr)
 * in readable format.
 */
void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
{
    char strbuf[PRINT_BUFFER_LEN];
    unsigned char *addr;
    unsigned char *next_addr;
    int n;

    if (gDvmJit.printBinary) {
        // print binary in bytes
        n = 0;
        for (addr = startAddr; addr < endAddr; addr++) {
            n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
            if (n > PRINT_BUFFER_LEN - 10) {
                ALOGD("## %s", strbuf);
                n = 0;
            }
        }
        if (n > 0)
            ALOGD("## %s", strbuf);
    }

    // print disassembled instructions
    addr = startAddr;
    while (addr < endAddr) {
        next_addr = reinterpret_cast<unsigned char*>
            (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
                                       strbuf, PRINT_BUFFER_LEN));
        if (addr != next_addr) {
            ALOGD("**  %p: %s", addr, strbuf);
        } else {                // check whether this is nop padding
            if (addr[0] == 0x90) {
                ALOGD("**  %p: NOP (1 byte)", addr);
                next_addr += 1;
            } else if (addr[0] == 0x66 && addr[1] == 0x90) {
                ALOGD("**  %p: NOP (2 bytes)", addr);
                next_addr += 2;
            } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
                ALOGD("**  %p: NOP (3 bytes)", addr);
                next_addr += 3;
            } else {
                ALOGD("** unable to decode binary at %p", addr);
                break;
            }
        }
        addr = next_addr;
    }
}

/* EXTRA_BYTES_FOR_CHAINING is the number of additional bytes needed for a
 * trace's chaining information: 2 bytes for the chaining cell count offset
 * and 2 bytes for the chaining cell offset */
#define EXTRA_BYTES_FOR_CHAINING 4

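/*
 * Layout implied by dvmJitUnchain(): the two u2 values are stashed
 * immediately before the trace entry point:
 *   codeAddr - 4: chaining cell counts offset (u2)
 *   codeAddr - 2: chaining cell offset (u2)
 */
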
/* Entry function to invoke the backend of the JIT compiler */
void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
{
    dump_x86_inst = cUnit->printMe;
    /* Used to hold the labels of each block */
    LowOpBlockLabel *labelList =
        (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
    LowOp *headLIR = NULL;
    GrowableList chainingListByType[kChainingCellLast];
    unsigned int i, padding;

    /*
     * Initialize the chaining lists, one per chaining cell type.
     */
    for (i = 0; i < kChainingCellLast; i++) {
        dvmInitGrowableList(&chainingListByType[i], 2);
    }

    /* Clear the visited flag for each block */
    dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
                                          kAllNodes, false /* isIterative */);

    GrowableListIterator iterator;
    dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);

    /* Traces start with a profiling entry point.  Generate it here */
    cUnit->profileCodeSize = genTraceProfileEntry(cUnit);

    //BasicBlock **blockList = cUnit->blockList;
    GrowableList *blockList = &cUnit->blockList;
    BasicBlock *bb;

    info->codeAddress = NULL;
    stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;

    // TODO: compile into a temporary buffer and then copy into the code cache.
    // That would let us leave the code cache unprotected for a shorter time.
    size_t unprotected_code_cache_bytes =
            gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING;
    UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);

    streamStart = stream; /* trace start before alignment */
    stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
    stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16 bytes */
    streamMethodStart = stream; /* code start */
    for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
        labelList[i].lop.generic.offset = -1;
    }
    cUnit->exceptionBlockId = -1;
    for (i = 0; i < blockList->numUsed; i++) {
        bb = (BasicBlock *) blockList->elemList[i];
        if(bb->blockType == kExceptionHandling)
            cUnit->exceptionBlockId = i;
    }
    startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
    if(gDvm.executionMode == kExecutionModeNcgO1) {
        //merge blocks ending with "goto" with the fall through block
        if (cUnit->jitMode != kJitLoop)
            for (i = 0; i < blockList->numUsed; i++) {
                bb = (BasicBlock *) blockList->elemList[i];
                bool merged = mergeBlock(bb);
                while(merged) merged = mergeBlock(bb);
            }
        for (i = 0; i < blockList->numUsed; i++) {
            bb = (BasicBlock *) blockList->elemList[i];
            if(bb->blockType == kDalvikByteCode &&
               bb->firstMIRInsn != NULL) {
                preprocessingBB(bb);
            }
        }
        preprocessingTrace();
    }

    /* Handle the content in each basic block */
    for (i = 0; ; i++) {
        MIR *mir;
        bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
        if (bb == NULL) break;
        if (bb->visited == true) continue;

        labelList[i].immOpnd.value = bb->startOffset;

        if (bb->blockType >= kChainingCellLast) {
            /*
             * Append the label pseudo LIR first. Chaining cells will be handled
             * separately afterwards.
             */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
        }

        if (bb->blockType == kEntryBlock) {
            labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
            if (bb->firstMIRInsn == NULL) {
                continue;
            } else {
                setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
                                    //&labelList[blockList[i]->fallThrough->id]);
            }
        } else if (bb->blockType == kExitBlock) {
            labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
            labelList[i].lop.generic.offset = (stream - streamMethodStart);
            goto gen_fallthrough;
        } else if (bb->blockType == kDalvikByteCode) {
            if (bb->hidden == true) continue;
            labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
            /* Reset the register state */
#if 0
            resetRegisterScoreboard(cUnit);
#endif
        } else {
            switch (bb->blockType) {
                case kChainingCellNormal:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellNormal], i);
                    break;
                case kChainingCellInvokeSingleton:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
                    labelList[i].immOpnd.value =
                        (int) bb->containingMethod;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellInvokeSingleton], i);
                    break;
                case kChainingCellInvokePredicted:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
                    /*
                     * Move the cached method pointer from operand 1 to 0.
                     * Operand 0 was clobbered earlier in this routine to store
                     * the block starting offset, which is not applicable to
                     * predicted chaining cell.
                     */
                    //TODO
                    //labelList[i].operands[0] = labelList[i].operands[1];

                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellInvokePredicted], i);
                    break;
                case kChainingCellHot:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_HOT;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellHot], i);
                    break;
                case kPCReconstruction:
                    /* Make sure exception handling block is next */
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
                    //assert (i == cUnit->numBlocks - 2);
                    labelList[i].lop.generic.offset = (stream - streamMethodStart);
                    handlePCReconstruction(cUnit,
                                           &labelList[cUnit->puntBlock->id]);
                    break;
                case kExceptionHandling:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
                    labelList[i].lop.generic.offset = (stream - streamMethodStart);
                    //if (cUnit->pcReconstructionList.numUsed) {
                        scratchRegs[0] = PhysicalReg_EAX;
                        jumpToInterpPunt();
                        //call_dvmJitToInterpPunt();
                    //}
                    break;
                case kChainingCellBackwardBranch:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellBackwardBranch],
                        i);
                    break;
                default:
                    break;
            }
            continue;
        }
   1229         {
   1230         //LowOp *headLIR = NULL;
   1231         const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
   1232         const u2 *startCodePtr = dexCode->insns;
   1233         const u2 *codePtr;
   1234         labelList[i].lop.generic.offset = (stream - streamMethodStart);
   1235         ALOGV("get ready to handle JIT bb %d type %d hidden %d",
   1236               bb->id, bb->blockType, bb->hidden);
   1237         for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
   1238             bb = nextBB;
   1239             bb->visited = true;
   1240             cUnit->nextCodegenBlock = NULL;
   1241 
   1242         if(gDvm.executionMode == kExecutionModeNcgO1 &&
   1243            bb->blockType != kEntryBlock &&
   1244            bb->firstMIRInsn != NULL) {
   1245             startOfBasicBlock(bb);
   1246             int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
   1247             endOfBasicBlock(bb);
   1248             if(cg_ret < 0) {
   1249                 endOfTrace(true/*freeOnly*/);
   1250                 cUnit->baseAddr = NULL;
   1251                 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
   1252                 return;
   1253             }
   1254         } else {
   1255         for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
   1256             startOfBasicBlock(bb); //why here for O0
   1257             Opcode dalvikOpCode = mir->dalvikInsn.opcode;
   1258             if((int)dalvikOpCode >= (int)kMirOpFirst) {
   1259                 handleExtendedMIR(cUnit, mir);
   1260                 continue;
   1261             }
   1262             InstructionFormat dalvikFormat =
   1263                 dexGetFormatFromOpcode(dalvikOpCode);
   1264             ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
   1265                   mir->offset, dalvikOpCode, dalvikFormat);
   1266             LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
   1267             /* Remember the first LIR for this block */
   1268             if (headLIR == NULL) {
   1269                 headLIR = (LowOp*)boundaryLIR;
   1270             }
   1271             bool notHandled = true;
   1272             /*
   1273              * Debugging: screen the opcode first to see if it is in the
   1274              * do[-not]-compile list
   1275              */
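            /*
             * Note: gDvmJit.opList holds one bit per Dalvik opcode
             * (byte index = opcode >> 3, bit index = opcode & 7), and
             * gDvmJit.includeSelectedOp selects whether the set bits form
             * the compile list or the do-not-compile list.
             */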
            bool singleStepMe =
                gDvmJit.includeSelectedOp !=
                ((gDvmJit.opList[dalvikOpCode >> 3] &
                  (1 << (dalvikOpCode & 0x7))) !=
                 0);
            if (singleStepMe || cUnit->allSingleStep) {
                /* single-stepping is not lowered here; notHandled stays
                 * true, so the abort below will trigger */
            } else {
                codePtr = startCodePtr + mir->offset;
                //lower each bytecode, updating the LIR
                notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir);
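                /*
                 * Overflow check: codeCacheByteUsed covers traces already
                 * committed to the cache, (stream - streamStart) is what this
                 * trace has emitted so far, and CODE_CACHE_PADDING keeps a
                 * safety margin free at the end of the cache.
                 */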
                if(gDvmJit.codeCacheByteUsed + (stream - streamStart) +
                   CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                    ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)",
                          (unsigned)(stream - streamStart));
                    gDvmJit.codeCacheFull = true;
                    cUnit->baseAddr = NULL;
                    endOfTrace(true/*freeOnly*/);
                    PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                    return;
                }
            }
            if (notHandled) {
                ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
                     mir->offset,
                     dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
                     dalvikFormat);
                dvmAbort();
                break;
            }
        } // end for
        } // end else (JIT + O0 code generator)
        }
        } // end for
        /* Eliminate redundant loads/stores and delay stores into later slots */
#if 0
        dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
                                           cUnit->lastLIRInsn);
#endif
        if (headLIR) headLIR = NULL;
gen_fallthrough:
        /*
         * Check if the block is terminated due to trace length constraint -
         * insert an unconditional branch to the chaining cell.
         */
        if (bb->needFallThroughBranch) {
            jumpToBasicBlock(stream, bb->fallThrough->id);
        }

    }

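    /*
     * Second pass: the chaining cells collected into chainingListByType
     * above are emitted here, after all basic blocks, so that each cell
     * type forms one contiguous group whose size is recorded in
     * cUnit->numChainingCells.
     */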
    char* streamChainingStart = (char*)stream;
    /* Handle the chaining cells in predefined order */
    for (i = 0; i < kChainingCellGap; i++) {
        size_t j;
        int *blockIdList = (int *) chainingListByType[i].elemList;

        cUnit->numChainingCells[i] = chainingListByType[i].numUsed;

        /* No chaining cells of this type */
        if (cUnit->numChainingCells[i] == 0)
            continue;

        /* Record the first LIR for a new type of chaining cell */
        cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
        for (j = 0; j < chainingListByType[i].numUsed; j++) {
            int blockId = blockIdList[j];
            BasicBlock *chainingBlock =
                (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
                                                         blockId);

            labelList[blockId].lop.generic.offset = (stream - streamMethodStart);

            /* Align this chaining cell first */
#if 0
            newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
#endif
            /* Insert the pseudo chaining instruction */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);

            switch (chainingBlock->blockType) {
                case kChainingCellNormal:
                    handleNormalChainingCell(cUnit,
                     chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellInvokeSingleton:
                    handleInvokeSingletonChainingCell(cUnit,
                        chainingBlock->containingMethod, blockId, labelList);
                    break;
                case kChainingCellInvokePredicted:
                    handleInvokePredictedChainingCell(cUnit, blockId);
                    break;
                case kChainingCellHot:
                    handleHotChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellBackwardBranch:
                    handleBackwardBranchChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                default:
                    ALOGE("Bad blocktype %d", chainingBlock->blockType);
                    dvmAbort();
                    break;
            }

            if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                ALOGI("JIT code cache full after ChainingCell (trace uses %uB)",
                      (unsigned)(stream - streamStart));
                gDvmJit.codeCacheFull = true;
                cUnit->baseAddr = NULL;
                endOfTrace(true); /* need to free structures */
                PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                return;
            }
        }
    }
#if 0
    dvmCompilerApplyGlobalOptimizations(cUnit);
#endif
    endOfTrace(false);

    if (gDvmJit.codeCacheFull) {
        /* We hit the code cache size limit inside endOfTrace(false).
         * Bail out for this trace!
         */
        ALOGI("JIT code cache full after endOfTrace (trace uses %uB)",
              (unsigned)(stream - streamStart));
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
        return;
    }

    /* dump section for chaining cell counts, make sure it is 4-byte aligned */
    padding = (4 - ((u4)stream & 3)) & 3;
    stream += padding;
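    /*
     * Worked example of the alignment math: if (u4)stream ends in 0x2,
     * padding = (4 - 2) & 3 = 2; if stream is already 4-byte aligned,
     * padding = (4 - 0) & 3 = 0.
     */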
    ChainCellCounts chainCellCounts;
    /* Install the chaining cell counts */
    for (i = 0; i < kChainingCellGap; i++) {
        chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
    }
    char* streamCountStart = (char*)stream;
    memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
    stream += sizeof(chainCellCounts);

    cUnit->baseAddr = streamMethodStart;
    cUnit->totalSize = (stream - streamStart);
    if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
        ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)",
              (unsigned)(stream - streamStart));
        gDvmJit.codeCacheFull = true;
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
        return;
    }

    /* write chaining cell count offset & chaining cell offset */
    u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
    *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
    pOffset[1] = streamChainingStart - streamMethodStart;
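    /*
     * Resulting trace layout in the code cache (informal sketch derived
     * from the writes above):
     *
     *   streamMethodStart - EXTRA_BYTES_FOR_CHAINING:
     *       u2 offset of the chain cell counts (streamCountStart - streamMethodStart)
     *       u2 offset of the chaining cells    (streamChainingStart - streamMethodStart)
     *   streamMethodStart   : translated code (cUnit->baseAddr)
     *   streamChainingStart : chaining cells, grouped by type
     *   0-3 padding bytes to reach 4-byte alignment
     *   streamCountStart    : ChainCellCounts
     */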

    PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);

    gDvmJit.codeCacheByteUsed += (stream - streamStart);
    if (cUnit->printMe) {
        unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
        unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
        ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
              cUnit->method->clazz->descriptor, cUnit->method->name,
              codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
        ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
              cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
        printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
    }
    ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
          (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
          cUnit->totalSize, gDvmJit.codeCache);

    gDvmJit.numCompilations++;

    info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize;
}

/*
 * Perform translation chain operation.
 */
void* dvmJitChain(void* tgtAddr, u4* branchAddr)
{
#ifdef JIT_CHAIN
    int relOffset = (int) tgtAddr - (int)branchAddr;

    if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
        (gDvmJit.codeCacheFull == false)) {

        gDvmJit.translationChains++;

        //OpndSize immSize = estOpndSizeFromImm(relOffset);
        //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
        /* The jump operand size is hard-coded to 32 bits. This instruction
         * replaces the "jump 0" placeholder in the original code sequence.
         */
        OpndSize immSize = OpndSize_32;
        relOffset -= 5;
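        /*
         * Encoding note: a 32-bit unconditional JMP on IA32 is five bytes,
         * the 0xE9 opcode followed by a 4-byte little-endian displacement
         * measured from the end of the instruction; hence the adjustment
         * by 5 above.
         */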
        //can't use stream here since it is used by the compilation thread
        UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
        dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr
        PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));

        gDvmJit.hasNewChain = true;

        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
                  (int) branchAddr, tgtAddr, relOffset));
    }
#endif
    return tgtAddr;
}

/*
 * Accept the work and start compiling.  Returns true if compilation
 * is attempted.
 */
bool dvmCompilerDoWork(CompilerWorkOrder *work)
{
    JitTraceDescription *desc;
    bool isCompile;
    bool success = true;

    if (gDvmJit.codeCacheFull) {
        return false;
    }

    switch (work->kind) {
        case kWorkOrderTrace:
            isCompile = true;
            /* Start compilation with the maximum allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                        work->bailPtr, 0 /* no hints */);
            break;
        case kWorkOrderTraceDebug: {
            bool oldPrintMe = gDvmJit.printMe;
            gDvmJit.printMe = true;
            isCompile = true;
            /* Start compilation with the maximum allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                        work->bailPtr, 0 /* no hints */);
            gDvmJit.printMe = oldPrintMe;
            break;
        }
        case kWorkOrderProfileMode:
            dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
            isCompile = false;
            break;
        default:
            isCompile = false;
            ALOGE("Jit: unknown work order type");
            assert(0);  // Bail if debug build, discard otherwise
    }
    if (!success)
        work->result.codeAddress = NULL;
    return isCompile;
}

void dvmCompilerCacheFlush(long start, long end, long flags) {
  /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */
}

//#endif