      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
      18  * This code generates "register maps" for Dalvik bytecode.  In a stack-based
     19  * VM we might call these "stack maps".  They are used to increase the
     20  * precision in the garbage collector when scanning references in the
     21  * interpreter thread stacks.
     22  */
     23 #include "Dalvik.h"
     24 #include "UniquePtr.h"
     25 #include "analysis/CodeVerify.h"
     26 #include "analysis/RegisterMap.h"
     27 #include "libdex/DexCatch.h"
     28 #include "libdex/InstrUtils.h"
     29 #include "libdex/Leb128.h"
     30 
     31 #include <stddef.h>
     32 
     33 /* double-check the compression */
     34 #define REGISTER_MAP_VERIFY     false
     35 
     36 /* verbose logging */
     37 #define REGISTER_MAP_VERBOSE    false
     38 
     39 //#define REGISTER_MAP_STATS
     40 
     41 // fwd
     42 static void outputTypeVector(const RegType* regs, int insnRegCount, u1* data);
     43 static bool verifyMap(VerifierData* vdata, const RegisterMap* pMap);
     44 static int compareMaps(const RegisterMap* pMap1, const RegisterMap* pMap2);
     45 
     46 #ifdef REGISTER_MAP_STATS
     47 static void computeMapStats(RegisterMap* pMap, const Method* method);
     48 #endif
      49 static RegisterMap* compressMapDifferential(const RegisterMap* pMap,
     50     const Method* meth);
     51 static RegisterMap* uncompressMapDifferential(const RegisterMap* pMap);
     52 
     53 #ifdef REGISTER_MAP_STATS
     54 /*
     55  * Generate some statistics on the register maps we create and use.
     56  */
     57 #define kMaxGcPointGap      50
     58 #define kUpdatePosnMinRegs  24
     59 #define kNumUpdatePosns     8
     60 #define kMaxDiffBits        20
     61 struct MapStats {
     62     /*
     63      * Buckets measuring the distance between GC points.  This tells us how
     64      * many bits we need to encode the advancing program counter.  We ignore
     65      * some of the "long tail" entries.
     66      */
     67     int gcPointGap[kMaxGcPointGap];
     68 
     69     /*
     70      * Number of gaps.  Equal to (number of gcPoints - number of methods),
      71      * since the computation doesn't include the initial gap.
     72      */
     73     int gcGapCount;
     74 
     75     /*
      76      * Total number of GC points.
     77      */
     78     int totalGcPointCount;
     79 
     80     /*
     81      * For larger methods (>= 24 registers), measure in which octant register
     82      * updates occur.  This should help us understand whether register
     83      * changes tend to cluster in the low regs even for large methods.
     84      */
     85     int updatePosn[kNumUpdatePosns];
     86 
     87     /*
     88      * For all methods, count up the number of changes to registers < 16
     89      * and >= 16.
     90      */
     91     int updateLT16;
     92     int updateGE16;
     93 
     94     /*
     95      * Histogram of the number of bits that differ between adjacent entries.
     96      */
     97     int numDiffBits[kMaxDiffBits];
     98 
     99 
    100     /*
    101      * Track the number of expanded maps, and the heap space required to
    102      * hold them.
    103      */
    104     int numExpandedMaps;
    105     int totalExpandedMapSize;
    106 };
    107 #endif
    108 
    109 /*
    110  * Prepare some things.
    111  */
    112 bool dvmRegisterMapStartup()
    113 {
    114 #ifdef REGISTER_MAP_STATS
    115     MapStats* pStats = calloc(1, sizeof(MapStats));
    116     gDvm.registerMapStats = pStats;
    117 #endif
    118     return true;
    119 }
    120 
    121 /*
    122  * Clean up.
    123  */
    124 void dvmRegisterMapShutdown()
    125 {
    126 #ifdef REGISTER_MAP_STATS
    127     free(gDvm.registerMapStats);
    128 #endif
    129 }
    130 
    131 /*
    132  * Write stats to log file.
    133  */
    134 void dvmRegisterMapDumpStats()
    135 {
    136 #ifdef REGISTER_MAP_STATS
    137     MapStats* pStats = (MapStats*) gDvm.registerMapStats;
    138     int i, end;
    139 
    140     for (end = kMaxGcPointGap-1; end >= 0; end--) {
    141         if (pStats->gcPointGap[end] != 0)
    142             break;
    143     }
    144 
    145     LOGI("Register Map gcPointGap stats (diff count=%d, total=%d):",
    146         pStats->gcGapCount, pStats->totalGcPointCount);
    147     assert(pStats->gcPointGap[0] == 0);
    148     for (i = 1; i <= end; i++) {
    149         LOGI(" %2d %d", i, pStats->gcPointGap[i]);
    150     }
    151 
    152 
    153     for (end = kMaxDiffBits-1; end >= 0; end--) {
    154         if (pStats->numDiffBits[end] != 0)
    155             break;
    156     }
    157 
    158     LOGI("Register Map bit difference stats:");
    159     for (i = 0; i <= end; i++) {
    160         LOGI(" %2d %d", i, pStats->numDiffBits[i]);
    161     }
    162 
    163 
    164     LOGI("Register Map update position stats (lt16=%d ge16=%d):",
    165         pStats->updateLT16, pStats->updateGE16);
    166     for (i = 0; i < kNumUpdatePosns; i++) {
    167         LOGI(" %2d %d", i, pStats->updatePosn[i]);
    168     }
    169 #endif
    170 }
    171 
    172 
    173 /*
    174  * ===========================================================================
    175  *      Map generation
    176  * ===========================================================================
    177  */
    178 
    179 /*
    180  * Generate the register map for a method that has just been verified
    181  * (i.e. we're doing this as part of verification).
    182  *
    183  * For type-precise determination we have all the data we need, so we
    184  * just need to encode it in some clever fashion.
    185  *
    186  * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
    187  */
    188 RegisterMap* dvmGenerateRegisterMapV(VerifierData* vdata)
    189 {
    190     static const int kHeaderSize = offsetof(RegisterMap, data);
    191     RegisterMap* pMap = NULL;
    192     RegisterMap* pResult = NULL;
    193     RegisterMapFormat format;
    194     u1 regWidth;
    195     u1* mapData;
    196     int i, bytesForAddr, gcPointCount;
    197     int bufSize;
    198 
    199     if (vdata->method->registersSize >= 2048) {
    200         LOGE("ERROR: register map can't handle %d registers",
    201             vdata->method->registersSize);
    202         goto bail;
    203     }
    204     regWidth = (vdata->method->registersSize + 7) / 8;
    205 
    206     /*
    207      * Decide if we need 8 or 16 bits to hold the address.  Strictly speaking
    208      * we only need 16 bits if we actually encode an address >= 256 -- if
    209      * the method has a section at the end without GC points (e.g. array
    210      * data) we don't need to count it.  The situation is unusual, and
    211      * detecting it requires scanning the entire method, so we don't bother.
    212      */
    213     if (vdata->insnsSize < 256) {
    214         format = kRegMapFormatCompact8;
    215         bytesForAddr = 1;
    216     } else {
    217         format = kRegMapFormatCompact16;
    218         bytesForAddr = 2;
    219     }
    220 
    221     /*
    222      * Count up the number of GC point instructions.
    223      *
    224      * NOTE: this does not automatically include the first instruction,
    225      * since we don't count method entry as a GC point.
    226      */
    227     gcPointCount = 0;
    228     for (i = 0; i < (int) vdata->insnsSize; i++) {
    229         if (dvmInsnIsGcPoint(vdata->insnFlags, i))
    230             gcPointCount++;
    231     }
    232     if (gcPointCount >= 65536) {
    233         /* we could handle this, but in practice we don't get near this */
    234         LOGE("ERROR: register map can't handle %d gc points in one method",
    235             gcPointCount);
    236         goto bail;
    237     }
    238 
    239     /*
    240      * Allocate a buffer to hold the map data.
    241      */
    242     bufSize = kHeaderSize + gcPointCount * (bytesForAddr + regWidth);
    243 
    244     LOGV("+++ grm: %s.%s (adr=%d gpc=%d rwd=%d bsz=%d)",
    245         vdata->method->clazz->descriptor, vdata->method->name,
    246         bytesForAddr, gcPointCount, regWidth, bufSize);
    247 
    248     pMap = (RegisterMap*) malloc(bufSize);
    249     dvmRegisterMapSetFormat(pMap, format);
    250     dvmRegisterMapSetOnHeap(pMap, true);
    251     dvmRegisterMapSetRegWidth(pMap, regWidth);
    252     dvmRegisterMapSetNumEntries(pMap, gcPointCount);
    253 
    254     /*
    255      * Populate it.
    256      */
    257     mapData = pMap->data;
    258     for (i = 0; i < (int) vdata->insnsSize; i++) {
    259         if (dvmInsnIsGcPoint(vdata->insnFlags, i)) {
    260             assert(vdata->registerLines[i].regTypes != NULL);
    261             if (format == kRegMapFormatCompact8) {
    262                 *mapData++ = i;
    263             } else /*kRegMapFormatCompact16*/ {
    264                 *mapData++ = i & 0xff;
    265                 *mapData++ = i >> 8;
    266             }
    267             outputTypeVector(vdata->registerLines[i].regTypes,
    268                 vdata->insnRegCount, mapData);
    269             mapData += regWidth;
    270         }
    271     }
    272 
    273     LOGV("mapData=%p pMap=%p bufSize=%d", mapData, pMap, bufSize);
    274     assert(mapData - (const u1*) pMap == bufSize);
    275 
    276     if (REGISTER_MAP_VERIFY && !verifyMap(vdata, pMap))
    277         goto bail;
    278 #ifdef REGISTER_MAP_STATS
    279     computeMapStats(pMap, vdata->method);
    280 #endif
    281 
    282     /*
    283      * Try to compress the map.
    284      */
    285     RegisterMap* pCompMap;
    286 
    287     pCompMap = compressMapDifferential(pMap, vdata->method);
    288     if (pCompMap != NULL) {
    289         if (REGISTER_MAP_VERIFY) {
    290             /*
    291              * Expand the compressed map we just created, and compare it
    292              * to the original.  Abort the VM if it doesn't match up.
    293              */
    294             RegisterMap* pUncompMap;
    295             pUncompMap = uncompressMapDifferential(pCompMap);
    296             if (pUncompMap == NULL) {
    297                 LOGE("Map failed to uncompress - %s.%s",
    298                     vdata->method->clazz->descriptor,
    299                     vdata->method->name);
    300                 free(pCompMap);
    301                 /* bad - compression is broken or we're out of memory */
    302                 dvmAbort();
    303             } else {
    304                 if (compareMaps(pMap, pUncompMap) != 0) {
    305                     LOGE("Map comparison failed - %s.%s",
    306                         vdata->method->clazz->descriptor,
    307                         vdata->method->name);
    308                     free(pCompMap);
    309                     /* bad - compression is broken */
    310                     dvmAbort();
    311                 }
    312 
    313                 /* verify succeeded */
    314                 delete pUncompMap;
    315             }
    316         }
    317 
    318         if (REGISTER_MAP_VERBOSE) {
    319             LOGD("Good compress on %s.%s",
    320                 vdata->method->clazz->descriptor,
    321                 vdata->method->name);
    322         }
    323         free(pMap);
    324         pMap = pCompMap;
    325     } else {
    326         if (REGISTER_MAP_VERBOSE) {
    327             LOGD("Unable to compress %s.%s (ent=%d rw=%d)",
    328                 vdata->method->clazz->descriptor,
    329                 vdata->method->name,
    330                 dvmRegisterMapGetNumEntries(pMap),
    331                 dvmRegisterMapGetRegWidth(pMap));
    332         }
    333     }
    334 
    335     pResult = pMap;
    336 
    337 bail:
    338     return pResult;
    339 }
    340 
    341 /*
    342  * Release the storage held by a RegisterMap.
    343  */
    344 void dvmFreeRegisterMap(RegisterMap* pMap)
    345 {
    346     if (pMap == NULL)
    347         return;
    348 
    349     assert(dvmRegisterMapGetOnHeap(pMap));
    350     free(pMap);
    351 }
    352 
    353 /*
    354  * Determine if the RegType value is a reference type.
    355  *
    356  * Ordinarily we include kRegTypeZero in the "is it a reference"
    357  * check.  There's no value in doing so here, because we know
    358  * the register can't hold anything but zero.
    359  */
    360 static inline bool isReferenceType(RegType type)
    361 {
    362     return (type > kRegTypeMAX || type == kRegTypeUninit);
    363 }
    364 
    365 /*
    366  * Given a line of registers, output a bit vector that indicates whether
    367  * or not the register holds a reference type (which could be null).
    368  *
    369  * We use '1' to indicate it's a reference, '0' for anything else (numeric
    370  * value, uninitialized data, merge conflict).  Register 0 will be found
    371  * in the low bit of the first byte.
    372  */
    373 static void outputTypeVector(const RegType* regs, int insnRegCount, u1* data)
    374 {
    375     u1 val = 0;
    376     int i;
    377 
    378     for (i = 0; i < insnRegCount; i++) {
    379         RegType type = *regs++;
    380         val >>= 1;
    381         if (isReferenceType(type))
    382             val |= 0x80;        /* set hi bit */
    383 
    384         if ((i & 0x07) == 7)
    385             *data++ = val;
    386     }
    387     if ((i & 0x07) != 0) {
    388         /* flush bits from last byte */
    389         val >>= 8 - (i & 0x07);
    390         *data++ = val;
    391     }
    392 }
    393 
    394 /*
    395  * Print the map as a series of binary strings.
    396  *
    397  * Pass in method->registersSize if known, or -1 if not.
    398  */
    399 static void dumpRegisterMap(const RegisterMap* pMap, int registersSize)
    400 {
    401     const u1* rawMap = pMap->data;
    402     const RegisterMapFormat format = dvmRegisterMapGetFormat(pMap);
    403     const int numEntries = dvmRegisterMapGetNumEntries(pMap);
    404     const int regWidth = dvmRegisterMapGetRegWidth(pMap);
    405     int addrWidth;
    406 
    407     switch (format) {
    408     case kRegMapFormatCompact8:
    409         addrWidth = 1;
    410         break;
    411     case kRegMapFormatCompact16:
    412         addrWidth = 2;
    413         break;
    414     default:
    415         /* can't happen */
    416         LOGE("Can only dump Compact8 / Compact16 maps (not %d)", format);
    417         return;
    418     }
    419 
    420     if (registersSize < 0)
    421         registersSize = 8 * regWidth;
    422     assert(registersSize <= regWidth * 8);
    423 
    424     int ent;
    425     for (ent = 0; ent < numEntries; ent++) {
    426         int i, addr;
    427 
    428         addr = *rawMap++;
    429         if (addrWidth > 1)
    430             addr |= (*rawMap++) << 8;
    431 
    432         const u1* dataStart = rawMap;
    433         u1 val = 0;
    434 
    435         /* create binary string */
    436         char outBuf[registersSize +1];
    437         for (i = 0; i < registersSize; i++) {
    438             val >>= 1;
    439             if ((i & 0x07) == 0)
    440                 val = *rawMap++;
    441 
    442             outBuf[i] = '0' + (val & 0x01);
    443         }
    444         outBuf[i] = '\0';
    445 
    446         /* back up and create hex dump */
    447         char hexBuf[regWidth * 3 +1];
    448         char* cp = hexBuf;
    449         rawMap = dataStart;
    450         for (i = 0; i < regWidth; i++) {
    451             sprintf(cp, " %02x", *rawMap++);
    452             cp += 3;
    453         }
    454         hexBuf[i * 3] = '\0';
    455 
    456         LOGD("  %04x %s %s", addr, outBuf, hexBuf);
    457     }
    458 }
    459 
    460 /*
    461  * Double-check the map.
    462  *
    463  * We run through all of the data in the map, and compare it to the original.
    464  * Only works on uncompressed data.
    465  */
    466 static bool verifyMap(VerifierData* vdata, const RegisterMap* pMap)
    467 {
    468     const u1* rawMap = pMap->data;
    469     const RegisterMapFormat format = dvmRegisterMapGetFormat(pMap);
    470     const int numEntries = dvmRegisterMapGetNumEntries(pMap);
    471     int ent;
    472     bool dumpMap = false;
    473 
    474     if (false) {
    475         const char* cd = "Landroid/net/http/Request;";
    476         const char* mn = "readResponse";
    477         if (strcmp(vdata->method->clazz->descriptor, cd) == 0 &&
    478             strcmp(vdata->method->name, mn) == 0)
    479         {
    480             char* desc;
    481             desc = dexProtoCopyMethodDescriptor(&vdata->method->prototype);
    482             LOGI("Map for %s.%s %s", vdata->method->clazz->descriptor,
    483                 vdata->method->name, desc);
    484             free(desc);
    485 
    486             dumpMap = true;
    487         }
    488     }
    489 
    490     if ((vdata->method->registersSize + 7) / 8 != pMap->regWidth) {
    491         LOGE("GLITCH: registersSize=%d, regWidth=%d",
    492             vdata->method->registersSize, pMap->regWidth);
    493         return false;
    494     }
    495 
    496     for (ent = 0; ent < numEntries; ent++) {
    497         int addr;
    498 
    499         switch (format) {
    500         case kRegMapFormatCompact8:
    501             addr = *rawMap++;
    502             break;
    503         case kRegMapFormatCompact16:
    504             addr = *rawMap++;
    505             addr |= (*rawMap++) << 8;
    506             break;
    507         default:
    508             /* shouldn't happen */
    509             LOGE("GLITCH: bad format (%d)", format);
    510             dvmAbort();
    511         }
    512 
    513         const RegType* regs = vdata->registerLines[addr].regTypes;
    514         if (regs == NULL) {
    515             LOGE("GLITCH: addr %d has no data", addr);
    516             return false;
    517         }
    518 
    519         u1 val = 0;
    520         int i;
    521 
    522         for (i = 0; i < vdata->method->registersSize; i++) {
    523             bool bitIsRef, regIsRef;
    524 
    525             val >>= 1;
    526             if ((i & 0x07) == 0) {
    527                 /* load next byte of data */
    528                 val = *rawMap++;
    529             }
    530 
    531             bitIsRef = val & 0x01;
    532 
    533             RegType type = regs[i];
    534             regIsRef = isReferenceType(type);
    535 
    536             if (bitIsRef != regIsRef) {
    537                 LOGE("GLITCH: addr %d reg %d: bit=%d reg=%d(%d)",
    538                     addr, i, bitIsRef, regIsRef, type);
    539                 return false;
    540             }
    541         }
    542 
    543         /* rawMap now points to the address field of the next entry */
    544     }
    545 
    546     if (dumpMap)
    547         dumpRegisterMap(pMap, vdata->method->registersSize);
    548 
    549     return true;
    550 }
    551 
    552 
    553 /*
    554  * ===========================================================================
    555  *      DEX generation & parsing
    556  * ===========================================================================
    557  */
    558 
    559 /*
    560  * Advance "ptr" to ensure 32-bit alignment.
    561  */
    562 static inline u1* align32(u1* ptr)
    563 {
    564     return (u1*) (((int) ptr + 3) & ~0x03);
    565 }
    566 
    567 /*
    568  * Compute the size, in bytes, of a register map.
    569  */
    570 static size_t computeRegisterMapSize(const RegisterMap* pMap)
    571 {
    572     static const int kHeaderSize = offsetof(RegisterMap, data);
    573     u1 format = dvmRegisterMapGetFormat(pMap);
    574     u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
    575 
    576     assert(pMap != NULL);
    577 
    578     switch (format) {
    579     case kRegMapFormatNone:
    580         return 1;
    581     case kRegMapFormatCompact8:
    582         return kHeaderSize + (1 + pMap->regWidth) * numEntries;
    583     case kRegMapFormatCompact16:
    584         return kHeaderSize + (2 + pMap->regWidth) * numEntries;
    585     case kRegMapFormatDifferential:
    586         {
    587             /* kHeaderSize + decoded ULEB128 length */
    588             const u1* ptr = pMap->data;
    589             int len = readUnsignedLeb128(&ptr);
    590             return len + (ptr - (u1*) pMap);
    591         }
    592     default:
    593         LOGE("Bad register map format %d", format);
    594         dvmAbort();
    595         return 0;
    596     }
    597 }
    598 
    599 /*
    600  * Output the map for a single method, if it has one.
    601  *
    602  * Abstract and native methods have no map.  All others are expected to
    603  * have one, since we know the class verified successfully.
    604  *
    605  * This strips the "allocated on heap" flag from the format byte, so that
    606  * direct-mapped maps are correctly identified as such.
    607  */
    608 static bool writeMapForMethod(const Method* meth, u1** pPtr)
    609 {
    610     if (meth->registerMap == NULL) {
    611         if (!dvmIsAbstractMethod(meth) && !dvmIsNativeMethod(meth)) {
    612             LOGW("Warning: no map available for %s.%s",
    613                 meth->clazz->descriptor, meth->name);
    614             /* weird, but keep going */
    615         }
    616         *(*pPtr)++ = kRegMapFormatNone;
    617         return true;
    618     }
    619 
    620     /* serialize map into the buffer */
    621     size_t mapSize = computeRegisterMapSize(meth->registerMap);
    622     memcpy(*pPtr, meth->registerMap, mapSize);
    623 
    624     /* strip the "on heap" flag out of the format byte, which is always first */
    625     assert(**pPtr == meth->registerMap->format);
    626     **pPtr &= ~(kRegMapFormatOnHeap);
    627 
    628     *pPtr += mapSize;
    629 
    630     return true;
    631 }
    632 
    633 /*
    634  * Write maps for all methods in the specified class to the buffer, which
    635  * can hold at most "length" bytes.  "*pPtr" will be advanced past the end
    636  * of the data we write.
    637  */
    638 static bool writeMapsAllMethods(DvmDex* pDvmDex, const ClassObject* clazz,
    639     u1** pPtr, size_t length)
    640 {
    641     RegisterMapMethodPool* pMethodPool;
    642     u1* ptr = *pPtr;
    643     int i, methodCount;
    644 
    645     /* artificial limit */
    646     if (clazz->virtualMethodCount + clazz->directMethodCount >= 65536) {
    647         LOGE("Too many methods in %s", clazz->descriptor);
    648         return false;
    649     }
    650 
    651     pMethodPool = (RegisterMapMethodPool*) ptr;
    652     ptr += offsetof(RegisterMapMethodPool, methodData);
    653     methodCount = 0;
    654 
    655     /*
    656      * Run through all methods, direct then virtual.  The class loader will
    657      * traverse them in the same order.  (We could split them into two
    658      * distinct pieces, but there doesn't appear to be any value in doing
    659      * so other than that it makes class loading slightly less fragile.)
    660      *
    661      * The class loader won't know about miranda methods at the point
    662      * where it parses this, so we omit those.
    663      *
    664      * TODO: consider omitting all native/abstract definitions.  Should be
    665      * safe, though we lose the ability to sanity-check against the
    666      * method counts in the DEX file.
    667      */
    668     for (i = 0; i < clazz->directMethodCount; i++) {
    669         const Method* meth = &clazz->directMethods[i];
    670         if (dvmIsMirandaMethod(meth))
    671             continue;
    672         if (!writeMapForMethod(&clazz->directMethods[i], &ptr)) {
    673             return false;
    674         }
    675         methodCount++;
    676         //ptr = align32(ptr);
    677     }
    678 
    679     for (i = 0; i < clazz->virtualMethodCount; i++) {
    680         const Method* meth = &clazz->virtualMethods[i];
    681         if (dvmIsMirandaMethod(meth))
    682             continue;
    683         if (!writeMapForMethod(&clazz->virtualMethods[i], &ptr)) {
    684             return false;
    685         }
    686         methodCount++;
    687         //ptr = align32(ptr);
    688     }
    689 
    690     pMethodPool->methodCount = methodCount;
    691 
    692     *pPtr = ptr;
    693     return true;
    694 }
    695 
    696 /*
    697  * Write maps for all classes to the specified buffer, which can hold at
    698  * most "length" bytes.
    699  *
    700  * Returns the actual length used, or 0 on failure.
    701  */
    702 static size_t writeMapsAllClasses(DvmDex* pDvmDex, u1* basePtr, size_t length)
    703 {
    704     DexFile* pDexFile = pDvmDex->pDexFile;
    705     u4 count = pDexFile->pHeader->classDefsSize;
    706     RegisterMapClassPool* pClassPool;
    707     u4* offsetTable;
    708     u1* ptr = basePtr;
    709     u4 idx;
    710 
    711     assert(gDvm.optimizing);
    712 
    713     pClassPool = (RegisterMapClassPool*) ptr;
    714     ptr += offsetof(RegisterMapClassPool, classDataOffset);
    715     offsetTable = (u4*) ptr;
    716     ptr += count * sizeof(u4);
    717 
    718     pClassPool->numClasses = count;
    719 
    720     /*
    721      * We want an entry for every class, loaded or not.
    722      */
    723     for (idx = 0; idx < count; idx++) {
    724         const DexClassDef* pClassDef;
    725         const char* classDescriptor;
    726         ClassObject* clazz;
    727 
    728         pClassDef = dexGetClassDef(pDexFile, idx);
    729         classDescriptor = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
    730 
    731         /*
    732          * All classes have been loaded into the bootstrap class loader.
    733          * If we can find it, and it was successfully pre-verified, we
    734          * run through its methods and add the register maps.
    735          *
    736          * If it wasn't pre-verified then we know it can't have any
    737          * register maps.  Classes that can't be loaded or failed
    738          * verification get an empty slot in the index.
    739          */
    740         clazz = NULL;
    741         if ((pClassDef->accessFlags & CLASS_ISPREVERIFIED) != 0)
    742             clazz = dvmLookupClass(classDescriptor, NULL, false);
    743 
    744         if (clazz != NULL) {
    745             offsetTable[idx] = ptr - basePtr;
    746             LOGVV("%d -> offset %d (%p-%p)",
    747                 idx, offsetTable[idx], ptr, basePtr);
    748 
    749             if (!writeMapsAllMethods(pDvmDex, clazz, &ptr,
    750                     length - (ptr - basePtr)))
    751             {
    752                 return 0;
    753             }
    754 
    755             ptr = align32(ptr);
    756             LOGVV("Size %s (%d+%d methods): %d", clazz->descriptor,
    757                 clazz->directMethodCount, clazz->virtualMethodCount,
    758                 (ptr - basePtr) - offsetTable[idx]);
    759         } else {
    760             LOGV("%4d NOT mapadding '%s'", idx, classDescriptor);
    761             assert(offsetTable[idx] == 0);
    762         }
    763     }
    764 
    765     if (ptr - basePtr >= (int)length) {
    766         /* a bit late */
    767         LOGE("Buffer overrun");
    768         dvmAbort();
    769     }
    770 
    771     return ptr - basePtr;
    772 }
    773 
    774 /*
    775  * Generate a register map set for all verified classes in "pDvmDex".
    776  */
    777 RegisterMapBuilder* dvmGenerateRegisterMaps(DvmDex* pDvmDex)
    778 {
    779     RegisterMapBuilder* pBuilder;
    780 
    781     pBuilder = (RegisterMapBuilder*) calloc(1, sizeof(RegisterMapBuilder));
    782     if (pBuilder == NULL)
    783         return NULL;
    784 
    785     /*
    786      * We have a couple of options here:
    787      *  (1) Compute the size of the output, and malloc a buffer.
    788      *  (2) Create a "large-enough" anonymous mmap region.
    789      *
    790      * The nice thing about option #2 is that we don't have to traverse
    791      * all of the classes and methods twice.  The risk is that we might
    792      * not make the region large enough.  Since the pages aren't mapped
    793      * until used we can allocate a semi-absurd amount of memory without
    794      * worrying about the effect on the rest of the system.
    795      *
    796      * The basic encoding on the largest jar file requires about 1MB of
    797      * storage.  We map out 4MB here.  (TODO: guarantee that the last
    798      * page of the mapping is marked invalid, so we reliably fail if
    799      * we overrun.)
    800      */
    801     if (sysCreatePrivateMap(4 * 1024 * 1024, &pBuilder->memMap) != 0) {
    802         free(pBuilder);
    803         return NULL;
    804     }
    805 
    806     /*
    807      * Create the maps.
    808      */
    809     size_t actual = writeMapsAllClasses(pDvmDex, (u1*)pBuilder->memMap.addr,
    810                                         pBuilder->memMap.length);
    811     if (actual == 0) {
    812         dvmFreeRegisterMapBuilder(pBuilder);
    813         return NULL;
    814     }
    815 
    816     LOGV("TOTAL size of register maps: %d", actual);
    817 
    818     pBuilder->data = pBuilder->memMap.addr;
    819     pBuilder->size = actual;
    820     return pBuilder;
    821 }
    822 
    823 /*
    824  * Free the builder.
    825  */
    826 void dvmFreeRegisterMapBuilder(RegisterMapBuilder* pBuilder)
    827 {
    828     if (pBuilder == NULL)
    829         return;
    830 
    831     sysReleaseShmem(&pBuilder->memMap);
    832     free(pBuilder);
    833 }
    834 
    835 
    836 /*
    837  * Find the data for the specified class.
    838  *
    839  * If there's no register map data, or none for this class, we return NULL.
    840  */
    841 const void* dvmRegisterMapGetClassData(const DexFile* pDexFile, u4 classIdx,
    842     u4* pNumMaps)
    843 {
    844     const RegisterMapClassPool* pClassPool;
    845     const RegisterMapMethodPool* pMethodPool;
    846 
    847     pClassPool = (const RegisterMapClassPool*) pDexFile->pRegisterMapPool;
    848     if (pClassPool == NULL)
    849         return NULL;
    850 
    851     if (classIdx >= pClassPool->numClasses) {
    852         LOGE("bad class index (%d vs %d)", classIdx, pClassPool->numClasses);
    853         dvmAbort();
    854     }
    855 
    856     u4 classOffset = pClassPool->classDataOffset[classIdx];
    857     if (classOffset == 0) {
    858         LOGV("+++ no map for classIdx=%d", classIdx);
    859         return NULL;
    860     }
    861 
    862     pMethodPool =
    863         (const RegisterMapMethodPool*) (((u1*) pClassPool) + classOffset);
    864     if (pNumMaps != NULL)
    865         *pNumMaps = pMethodPool->methodCount;
    866     return pMethodPool->methodData;
    867 }
    868 
    869 /*
    870  * This advances "*pPtr" and returns its original value.
    871  */
    872 const RegisterMap* dvmRegisterMapGetNext(const void** pPtr)
    873 {
    874     const RegisterMap* pMap = (const RegisterMap*) *pPtr;
    875 
    876     *pPtr = /*align32*/(((u1*) pMap) + computeRegisterMapSize(pMap));
    877     LOGVV("getNext: %p -> %p (f=%#x w=%d e=%d)",
    878         pMap, *pPtr, pMap->format, pMap->regWidth,
    879         dvmRegisterMapGetNumEntries(pMap));
    880     return pMap;
    881 }
    882 
    883 
    884 /*
    885  * ===========================================================================
    886  *      Utility functions
    887  * ===========================================================================
    888  */
    889 
    890 /*
    891  * Return the data for the specified address, or NULL if not found.
    892  *
    893  * The result must be released with dvmReleaseRegisterMapLine().
    894  */
    895 const u1* dvmRegisterMapGetLine(const RegisterMap* pMap, int addr)
    896 {
    897     int addrWidth, lineWidth;
    898     u1 format = dvmRegisterMapGetFormat(pMap);
    899     u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
    900 
    901     assert(numEntries > 0);
    902 
    903     switch (format) {
    904     case kRegMapFormatNone:
    905         return NULL;
    906     case kRegMapFormatCompact8:
    907         addrWidth = 1;
    908         break;
    909     case kRegMapFormatCompact16:
    910         addrWidth = 2;
    911         break;
    912     default:
    913         LOGE("Unknown format %d", format);
    914         dvmAbort();
    915         return NULL;
    916     }
    917 
    918     lineWidth = addrWidth + pMap->regWidth;
    919 
    920     /*
    921      * Find the appropriate entry.  Many maps are very small, some are very
    922      * large.
    923      */
    924     static const int kSearchThreshold = 8;
    925     const u1* data = NULL;
    926     int lineAddr;
    927 
    928     if (numEntries < kSearchThreshold) {
    929         int i;
    930         data = pMap->data;
    931         for (i = numEntries; i > 0; i--) {
    932             lineAddr = data[0];
    933             if (addrWidth > 1)
    934                 lineAddr |= data[1] << 8;
    935             if (lineAddr == addr)
    936                 return data + addrWidth;
    937 
    938             data += lineWidth;
    939         }
    940         assert(data == pMap->data + lineWidth * numEntries);
    941     } else {
    942         int hi, lo, mid;
    943 
    944         lo = 0;
    945         hi = numEntries -1;
    946 
    947         while (hi >= lo) {
    948             mid = (hi + lo) / 2;
    949             data = pMap->data + lineWidth * mid;
    950 
    951             lineAddr = data[0];
    952             if (addrWidth > 1)
    953                 lineAddr |= data[1] << 8;
    954 
    955             if (addr > lineAddr) {
    956                 lo = mid + 1;
    957             } else if (addr < lineAddr) {
    958                 hi = mid - 1;
    959             } else {
    960                 return data + addrWidth;
    961             }
    962         }
    963     }
    964 
    965     return NULL;
    966 }
    967 
    968 /*
    969  * Compare two register maps.
    970  *
    971  * Returns 0 if they're equal, nonzero if not.
    972  */
    973 static int compareMaps(const RegisterMap* pMap1, const RegisterMap* pMap2)
    974 {
    975     size_t size1, size2;
    976 
    977     size1 = computeRegisterMapSize(pMap1);
    978     size2 = computeRegisterMapSize(pMap2);
    979     if (size1 != size2) {
    980         LOGI("compareMaps: size mismatch (%zd vs %zd)", size1, size2);
    981         return -1;
    982     }
    983 
    984     if (memcmp(pMap1, pMap2, size1) != 0) {
    985         LOGI("compareMaps: content mismatch");
    986         return -1;
    987     }
    988 
    989     return 0;
    990 }
    991 
    992 
    993 /*
    994  * Get the expanded form of the register map associated with the method.
    995  *
    996  * If the map is already in one of the uncompressed formats, we return
    997  * immediately.  Otherwise, we expand the map and replace method's register
    998  * map pointer, freeing it if it was allocated on the heap.
    999  *
   1000  * NOTE: this function is not synchronized; external locking is mandatory
   1001  * (unless we're in the zygote, where single-threaded access is guaranteed).
   1002  */
   1003 const RegisterMap* dvmGetExpandedRegisterMap0(Method* method)
   1004 {
   1005     const RegisterMap* curMap = method->registerMap;
   1006     RegisterMap* newMap;
   1007 
   1008     if (curMap == NULL)
   1009         return NULL;
   1010 
   1011     /* sanity check to ensure this isn't called w/o external locking */
   1012     /* (if we use this at a time other than during GC, fix/remove this test) */
   1013     if (true) {
   1014         if (!gDvm.zygote && dvmTryLockMutex(&gDvm.gcHeapLock) == 0) {
   1015             LOGE("GLITCH: dvmGetExpandedRegisterMap not called at GC time");
   1016             dvmAbort();
   1017         }
   1018     }
   1019 
   1020     RegisterMapFormat format = dvmRegisterMapGetFormat(curMap);
   1021     switch (format) {
   1022     case kRegMapFormatCompact8:
   1023     case kRegMapFormatCompact16:
   1024         if (REGISTER_MAP_VERBOSE) {
   1025             if (dvmRegisterMapGetOnHeap(curMap)) {
   1026                 LOGD("RegMap: already expanded: %s.%s",
   1027                     method->clazz->descriptor, method->name);
   1028             } else {
   1029                 LOGD("RegMap: stored w/o compression: %s.%s",
   1030                     method->clazz->descriptor, method->name);
   1031             }
   1032         }
   1033         return curMap;
   1034     case kRegMapFormatDifferential:
   1035         newMap = uncompressMapDifferential(curMap);
   1036         break;
   1037     default:
   1038         LOGE("Unknown format %d in dvmGetExpandedRegisterMap", format);
   1039         dvmAbort();
   1040         newMap = NULL;      // make gcc happy
   1041     }
   1042 
   1043     if (newMap == NULL) {
   1044         LOGE("Map failed to uncompress (fmt=%d) %s.%s",
   1045             format, method->clazz->descriptor, method->name);
   1046         return NULL;
   1047     }
   1048 
   1049 #ifdef REGISTER_MAP_STATS
   1050     /*
   1051      * Gather and display some stats.
   1052      */
   1053     {
   1054         MapStats* pStats = (MapStats*) gDvm.registerMapStats;
   1055         pStats->numExpandedMaps++;
   1056         pStats->totalExpandedMapSize += computeRegisterMapSize(newMap);
   1057         LOGD("RMAP: count=%d size=%d",
   1058             pStats->numExpandedMaps, pStats->totalExpandedMapSize);
   1059     }
   1060 #endif
   1061 
   1062     IF_LOGV() {
   1063         char* desc = dexProtoCopyMethodDescriptor(&method->prototype);
   1064         LOGV("Expanding map -> %s.%s:%s",
   1065             method->clazz->descriptor, method->name, desc);
   1066         free(desc);
   1067     }
   1068 
   1069     /*
   1070      * Update method, and free compressed map if it was sitting on the heap.
   1071      */
   1072     dvmSetRegisterMap(method, newMap);
   1073 
   1074     if (dvmRegisterMapGetOnHeap(curMap))
   1075         dvmFreeRegisterMap((RegisterMap*) curMap);
   1076 
   1077     return newMap;
   1078 }
   1079 
   1080 
   1081 /*
   1082  * ===========================================================================
   1083  *      Map compression
   1084  * ===========================================================================
   1085  */
   1086 
   1087 /*
   1088 Notes on map compression
   1089 
   1090 The idea is to create a compressed form that will be uncompressed before
   1091 use, with the output possibly saved in a cache.  This means we can use an
   1092 approach that is unsuited for random access if we choose.
   1093 
   1094 In the event that a map simply does not work with our compression scheme,
   1095 it's reasonable to store the map without compression.  In the future we
   1096 may want to have more than one compression scheme, and try each in turn,
   1097 retaining the best.  (We certainly want to keep the uncompressed form if it
   1098 turns out to be smaller or even slightly larger than the compressed form.)
   1099 
   1100 Each entry consists of an address and a bit vector.  Adjacent entries are
   1101 strongly correlated, suggesting differential encoding.
   1102 
   1103 
   1104 Ideally we would avoid outputting adjacent entries with identical
   1105 bit vectors.  However, the register values at a given address do not
   1106 imply anything about the set of valid registers at subsequent addresses.
   1107 We therefore cannot omit an entry.
   1108 
   1109   If the thread stack has a PC at an address without a corresponding
   1110   entry in the register map, we must conservatively scan the registers in
   1111   that thread.  This can happen when single-stepping in the debugger,
   1112   because the debugger is allowed to invoke arbitrary methods when
   1113   a thread is stopped at a breakpoint.  If we can guarantee that a GC
   1114   thread scan will never happen while the debugger has that thread stopped,
   1115   then we can lift this restriction and simply omit entries that don't
   1116   change the bit vector from its previous state.
   1117 
   1118 Each entry advances the address value by at least 1 (measured in 16-bit
   1119 "code units").  Looking at the bootclasspath entries, advancing by 2 units
   1120 is most common.  Advances by 1 unit are far less common than advances by
    1121 2 units, but more common than advances by 5, and things fall off rapidly.  Gaps of
   1122 up to 220 code units appear in some computationally intensive bits of code,
   1123 but are exceedingly rare.
   1124 
   1125 If we sum up the number of transitions in a couple of ranges in framework.jar:
   1126   [1,4]: 188998 of 218922 gaps (86.3%)
   1127   [1,7]: 211647 of 218922 gaps (96.7%)
   1128 Using a 3-bit delta, with one value reserved as an escape code, should
   1129 yield good results for the address.
   1130 
   1131 These results would change dramatically if we reduced the set of GC
   1132 points by e.g. removing instructions like integer divide that are only
   1133 present because they can throw and cause an allocation.
   1134 
   1135 We also need to include an "initial gap", because the first few instructions
   1136 in a method may not be GC points.
   1137 
   1138 
   1139 By observation, many entries simply repeat the previous bit vector, or
   1140 change only one or two bits.  (This is with type-precise information;
   1141 the rate of change of bits will be different if live-precise information
   1142 is factored in).
   1143 
   1144 Looking again at adjacent entries in framework.jar:
   1145   0 bits changed: 63.0%
   1146   1 bit changed: 32.2%
   1147 After that it falls off rapidly, e.g. the number of entries with 2 bits
   1148 changed is usually less than 1/10th of the number of entries with 1 bit
   1149 changed.  A solution that allows us to encode 0- or 1- bit changes
   1150 efficiently will do well.
   1151 
   1152 We still need to handle cases where a large number of bits change.  We
   1153 probably want a way to drop in a full copy of the bit vector when it's
   1154 smaller than the representation of multiple bit changes.
   1155 
   1156 
   1157 The bit-change information can be encoded as an index that tells the
   1158 decoder to toggle the state.  We want to encode the index in as few bits
   1159 as possible, but we need to allow for fairly wide vectors (e.g. we have a
   1160 method with 175 registers).  We can deal with this in a couple of ways:
   1161 (1) use an encoding that assumes few registers and has an escape code
   1162 for larger numbers of registers; or (2) use different encodings based
   1163 on how many total registers the method has.  The choice depends to some
   1164 extent on whether methods with large numbers of registers tend to modify
   1165 the first 16 regs more often than the others.
   1166 
   1167 The last N registers hold method arguments.  If the bytecode is expected
   1168 to be examined in a debugger, "dx" ensures that the contents of these
   1169 registers won't change.  Depending upon the encoding format, we may be
   1170 able to take advantage of this.  We still have to encode the initial
   1171 state, but we know we'll never have to output a bit change for the last
   1172 N registers.
   1173 
   1174 Considering only methods with 16 or more registers, the "target octant"
   1175 for register changes looks like this:
   1176   [ 43.1%, 16.4%, 6.5%, 6.2%, 7.4%, 8.8%, 9.7%, 1.8% ]
   1177 As expected, there are fewer changes at the end of the list where the
   1178 arguments are kept, and more changes at the start of the list because
   1179 register values smaller than 16 can be used in compact Dalvik instructions
   1180 and hence are favored for frequently-used values.  In general, the first
   1181 octant is considerably more active than later entries, the last octant
   1182 is much less active, and the rest are all about the same.
   1183 
   1184 Looking at all bit changes in all methods, 94% are to registers 0-15.  The
   1185 encoding will benefit greatly by favoring the low-numbered registers.
   1186 
   1187 
   1188 Some of the smaller methods have identical maps, and space could be
   1189 saved by simply including a pointer to an earlier definition.  This would
   1190 be best accomplished by specifying a "pointer" format value, followed by
   1191 a 3-byte (or ULEB128) offset.  Implementing this would probably involve
   1192 generating a hash value for each register map and maintaining a hash table.
   1193 
   1194 In some cases there are repeating patterns in the bit vector that aren't
   1195 adjacent.  These could benefit from a dictionary encoding.  This doesn't
   1196 really become useful until the methods reach a certain size though,
   1197 and managing the dictionary may incur more overhead than we want.
   1198 
   1199 Large maps can be compressed significantly.  The trouble is that, when
   1200 we need to use them, we have to uncompress them onto the heap.  We may
   1201 get a better trade-off between storage size and heap usage by refusing to
   1202 compress large maps, so that they can be memory mapped and used directly.
   1203 (OTOH, only about 2% of the maps will ever actually be used.)
   1204 
   1205 
   1206 ----- differential format -----
   1207 
   1208 // common header
   1209 +00 1B format
   1210 +01 1B regWidth
   1211 +02 2B numEntries (little-endian)
   1212 +04 nB length in bytes of the data that follows, in ULEB128 format
   1213        (not strictly necessary; allows determination of size w/o full parse)
   1214 +05+ 1B initial address (0-127), high bit set if max addr >= 256
   1215 +06+ nB initial value for bit vector
   1216 
   1217 // for each entry
   1218 +00: CCCCBAAA
   1219 
   1220   AAA: address difference.  Values from 0 to 6 indicate an increment of 1
   1221   to 7.  A value of 7 indicates that the address difference is large,
   1222   and the next byte is a ULEB128-encoded difference value.
   1223 
   1224   B: determines the meaning of CCCC.
   1225 
   1226   CCCC: if B is 0, this is the number of the bit to toggle (0-15).
   1227   If B is 1, this is a count of the number of changed bits (1-14).  A value
   1228   of 0 means that no bits were changed, and a value of 15 indicates
   1229   that enough bits were changed that it required less space to output
   1230   the entire bit vector.
   1231 
   1232 +01: (optional) ULEB128-encoded address difference
   1233 
   1234 +01+: (optional) one or more ULEB128-encoded bit numbers, OR the entire
   1235   bit vector.
   1236 
   1237 The most common situation is an entry whose address has changed by 2-4
   1238 code units, has no changes or just a single bit change, and the changed
   1239 register is less than 16.  We should therefore be able to encode a large
   1240 number of entries with a single byte, which is half the size of the
   1241 Compact8 encoding method.
   1242 */
   1243 
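/*
 * Illustrative sketch only -- not part of the VM.  It unpacks the per-entry
 * key byte (CCCCBAAA) of the differential format described above.  The real
 * decoder is uncompressMapDifferential(); the struct and function names here
 * are hypothetical.
 */
struct EntryKeySketch {
    int  addrDelta;      /* address advance in code units (1-7); 0 means the
                          * delta was too large and a ULEB128 value follows */
    bool isChangeCount;  /* true: "bits" counts changed bits; false: "bits"
                          * is the single bit number (0-15) to toggle */
    int  bits;           /* when counting: 0 = no change, 1-14 = that many
                          * ULEB128 bit numbers follow, 15 = full vector */
};

static inline EntryKeySketch sketchUnpackEntryKey(u1 key)
{
    EntryKeySketch e;
    int aaa = key & 0x07;                     /* AAA field */
    e.addrDelta = (aaa == 7) ? 0 : aaa + 1;   /* 0-6 encode an advance of 1-7 */
    e.isChangeCount = (key & 0x08) != 0;      /* B field */
    e.bits = (key >> 4) & 0x0f;               /* CCCC field */
    return e;
}
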
   1244 /*
   1245  * Compute some stats on an uncompressed register map.
   1246  */
   1247 #ifdef REGISTER_MAP_STATS
   1248 static void computeMapStats(RegisterMap* pMap, const Method* method)
   1249 {
   1250     MapStats* pStats = (MapStats*) gDvm.registerMapStats;
   1251     const u1 format = dvmRegisterMapGetFormat(pMap);
   1252     const u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
   1253     const u1* rawMap = pMap->data;
   1254     const u1* prevData = NULL;
   1255     int ent, addr, prevAddr = -1;
   1256 
   1257     for (ent = 0; ent < numEntries; ent++) {
   1258         switch (format) {
   1259         case kRegMapFormatCompact8:
   1260             addr = *rawMap++;
   1261             break;
   1262         case kRegMapFormatCompact16:
   1263             addr = *rawMap++;
   1264             addr |= (*rawMap++) << 8;
   1265             break;
   1266         default:
   1267             /* shouldn't happen */
   1268             LOGE("GLITCH: bad format (%d)", format);
   1269             dvmAbort();
   1270         }
   1271 
   1272         const u1* dataStart = rawMap;
   1273 
   1274         pStats->totalGcPointCount++;
   1275 
   1276         /*
   1277          * Gather "gap size" stats, i.e. the difference in addresses between
   1278          * successive GC points.
   1279          */
   1280         if (prevData != NULL) {
   1281             assert(prevAddr >= 0);
   1282             int addrDiff = addr - prevAddr;
   1283 
   1284             if (addrDiff < 0) {
   1285                 LOGE("GLITCH: address went backward (0x%04x->0x%04x, %s.%s)",
   1286                     prevAddr, addr, method->clazz->descriptor, method->name);
   1287             } else if (addrDiff > kMaxGcPointGap) {
   1288                 if (REGISTER_MAP_VERBOSE) {
   1289                     LOGI("HEY: addrDiff is %d, max %d (0x%04x->0x%04x %s.%s)",
   1290                         addrDiff, kMaxGcPointGap, prevAddr, addr,
   1291                         method->clazz->descriptor, method->name);
   1292                 }
   1293                 /* skip this one */
   1294             } else {
   1295                 pStats->gcPointGap[addrDiff]++;
   1296             }
   1297             pStats->gcGapCount++;
   1298 
   1299 
   1300             /*
   1301              * Compare bit vectors in adjacent entries.  We want to count
   1302              * up the number of bits that differ (to see if we frequently
   1303              * change 0 or 1 bits) and get a sense for which part of the
   1304              * vector changes the most often (near the start, middle, end).
   1305              *
   1306              * We only do the vector position quantization if we have at
   1307              * least 16 registers in the method.
   1308              */
   1309             int numDiff = 0;
   1310             float div = (float) kNumUpdatePosns / method->registersSize;
   1311             int regByte;
   1312             for (regByte = 0; regByte < pMap->regWidth; regByte++) {
   1313                 int prev, cur, bit;
   1314 
   1315                 prev = prevData[regByte];
   1316                 cur = dataStart[regByte];
   1317 
   1318                 for (bit = 0; bit < 8; bit++) {
   1319                     if (((prev >> bit) & 1) != ((cur >> bit) & 1)) {
   1320                         numDiff++;
   1321 
   1322                         int bitNum = regByte * 8 + bit;
   1323 
   1324                         if (bitNum < 16)
   1325                             pStats->updateLT16++;
   1326                         else
   1327                             pStats->updateGE16++;
   1328 
   1329                         if (method->registersSize < 16)
   1330                             continue;
   1331 
   1332                         if (bitNum >= method->registersSize) {
   1333                             /* stuff off the end should be zero in both */
   1334                             LOGE("WEIRD: bit=%d (%d/%d), prev=%02x cur=%02x",
   1335                                 bit, regByte, method->registersSize,
   1336                                 prev, cur);
   1337                             assert(false);
   1338                         }
   1339                         int idx = (int) (bitNum * div);
   1340                         if (!(idx >= 0 && idx < kNumUpdatePosns)) {
   1341                             LOGE("FAIL: bitNum=%d (of %d) div=%.3f idx=%d",
   1342                                 bitNum, method->registersSize, div, idx);
   1343                             assert(false);
   1344                         }
   1345                         pStats->updatePosn[idx]++;
   1346                     }
   1347                 }
   1348             }
   1349 
   1350             if (numDiff > kMaxDiffBits) {
   1351                 if (REGISTER_MAP_VERBOSE) {
   1352                     LOGI("WOW: numDiff is %d, max %d", numDiff, kMaxDiffBits);
   1353                 }
   1354             } else {
   1355                 pStats->numDiffBits[numDiff]++;
   1356             }
   1357         }
   1358 
   1359         /* advance to start of next line */
   1360         rawMap += pMap->regWidth;
   1361 
   1362         prevAddr = addr;
   1363         prevData = dataStart;
   1364     }
   1365 }
   1366 #endif
   1367 
   1368 /*
   1369  * Compute the difference between two bit vectors.
   1370  *
   1371  * If "lebOutBuf" is non-NULL, we output the bit indices in ULEB128 format
   1372  * as we go.  Otherwise, we just generate the various counts.
   1373  *
   1374  * The bit vectors are compared byte-by-byte, so any unused bits at the
   1375  * end must be zero.
   1376  *
   1377  * Returns the number of bytes required to hold the ULEB128 output.
   1378  *
   1379  * If "pFirstBitChanged" or "pNumBitsChanged" are non-NULL, they will
   1380  * receive the index of the first changed bit and the number of changed
   1381  * bits, respectively.
   1382  */
   1383 static int computeBitDiff(const u1* bits1, const u1* bits2, int byteWidth,
   1384     int* pFirstBitChanged, int* pNumBitsChanged, u1* lebOutBuf)
   1385 {
   1386     int numBitsChanged = 0;
   1387     int firstBitChanged = -1;
   1388     int lebSize = 0;
   1389     int byteNum;
   1390 
   1391     /*
   1392      * Run through the vectors, first comparing them at the byte level.  This
   1393      * will yield a fairly quick result if nothing has changed between them.
   1394      */
   1395     for (byteNum = 0; byteNum < byteWidth; byteNum++) {
   1396         u1 byte1 = *bits1++;
   1397         u1 byte2 = *bits2++;
   1398         if (byte1 != byte2) {
   1399             /*
   1400              * Walk through the byte, identifying the changed bits.
   1401              */
   1402             int bitNum;
   1403             for (bitNum = 0; bitNum < 8; bitNum++) {
   1404                 if (((byte1 >> bitNum) & 0x01) != ((byte2 >> bitNum) & 0x01)) {
   1405                     int bitOffset = (byteNum << 3) + bitNum;
   1406 
   1407                     if (firstBitChanged < 0)
   1408                         firstBitChanged = bitOffset;
   1409                     numBitsChanged++;
   1410 
   1411                     if (lebOutBuf == NULL) {
   1412                         lebSize += unsignedLeb128Size(bitOffset);
   1413                     } else {
   1414                         u1* curBuf = lebOutBuf;
   1415                         lebOutBuf = writeUnsignedLeb128(lebOutBuf, bitOffset);
   1416                         lebSize += lebOutBuf - curBuf;
   1417                     }
   1418                 }
   1419             }
   1420         }
   1421     }
   1422 
   1423     if (numBitsChanged > 0)
   1424         assert(firstBitChanged >= 0);
   1425 
   1426     if (pFirstBitChanged != NULL)
   1427         *pFirstBitChanged = firstBitChanged;
   1428     if (pNumBitsChanged != NULL)
   1429         *pNumBitsChanged = numBitsChanged;
   1430 
   1431     return lebSize;
   1432 }
   1433 
   1434 /*
   1435  * Compress the register map with differential encoding.
   1436  *
   1437  * "meth" is only needed for debug output.
   1438  *
   1439  * On success, returns a newly-allocated RegisterMap.  If the map is not
   1440  * compatible for some reason, or fails to get smaller, this will return NULL.
   1441  */
   1442 static RegisterMap* compressMapDifferential(const RegisterMap* pMap,
   1443     const Method* meth)
   1444 {
   1445     RegisterMap* pNewMap = NULL;
   1446     int origSize = computeRegisterMapSize(pMap);
   1447     u1* tmpPtr;
   1448     int addrWidth, regWidth, numEntries;
   1449     bool debug = false;
   1450 
   1451     if (false &&
   1452         strcmp(meth->clazz->descriptor, "Landroid/text/StaticLayout;") == 0 &&
   1453         strcmp(meth->name, "generate") == 0)
   1454     {
   1455         debug = true;
   1456     }
   1457 
   1458     u1 format = dvmRegisterMapGetFormat(pMap);
   1459     switch (format) {
   1460     case kRegMapFormatCompact8:
   1461         addrWidth = 1;
   1462         break;
   1463     case kRegMapFormatCompact16:
   1464         addrWidth = 2;
   1465         break;
   1466     default:
   1467         LOGE("ERROR: can't compress map with format=%d", format);
   1468         return NULL;
   1469     }
   1470 
   1471     regWidth = dvmRegisterMapGetRegWidth(pMap);
   1472     numEntries = dvmRegisterMapGetNumEntries(pMap);
   1473 
   1474     if (debug) {
   1475         LOGI("COMPRESS: %s.%s aw=%d rw=%d ne=%d",
   1476             meth->clazz->descriptor, meth->name,
   1477             addrWidth, regWidth, numEntries);
   1478         dumpRegisterMap(pMap, -1);
   1479     }
   1480 
   1481     if (numEntries <= 1) {
   1482         LOGV("Can't compress map with 0 or 1 entries");
   1483         return NULL;
   1484     }
   1485 
   1486     /*
   1487      * We don't know how large the compressed data will be.  It's possible
   1488      * for it to expand and become larger than the original.  The header
   1489      * itself is variable-sized, so we generate everything into a temporary
   1490      * buffer and then copy it to form-fitting storage once we know how big
   1491      * it will be (and that it's smaller than the original).
   1492      *
    1493      * If the buffer is as large as the input map plus the most a single
    1494      * entry can possibly expand to, we need only check for overflow at
    1495      * the end of each entry.  The worst case for a single line is
    1496      * (1 + <ULEB128 address> + <full copy of vector>).  Addresses are
    1497      * 16 bits, so that's (1 + 3 + regWidth).
   1498      *
   1499      * The initial address offset and bit vector will take up less than
   1500      * or equal to the amount of space required when uncompressed -- large
   1501      * initial offsets are rejected.
   1502      */
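             /*
              * For example, a map with regWidth=2 whose uncompressed size is 100
              * bytes gets a 100 + (1 + 3 + 2) = 106 byte temporary buffer.
              */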
   1503     UniquePtr<u1[]> tmpBuf(new u1[origSize + (1 + 3 + regWidth)]);
   1504     if (tmpBuf.get() == NULL)
   1505         return NULL;
   1506 
   1507     tmpPtr = tmpBuf.get();
   1508 
   1509     const u1* mapData = pMap->data;
   1510     const u1* prevBits;
   1511     u2 addr, prevAddr;
   1512 
   1513     addr = *mapData++;
   1514     if (addrWidth > 1)
   1515         addr |= (*mapData++) << 8;
   1516 
   1517     if (addr >= 128) {
   1518         LOGV("Can't compress map with starting address >= 128");
   1519         return NULL;
   1520     }
   1521 
   1522     /*
   1523      * Start by writing the initial address and bit vector data.  The high
   1524      * bit of the initial address is used to indicate the required address
   1525      * width (which the decoder can't otherwise determine without parsing
   1526      * the compressed data).
   1527      */
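             /* e.g. a start address of 0x10 is written as 0x10 for an 8-bit
              * map and as 0x90 when 16-bit addresses are in use */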
   1528     *tmpPtr++ = addr | (addrWidth > 1 ? 0x80 : 0x00);
   1529     memcpy(tmpPtr, mapData, regWidth);
   1530 
   1531     prevBits = mapData;
   1532     prevAddr = addr;
   1533 
   1534     tmpPtr += regWidth;
   1535     mapData += regWidth;
   1536 
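             /*
              * The loop below packs each remaining entry into a key byte whose
              * bits are laid out as CCCC B AAA:
              *   AAA  (bits 0-2): the address delta minus one, or 7 to indicate
              *        that the delta follows as ULEB128;
              *   B    (bit 3): set when CCCC is a changed-bit count;
              *   CCCC (bits 4-7): with B clear, the index of the single changed
              *        bit; with B set, the number of changed bits, where 0 means
              *        "no change" and 15 means a full copy of the vector follows.
              */
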
   1537     /*
   1538      * Loop over all following entries.
   1539      */
   1540     for (int entry = 1; entry < numEntries; entry++) {
   1541         int addrDiff;
   1542         u1 key;
   1543 
   1544         /*
   1545          * Pull out the address and figure out how to encode it.
   1546          */
   1547         addr = *mapData++;
   1548         if (addrWidth > 1)
   1549             addr |= (*mapData++) << 8;
   1550 
   1551         if (debug)
   1552             LOGI(" addr=0x%04x ent=%d (aw=%d)", addr, entry, addrWidth);
   1553 
   1554         addrDiff = addr - prevAddr;
   1555         assert(addrDiff > 0);
   1556         if (addrDiff < 8) {
   1557             /* small difference, encode in 3 bits */
    1558             key = addrDiff - 1;         /* set 00000AAA */
   1559             if (debug)
   1560                 LOGI(" : small %d, key=0x%02x", addrDiff, key);
   1561         } else {
   1562             /* large difference, output escape code */
   1563             key = 0x07;                 /* escape code for AAA */
   1564             if (debug)
   1565                 LOGI(" : large %d, key=0x%02x", addrDiff, key);
   1566         }
   1567 
   1568         int numBitsChanged, firstBitChanged, lebSize;
   1569 
   1570         lebSize = computeBitDiff(prevBits, mapData, regWidth,
   1571             &firstBitChanged, &numBitsChanged, NULL);
   1572 
   1573         if (debug) {
   1574             LOGI(" : diff fbc=%d nbc=%d ls=%d (rw=%d)",
   1575                 firstBitChanged, numBitsChanged, lebSize, regWidth);
   1576         }
   1577 
   1578         if (numBitsChanged == 0) {
   1579             /* set B to 1 and CCCC to zero to indicate no bits were changed */
   1580             key |= 0x08;
   1581             if (debug) LOGI(" : no bits changed");
   1582         } else if (numBitsChanged == 1 && firstBitChanged < 16) {
   1583             /* set B to 0 and CCCC to the index of the changed bit */
   1584             key |= firstBitChanged << 4;
   1585             if (debug) LOGI(" : 1 low bit changed");
   1586         } else if (numBitsChanged < 15 && lebSize < regWidth) {
   1587             /* set B to 1 and CCCC to the number of bits */
   1588             key |= 0x08 | (numBitsChanged << 4);
   1589             if (debug) LOGI(" : some bits changed");
   1590         } else {
   1591             /* set B to 1 and CCCC to 0x0f so we store the entire vector */
   1592             key |= 0x08 | 0xf0;
   1593             if (debug) LOGI(" : encode original");
   1594         }
   1595 
   1596         /*
   1597          * Encode output.  Start with the key, follow with the address
   1598          * diff (if it didn't fit in 3 bits), then the changed bit info.
   1599          */
   1600         *tmpPtr++ = key;
   1601         if ((key & 0x07) == 0x07)
   1602             tmpPtr = writeUnsignedLeb128(tmpPtr, addrDiff);
   1603 
   1604         if ((key & 0x08) != 0) {
   1605             int bitCount = key >> 4;
   1606             if (bitCount == 0) {
   1607                 /* nothing changed, no additional output required */
   1608             } else if (bitCount == 15) {
   1609                 /* full vector is most compact representation */
   1610                 memcpy(tmpPtr, mapData, regWidth);
   1611                 tmpPtr += regWidth;
   1612             } else {
   1613                 /* write bit indices in LEB128 format */
   1614                 (void) computeBitDiff(prevBits, mapData, regWidth,
   1615                     NULL, NULL, tmpPtr);
   1616                 tmpPtr += lebSize;
   1617             }
   1618         } else {
   1619             /* single-bit changed, value encoded in key byte */
   1620         }
   1621 
   1622         prevBits = mapData;
   1623         prevAddr = addr;
   1624         mapData += regWidth;
   1625 
   1626         /*
   1627          * See if we've run past the original size.
   1628          */
   1629         if (tmpPtr - tmpBuf.get() >= origSize) {
   1630             if (debug) {
   1631                 LOGD("Compressed size >= original (%d vs %d): %s.%s",
    1632                     (int) (tmpPtr - tmpBuf.get()), origSize,
   1633                     meth->clazz->descriptor, meth->name);
   1634             }
   1635             return NULL;
   1636         }
   1637     }
   1638 
   1639     /*
   1640      * Create a RegisterMap with the contents.
   1641      *
   1642      * TODO: consider using a threshold other than merely ">=".  We would
   1643      * get poorer compression but potentially use less native heap space.
   1644      */
   1645     static const int kHeaderSize = offsetof(RegisterMap, data);
   1646     int newDataSize = tmpPtr - tmpBuf.get();
   1647     int newMapSize;
   1648 
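             /* e.g. a 200-byte compressed payload needs a 2-byte ULEB128 length
              * prefix, so the new map occupies kHeaderSize + 2 + 200 bytes */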
   1649     newMapSize = kHeaderSize + unsignedLeb128Size(newDataSize) + newDataSize;
   1650     if (newMapSize >= origSize) {
   1651         if (debug) {
   1652             LOGD("Final comp size >= original (%d vs %d): %s.%s",
   1653                 newMapSize, origSize, meth->clazz->descriptor, meth->name);
   1654         }
   1655         return NULL;
   1656     }
   1657 
   1658     pNewMap = (RegisterMap*) malloc(newMapSize);
   1659     if (pNewMap == NULL)
   1660         return NULL;
   1661     dvmRegisterMapSetFormat(pNewMap, kRegMapFormatDifferential);
   1662     dvmRegisterMapSetOnHeap(pNewMap, true);
   1663     dvmRegisterMapSetRegWidth(pNewMap, regWidth);
   1664     dvmRegisterMapSetNumEntries(pNewMap, numEntries);
   1665 
   1666     tmpPtr = pNewMap->data;
   1667     tmpPtr = writeUnsignedLeb128(tmpPtr, newDataSize);
   1668     memcpy(tmpPtr, tmpBuf.get(), newDataSize);
   1669 
   1670     if (REGISTER_MAP_VERBOSE) {
   1671         LOGD("Compression successful (%d -> %d) from aw=%d rw=%d ne=%d",
   1672             computeRegisterMapSize(pMap), computeRegisterMapSize(pNewMap),
   1673             addrWidth, regWidth, numEntries);
   1674     }
   1675 
   1676     return pNewMap;
   1677 }
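
         /*
          * Minimal usage sketch (hypothetical caller, hypothetical variable
          * names):
          *
          *     RegisterMap* pComp = compressMapDifferential(pMap, method);
          *     if (pComp != NULL) {
          *         // differential form was smaller; caller owns the new
          *         // heap-allocated map, and "pMap" is left untouched
          *     }
          */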
   1678 
   1679 /*
   1680  * Toggle the value of the "idx"th bit in "ptr".
   1681  */
   1682 static inline void toggleBit(u1* ptr, int idx)
   1683 {
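             /* e.g. idx 10 flips bit 2 of ptr[1]: 10>>3 == 1, 10&7 == 2 */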
   1684     ptr[idx >> 3] ^= 1 << (idx & 0x07);
   1685 }
   1686 
   1687 /*
   1688  * Expand a compressed map to an uncompressed form.
   1689  *
   1690  * Returns a newly-allocated RegisterMap on success, or NULL on failure.
   1691  *
   1692  * TODO: consider using the linear allocator or a custom allocator with
   1693  * LRU replacement for these instead of the native heap.
   1694  */
   1695 static RegisterMap* uncompressMapDifferential(const RegisterMap* pMap)
   1696 {
   1697     static const int kHeaderSize = offsetof(RegisterMap, data);
   1698     u1 format = dvmRegisterMapGetFormat(pMap);
   1699     RegisterMapFormat newFormat;
   1700     int regWidth, numEntries, newAddrWidth, newMapSize;
   1701 
   1702     if (format != kRegMapFormatDifferential) {
   1703         LOGE("Not differential (%d)", format);
   1704         return NULL;
   1705     }
   1706 
   1707     regWidth = dvmRegisterMapGetRegWidth(pMap);
   1708     numEntries = dvmRegisterMapGetNumEntries(pMap);
   1709 
   1710     /* get the data size; we can check this at the end */
   1711     const u1* srcPtr = pMap->data;
   1712     int expectedSrcLen = readUnsignedLeb128(&srcPtr);
   1713     const u1* srcStart = srcPtr;
   1714 
   1715     /* get the initial address and the 16-bit address flag */
   1716     int addr = *srcPtr & 0x7f;
   1717     if ((*srcPtr & 0x80) == 0) {
   1718         newFormat = kRegMapFormatCompact8;
   1719         newAddrWidth = 1;
   1720     } else {
   1721         newFormat = kRegMapFormatCompact16;
   1722         newAddrWidth = 2;
   1723     }
   1724     srcPtr++;
   1725 
   1726     /* now we know enough to allocate the new map */
   1727     if (REGISTER_MAP_VERBOSE) {
   1728         LOGI("Expanding to map aw=%d rw=%d ne=%d",
   1729             newAddrWidth, regWidth, numEntries);
   1730     }
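             /* each line holds an address plus a bit vector; e.g. with 16-bit
              * addresses and regWidth=3, 40 entries need
              * kHeaderSize + (2+3)*40 bytes */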
   1731     newMapSize = kHeaderSize + (newAddrWidth + regWidth) * numEntries;
   1732     RegisterMap* pNewMap = (RegisterMap*) malloc(newMapSize);
   1733 
   1734     if (pNewMap == NULL)
    1735         return NULL;
   1736 
   1737     dvmRegisterMapSetFormat(pNewMap, newFormat);
   1738     dvmRegisterMapSetOnHeap(pNewMap, true);
   1739     dvmRegisterMapSetRegWidth(pNewMap, regWidth);
   1740     dvmRegisterMapSetNumEntries(pNewMap, numEntries);
   1741 
   1742     /*
   1743      * Write the start address and initial bits to the new map.
   1744      */
   1745     u1* dstPtr = pNewMap->data;
   1746 
   1747     *dstPtr++ = addr & 0xff;
   1748     if (newAddrWidth > 1)
   1749         *dstPtr++ = (u1) (addr >> 8);
   1750 
   1751     memcpy(dstPtr, srcPtr, regWidth);
   1752 
   1753     int prevAddr = addr;
   1754     const u1* prevBits = dstPtr;    /* point at uncompressed data */
   1755 
   1756     dstPtr += regWidth;
   1757     srcPtr += regWidth;
   1758 
   1759     /*
   1760      * Walk through, uncompressing one line at a time.
   1761      */
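             /*
              * For example, a key byte of 0x52 (AAA=2, B=0, CCCC=5) advances the
              * address by 3 and toggles bit 5 of the previous vector, while 0x1f
              * (AAA=7, B=1, CCCC=1) means a ULEB128 address delta follows, then
              * one ULEB128 changed-bit index.
              */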
   1762     int entry;
   1763     for (entry = 1; entry < numEntries; entry++) {
   1764         int addrDiff;
   1765         u1 key;
   1766 
   1767         key = *srcPtr++;
   1768 
   1769         /* get the address */
   1770         if ((key & 0x07) == 7) {
   1771             /* address diff follows in ULEB128 */
   1772             addrDiff = readUnsignedLeb128(&srcPtr);
   1773         } else {
    1774             addrDiff = (key & 0x07) + 1;
   1775         }
   1776 
   1777         addr = prevAddr + addrDiff;
   1778         *dstPtr++ = addr & 0xff;
   1779         if (newAddrWidth > 1)
   1780             *dstPtr++ = (u1) (addr >> 8);
   1781 
   1782         /* unpack the bits */
   1783         if ((key & 0x08) != 0) {
   1784             int bitCount = (key >> 4);
   1785             if (bitCount == 0) {
   1786                 /* no bits changed, just copy previous */
   1787                 memcpy(dstPtr, prevBits, regWidth);
   1788             } else if (bitCount == 15) {
   1789                 /* full copy of bit vector is present; ignore prevBits */
   1790                 memcpy(dstPtr, srcPtr, regWidth);
   1791                 srcPtr += regWidth;
   1792             } else {
   1793                 /* copy previous bits and modify listed indices */
   1794                 memcpy(dstPtr, prevBits, regWidth);
   1795                 while (bitCount--) {
   1796                     int bitIndex = readUnsignedLeb128(&srcPtr);
   1797                     toggleBit(dstPtr, bitIndex);
   1798                 }
   1799             }
   1800         } else {
   1801             /* copy previous bits and modify the specified one */
   1802             memcpy(dstPtr, prevBits, regWidth);
   1803 
   1804             /* one bit, from 0-15 inclusive, was changed */
   1805             toggleBit(dstPtr, key >> 4);
   1806         }
   1807 
   1808         prevAddr = addr;
   1809         prevBits = dstPtr;
   1810         dstPtr += regWidth;
   1811     }
   1812 
   1813     if (dstPtr - (u1*) pNewMap != newMapSize) {
   1814         LOGE("ERROR: output %d bytes, expected %d",
    1815             (int) (dstPtr - (u1*) pNewMap), newMapSize);
   1816         free(pNewMap);
   1817         return NULL;
   1818     }
   1819 
   1820     if (srcPtr - srcStart != expectedSrcLen) {
   1821         LOGE("ERROR: consumed %d bytes, expected %d",
    1822             (int) (srcPtr - srcStart), expectedSrcLen);
   1823         free(pNewMap);
   1824         return NULL;
   1825     }
   1826 
   1827     if (REGISTER_MAP_VERBOSE) {
   1828         LOGD("Expansion successful (%d -> %d)",
   1829             computeRegisterMapSize(pMap), computeRegisterMapSize(pNewMap));
   1830     }
   1831 
   1832     return pNewMap;
   1833 }
   1834