1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * This code generate "register maps" for Dalvik bytecode. In a stack-based 19 * VM we might call these "stack maps". They are used to increase the 20 * precision in the garbage collector when scanning references in the 21 * interpreter thread stacks. 22 */ 23 #include "Dalvik.h" 24 #include "UniquePtr.h" 25 #include "analysis/CodeVerify.h" 26 #include "analysis/RegisterMap.h" 27 #include "libdex/DexCatch.h" 28 #include "libdex/InstrUtils.h" 29 #include "libdex/Leb128.h" 30 31 #include <stddef.h> 32 33 /* double-check the compression */ 34 #define REGISTER_MAP_VERIFY false 35 36 /* verbose logging */ 37 #define REGISTER_MAP_VERBOSE false 38 39 //#define REGISTER_MAP_STATS 40 41 // fwd 42 static void outputTypeVector(const RegType* regs, int insnRegCount, u1* data); 43 static bool verifyMap(VerifierData* vdata, const RegisterMap* pMap); 44 static int compareMaps(const RegisterMap* pMap1, const RegisterMap* pMap2); 45 46 #ifdef REGISTER_MAP_STATS 47 static void computeMapStats(RegisterMap* pMap, const Method* method); 48 #endif 49 static RegisterMap* compressMapDifferential(const RegisterMap* pMap,\ 50 const Method* meth); 51 static RegisterMap* uncompressMapDifferential(const RegisterMap* pMap); 52 53 #ifdef REGISTER_MAP_STATS 54 /* 55 * Generate some statistics on the register maps we create and use. 56 */ 57 #define kMaxGcPointGap 50 58 #define kUpdatePosnMinRegs 24 59 #define kNumUpdatePosns 8 60 #define kMaxDiffBits 20 61 struct MapStats { 62 /* 63 * Buckets measuring the distance between GC points. This tells us how 64 * many bits we need to encode the advancing program counter. We ignore 65 * some of the "long tail" entries. 66 */ 67 int gcPointGap[kMaxGcPointGap]; 68 69 /* 70 * Number of gaps. Equal to (number of gcPoints - number of methods), 71 * since the computation isn't including the initial gap. 72 */ 73 int gcGapCount; 74 75 /* 76 * Number of gaps. 77 */ 78 int totalGcPointCount; 79 80 /* 81 * For larger methods (>= 24 registers), measure in which octant register 82 * updates occur. This should help us understand whether register 83 * changes tend to cluster in the low regs even for large methods. 84 */ 85 int updatePosn[kNumUpdatePosns]; 86 87 /* 88 * For all methods, count up the number of changes to registers < 16 89 * and >= 16. 90 */ 91 int updateLT16; 92 int updateGE16; 93 94 /* 95 * Histogram of the number of bits that differ between adjacent entries. 96 */ 97 int numDiffBits[kMaxDiffBits]; 98 99 100 /* 101 * Track the number of expanded maps, and the heap space required to 102 * hold them. 103 */ 104 int numExpandedMaps; 105 int totalExpandedMapSize; 106 }; 107 #endif 108 109 /* 110 * Prepare some things. 111 */ 112 bool dvmRegisterMapStartup() 113 { 114 #ifdef REGISTER_MAP_STATS 115 MapStats* pStats = calloc(1, sizeof(MapStats)); 116 gDvm.registerMapStats = pStats; 117 #endif 118 return true; 119 } 120 121 /* 122 * Clean up. 123 */ 124 void dvmRegisterMapShutdown() 125 { 126 #ifdef REGISTER_MAP_STATS 127 free(gDvm.registerMapStats); 128 #endif 129 } 130 131 /* 132 * Write stats to log file. 133 */ 134 void dvmRegisterMapDumpStats() 135 { 136 #ifdef REGISTER_MAP_STATS 137 MapStats* pStats = (MapStats*) gDvm.registerMapStats; 138 int i, end; 139 140 for (end = kMaxGcPointGap-1; end >= 0; end--) { 141 if (pStats->gcPointGap[end] != 0) 142 break; 143 } 144 145 LOGI("Register Map gcPointGap stats (diff count=%d, total=%d):", 146 pStats->gcGapCount, pStats->totalGcPointCount); 147 assert(pStats->gcPointGap[0] == 0); 148 for (i = 1; i <= end; i++) { 149 LOGI(" %2d %d", i, pStats->gcPointGap[i]); 150 } 151 152 153 for (end = kMaxDiffBits-1; end >= 0; end--) { 154 if (pStats->numDiffBits[end] != 0) 155 break; 156 } 157 158 LOGI("Register Map bit difference stats:"); 159 for (i = 0; i <= end; i++) { 160 LOGI(" %2d %d", i, pStats->numDiffBits[i]); 161 } 162 163 164 LOGI("Register Map update position stats (lt16=%d ge16=%d):", 165 pStats->updateLT16, pStats->updateGE16); 166 for (i = 0; i < kNumUpdatePosns; i++) { 167 LOGI(" %2d %d", i, pStats->updatePosn[i]); 168 } 169 #endif 170 } 171 172 173 /* 174 * =========================================================================== 175 * Map generation 176 * =========================================================================== 177 */ 178 179 /* 180 * Generate the register map for a method that has just been verified 181 * (i.e. we're doing this as part of verification). 182 * 183 * For type-precise determination we have all the data we need, so we 184 * just need to encode it in some clever fashion. 185 * 186 * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure. 187 */ 188 RegisterMap* dvmGenerateRegisterMapV(VerifierData* vdata) 189 { 190 static const int kHeaderSize = offsetof(RegisterMap, data); 191 RegisterMap* pMap = NULL; 192 RegisterMap* pResult = NULL; 193 RegisterMapFormat format; 194 u1 regWidth; 195 u1* mapData; 196 int i, bytesForAddr, gcPointCount; 197 int bufSize; 198 199 if (vdata->method->registersSize >= 2048) { 200 LOGE("ERROR: register map can't handle %d registers", 201 vdata->method->registersSize); 202 goto bail; 203 } 204 regWidth = (vdata->method->registersSize + 7) / 8; 205 206 /* 207 * Decide if we need 8 or 16 bits to hold the address. Strictly speaking 208 * we only need 16 bits if we actually encode an address >= 256 -- if 209 * the method has a section at the end without GC points (e.g. array 210 * data) we don't need to count it. The situation is unusual, and 211 * detecting it requires scanning the entire method, so we don't bother. 212 */ 213 if (vdata->insnsSize < 256) { 214 format = kRegMapFormatCompact8; 215 bytesForAddr = 1; 216 } else { 217 format = kRegMapFormatCompact16; 218 bytesForAddr = 2; 219 } 220 221 /* 222 * Count up the number of GC point instructions. 223 * 224 * NOTE: this does not automatically include the first instruction, 225 * since we don't count method entry as a GC point. 226 */ 227 gcPointCount = 0; 228 for (i = 0; i < (int) vdata->insnsSize; i++) { 229 if (dvmInsnIsGcPoint(vdata->insnFlags, i)) 230 gcPointCount++; 231 } 232 if (gcPointCount >= 65536) { 233 /* we could handle this, but in practice we don't get near this */ 234 LOGE("ERROR: register map can't handle %d gc points in one method", 235 gcPointCount); 236 goto bail; 237 } 238 239 /* 240 * Allocate a buffer to hold the map data. 241 */ 242 bufSize = kHeaderSize + gcPointCount * (bytesForAddr + regWidth); 243 244 LOGV("+++ grm: %s.%s (adr=%d gpc=%d rwd=%d bsz=%d)", 245 vdata->method->clazz->descriptor, vdata->method->name, 246 bytesForAddr, gcPointCount, regWidth, bufSize); 247 248 pMap = (RegisterMap*) malloc(bufSize); 249 dvmRegisterMapSetFormat(pMap, format); 250 dvmRegisterMapSetOnHeap(pMap, true); 251 dvmRegisterMapSetRegWidth(pMap, regWidth); 252 dvmRegisterMapSetNumEntries(pMap, gcPointCount); 253 254 /* 255 * Populate it. 256 */ 257 mapData = pMap->data; 258 for (i = 0; i < (int) vdata->insnsSize; i++) { 259 if (dvmInsnIsGcPoint(vdata->insnFlags, i)) { 260 assert(vdata->registerLines[i].regTypes != NULL); 261 if (format == kRegMapFormatCompact8) { 262 *mapData++ = i; 263 } else /*kRegMapFormatCompact16*/ { 264 *mapData++ = i & 0xff; 265 *mapData++ = i >> 8; 266 } 267 outputTypeVector(vdata->registerLines[i].regTypes, 268 vdata->insnRegCount, mapData); 269 mapData += regWidth; 270 } 271 } 272 273 LOGV("mapData=%p pMap=%p bufSize=%d", mapData, pMap, bufSize); 274 assert(mapData - (const u1*) pMap == bufSize); 275 276 if (REGISTER_MAP_VERIFY && !verifyMap(vdata, pMap)) 277 goto bail; 278 #ifdef REGISTER_MAP_STATS 279 computeMapStats(pMap, vdata->method); 280 #endif 281 282 /* 283 * Try to compress the map. 284 */ 285 RegisterMap* pCompMap; 286 287 pCompMap = compressMapDifferential(pMap, vdata->method); 288 if (pCompMap != NULL) { 289 if (REGISTER_MAP_VERIFY) { 290 /* 291 * Expand the compressed map we just created, and compare it 292 * to the original. Abort the VM if it doesn't match up. 293 */ 294 RegisterMap* pUncompMap; 295 pUncompMap = uncompressMapDifferential(pCompMap); 296 if (pUncompMap == NULL) { 297 LOGE("Map failed to uncompress - %s.%s", 298 vdata->method->clazz->descriptor, 299 vdata->method->name); 300 free(pCompMap); 301 /* bad - compression is broken or we're out of memory */ 302 dvmAbort(); 303 } else { 304 if (compareMaps(pMap, pUncompMap) != 0) { 305 LOGE("Map comparison failed - %s.%s", 306 vdata->method->clazz->descriptor, 307 vdata->method->name); 308 free(pCompMap); 309 /* bad - compression is broken */ 310 dvmAbort(); 311 } 312 313 /* verify succeeded */ 314 delete pUncompMap; 315 } 316 } 317 318 if (REGISTER_MAP_VERBOSE) { 319 LOGD("Good compress on %s.%s", 320 vdata->method->clazz->descriptor, 321 vdata->method->name); 322 } 323 free(pMap); 324 pMap = pCompMap; 325 } else { 326 if (REGISTER_MAP_VERBOSE) { 327 LOGD("Unable to compress %s.%s (ent=%d rw=%d)", 328 vdata->method->clazz->descriptor, 329 vdata->method->name, 330 dvmRegisterMapGetNumEntries(pMap), 331 dvmRegisterMapGetRegWidth(pMap)); 332 } 333 } 334 335 pResult = pMap; 336 337 bail: 338 return pResult; 339 } 340 341 /* 342 * Release the storage held by a RegisterMap. 343 */ 344 void dvmFreeRegisterMap(RegisterMap* pMap) 345 { 346 if (pMap == NULL) 347 return; 348 349 assert(dvmRegisterMapGetOnHeap(pMap)); 350 free(pMap); 351 } 352 353 /* 354 * Determine if the RegType value is a reference type. 355 * 356 * Ordinarily we include kRegTypeZero in the "is it a reference" 357 * check. There's no value in doing so here, because we know 358 * the register can't hold anything but zero. 359 */ 360 static inline bool isReferenceType(RegType type) 361 { 362 return (type > kRegTypeMAX || type == kRegTypeUninit); 363 } 364 365 /* 366 * Given a line of registers, output a bit vector that indicates whether 367 * or not the register holds a reference type (which could be null). 368 * 369 * We use '1' to indicate it's a reference, '0' for anything else (numeric 370 * value, uninitialized data, merge conflict). Register 0 will be found 371 * in the low bit of the first byte. 372 */ 373 static void outputTypeVector(const RegType* regs, int insnRegCount, u1* data) 374 { 375 u1 val = 0; 376 int i; 377 378 for (i = 0; i < insnRegCount; i++) { 379 RegType type = *regs++; 380 val >>= 1; 381 if (isReferenceType(type)) 382 val |= 0x80; /* set hi bit */ 383 384 if ((i & 0x07) == 7) 385 *data++ = val; 386 } 387 if ((i & 0x07) != 0) { 388 /* flush bits from last byte */ 389 val >>= 8 - (i & 0x07); 390 *data++ = val; 391 } 392 } 393 394 /* 395 * Print the map as a series of binary strings. 396 * 397 * Pass in method->registersSize if known, or -1 if not. 398 */ 399 static void dumpRegisterMap(const RegisterMap* pMap, int registersSize) 400 { 401 const u1* rawMap = pMap->data; 402 const RegisterMapFormat format = dvmRegisterMapGetFormat(pMap); 403 const int numEntries = dvmRegisterMapGetNumEntries(pMap); 404 const int regWidth = dvmRegisterMapGetRegWidth(pMap); 405 int addrWidth; 406 407 switch (format) { 408 case kRegMapFormatCompact8: 409 addrWidth = 1; 410 break; 411 case kRegMapFormatCompact16: 412 addrWidth = 2; 413 break; 414 default: 415 /* can't happen */ 416 LOGE("Can only dump Compact8 / Compact16 maps (not %d)", format); 417 return; 418 } 419 420 if (registersSize < 0) 421 registersSize = 8 * regWidth; 422 assert(registersSize <= regWidth * 8); 423 424 int ent; 425 for (ent = 0; ent < numEntries; ent++) { 426 int i, addr; 427 428 addr = *rawMap++; 429 if (addrWidth > 1) 430 addr |= (*rawMap++) << 8; 431 432 const u1* dataStart = rawMap; 433 u1 val = 0; 434 435 /* create binary string */ 436 char outBuf[registersSize +1]; 437 for (i = 0; i < registersSize; i++) { 438 val >>= 1; 439 if ((i & 0x07) == 0) 440 val = *rawMap++; 441 442 outBuf[i] = '0' + (val & 0x01); 443 } 444 outBuf[i] = '\0'; 445 446 /* back up and create hex dump */ 447 char hexBuf[regWidth * 3 +1]; 448 char* cp = hexBuf; 449 rawMap = dataStart; 450 for (i = 0; i < regWidth; i++) { 451 sprintf(cp, " %02x", *rawMap++); 452 cp += 3; 453 } 454 hexBuf[i * 3] = '\0'; 455 456 LOGD(" %04x %s %s", addr, outBuf, hexBuf); 457 } 458 } 459 460 /* 461 * Double-check the map. 462 * 463 * We run through all of the data in the map, and compare it to the original. 464 * Only works on uncompressed data. 465 */ 466 static bool verifyMap(VerifierData* vdata, const RegisterMap* pMap) 467 { 468 const u1* rawMap = pMap->data; 469 const RegisterMapFormat format = dvmRegisterMapGetFormat(pMap); 470 const int numEntries = dvmRegisterMapGetNumEntries(pMap); 471 int ent; 472 bool dumpMap = false; 473 474 if (false) { 475 const char* cd = "Landroid/net/http/Request;"; 476 const char* mn = "readResponse"; 477 if (strcmp(vdata->method->clazz->descriptor, cd) == 0 && 478 strcmp(vdata->method->name, mn) == 0) 479 { 480 char* desc; 481 desc = dexProtoCopyMethodDescriptor(&vdata->method->prototype); 482 LOGI("Map for %s.%s %s", vdata->method->clazz->descriptor, 483 vdata->method->name, desc); 484 free(desc); 485 486 dumpMap = true; 487 } 488 } 489 490 if ((vdata->method->registersSize + 7) / 8 != pMap->regWidth) { 491 LOGE("GLITCH: registersSize=%d, regWidth=%d", 492 vdata->method->registersSize, pMap->regWidth); 493 return false; 494 } 495 496 for (ent = 0; ent < numEntries; ent++) { 497 int addr; 498 499 switch (format) { 500 case kRegMapFormatCompact8: 501 addr = *rawMap++; 502 break; 503 case kRegMapFormatCompact16: 504 addr = *rawMap++; 505 addr |= (*rawMap++) << 8; 506 break; 507 default: 508 /* shouldn't happen */ 509 LOGE("GLITCH: bad format (%d)", format); 510 dvmAbort(); 511 } 512 513 const RegType* regs = vdata->registerLines[addr].regTypes; 514 if (regs == NULL) { 515 LOGE("GLITCH: addr %d has no data", addr); 516 return false; 517 } 518 519 u1 val = 0; 520 int i; 521 522 for (i = 0; i < vdata->method->registersSize; i++) { 523 bool bitIsRef, regIsRef; 524 525 val >>= 1; 526 if ((i & 0x07) == 0) { 527 /* load next byte of data */ 528 val = *rawMap++; 529 } 530 531 bitIsRef = val & 0x01; 532 533 RegType type = regs[i]; 534 regIsRef = isReferenceType(type); 535 536 if (bitIsRef != regIsRef) { 537 LOGE("GLITCH: addr %d reg %d: bit=%d reg=%d(%d)", 538 addr, i, bitIsRef, regIsRef, type); 539 return false; 540 } 541 } 542 543 /* rawMap now points to the address field of the next entry */ 544 } 545 546 if (dumpMap) 547 dumpRegisterMap(pMap, vdata->method->registersSize); 548 549 return true; 550 } 551 552 553 /* 554 * =========================================================================== 555 * DEX generation & parsing 556 * =========================================================================== 557 */ 558 559 /* 560 * Advance "ptr" to ensure 32-bit alignment. 561 */ 562 static inline u1* align32(u1* ptr) 563 { 564 return (u1*) (((int) ptr + 3) & ~0x03); 565 } 566 567 /* 568 * Compute the size, in bytes, of a register map. 569 */ 570 static size_t computeRegisterMapSize(const RegisterMap* pMap) 571 { 572 static const int kHeaderSize = offsetof(RegisterMap, data); 573 u1 format = dvmRegisterMapGetFormat(pMap); 574 u2 numEntries = dvmRegisterMapGetNumEntries(pMap); 575 576 assert(pMap != NULL); 577 578 switch (format) { 579 case kRegMapFormatNone: 580 return 1; 581 case kRegMapFormatCompact8: 582 return kHeaderSize + (1 + pMap->regWidth) * numEntries; 583 case kRegMapFormatCompact16: 584 return kHeaderSize + (2 + pMap->regWidth) * numEntries; 585 case kRegMapFormatDifferential: 586 { 587 /* kHeaderSize + decoded ULEB128 length */ 588 const u1* ptr = pMap->data; 589 int len = readUnsignedLeb128(&ptr); 590 return len + (ptr - (u1*) pMap); 591 } 592 default: 593 LOGE("Bad register map format %d", format); 594 dvmAbort(); 595 return 0; 596 } 597 } 598 599 /* 600 * Output the map for a single method, if it has one. 601 * 602 * Abstract and native methods have no map. All others are expected to 603 * have one, since we know the class verified successfully. 604 * 605 * This strips the "allocated on heap" flag from the format byte, so that 606 * direct-mapped maps are correctly identified as such. 607 */ 608 static bool writeMapForMethod(const Method* meth, u1** pPtr) 609 { 610 if (meth->registerMap == NULL) { 611 if (!dvmIsAbstractMethod(meth) && !dvmIsNativeMethod(meth)) { 612 LOGW("Warning: no map available for %s.%s", 613 meth->clazz->descriptor, meth->name); 614 /* weird, but keep going */ 615 } 616 *(*pPtr)++ = kRegMapFormatNone; 617 return true; 618 } 619 620 /* serialize map into the buffer */ 621 size_t mapSize = computeRegisterMapSize(meth->registerMap); 622 memcpy(*pPtr, meth->registerMap, mapSize); 623 624 /* strip the "on heap" flag out of the format byte, which is always first */ 625 assert(**pPtr == meth->registerMap->format); 626 **pPtr &= ~(kRegMapFormatOnHeap); 627 628 *pPtr += mapSize; 629 630 return true; 631 } 632 633 /* 634 * Write maps for all methods in the specified class to the buffer, which 635 * can hold at most "length" bytes. "*pPtr" will be advanced past the end 636 * of the data we write. 637 */ 638 static bool writeMapsAllMethods(DvmDex* pDvmDex, const ClassObject* clazz, 639 u1** pPtr, size_t length) 640 { 641 RegisterMapMethodPool* pMethodPool; 642 u1* ptr = *pPtr; 643 int i, methodCount; 644 645 /* artificial limit */ 646 if (clazz->virtualMethodCount + clazz->directMethodCount >= 65536) { 647 LOGE("Too many methods in %s", clazz->descriptor); 648 return false; 649 } 650 651 pMethodPool = (RegisterMapMethodPool*) ptr; 652 ptr += offsetof(RegisterMapMethodPool, methodData); 653 methodCount = 0; 654 655 /* 656 * Run through all methods, direct then virtual. The class loader will 657 * traverse them in the same order. (We could split them into two 658 * distinct pieces, but there doesn't appear to be any value in doing 659 * so other than that it makes class loading slightly less fragile.) 660 * 661 * The class loader won't know about miranda methods at the point 662 * where it parses this, so we omit those. 663 * 664 * TODO: consider omitting all native/abstract definitions. Should be 665 * safe, though we lose the ability to sanity-check against the 666 * method counts in the DEX file. 667 */ 668 for (i = 0; i < clazz->directMethodCount; i++) { 669 const Method* meth = &clazz->directMethods[i]; 670 if (dvmIsMirandaMethod(meth)) 671 continue; 672 if (!writeMapForMethod(&clazz->directMethods[i], &ptr)) { 673 return false; 674 } 675 methodCount++; 676 //ptr = align32(ptr); 677 } 678 679 for (i = 0; i < clazz->virtualMethodCount; i++) { 680 const Method* meth = &clazz->virtualMethods[i]; 681 if (dvmIsMirandaMethod(meth)) 682 continue; 683 if (!writeMapForMethod(&clazz->virtualMethods[i], &ptr)) { 684 return false; 685 } 686 methodCount++; 687 //ptr = align32(ptr); 688 } 689 690 pMethodPool->methodCount = methodCount; 691 692 *pPtr = ptr; 693 return true; 694 } 695 696 /* 697 * Write maps for all classes to the specified buffer, which can hold at 698 * most "length" bytes. 699 * 700 * Returns the actual length used, or 0 on failure. 701 */ 702 static size_t writeMapsAllClasses(DvmDex* pDvmDex, u1* basePtr, size_t length) 703 { 704 DexFile* pDexFile = pDvmDex->pDexFile; 705 u4 count = pDexFile->pHeader->classDefsSize; 706 RegisterMapClassPool* pClassPool; 707 u4* offsetTable; 708 u1* ptr = basePtr; 709 u4 idx; 710 711 assert(gDvm.optimizing); 712 713 pClassPool = (RegisterMapClassPool*) ptr; 714 ptr += offsetof(RegisterMapClassPool, classDataOffset); 715 offsetTable = (u4*) ptr; 716 ptr += count * sizeof(u4); 717 718 pClassPool->numClasses = count; 719 720 /* 721 * We want an entry for every class, loaded or not. 722 */ 723 for (idx = 0; idx < count; idx++) { 724 const DexClassDef* pClassDef; 725 const char* classDescriptor; 726 ClassObject* clazz; 727 728 pClassDef = dexGetClassDef(pDexFile, idx); 729 classDescriptor = dexStringByTypeIdx(pDexFile, pClassDef->classIdx); 730 731 /* 732 * All classes have been loaded into the bootstrap class loader. 733 * If we can find it, and it was successfully pre-verified, we 734 * run through its methods and add the register maps. 735 * 736 * If it wasn't pre-verified then we know it can't have any 737 * register maps. Classes that can't be loaded or failed 738 * verification get an empty slot in the index. 739 */ 740 clazz = NULL; 741 if ((pClassDef->accessFlags & CLASS_ISPREVERIFIED) != 0) 742 clazz = dvmLookupClass(classDescriptor, NULL, false); 743 744 if (clazz != NULL) { 745 offsetTable[idx] = ptr - basePtr; 746 LOGVV("%d -> offset %d (%p-%p)", 747 idx, offsetTable[idx], ptr, basePtr); 748 749 if (!writeMapsAllMethods(pDvmDex, clazz, &ptr, 750 length - (ptr - basePtr))) 751 { 752 return 0; 753 } 754 755 ptr = align32(ptr); 756 LOGVV("Size %s (%d+%d methods): %d", clazz->descriptor, 757 clazz->directMethodCount, clazz->virtualMethodCount, 758 (ptr - basePtr) - offsetTable[idx]); 759 } else { 760 LOGV("%4d NOT mapadding '%s'", idx, classDescriptor); 761 assert(offsetTable[idx] == 0); 762 } 763 } 764 765 if (ptr - basePtr >= (int)length) { 766 /* a bit late */ 767 LOGE("Buffer overrun"); 768 dvmAbort(); 769 } 770 771 return ptr - basePtr; 772 } 773 774 /* 775 * Generate a register map set for all verified classes in "pDvmDex". 776 */ 777 RegisterMapBuilder* dvmGenerateRegisterMaps(DvmDex* pDvmDex) 778 { 779 RegisterMapBuilder* pBuilder; 780 781 pBuilder = (RegisterMapBuilder*) calloc(1, sizeof(RegisterMapBuilder)); 782 if (pBuilder == NULL) 783 return NULL; 784 785 /* 786 * We have a couple of options here: 787 * (1) Compute the size of the output, and malloc a buffer. 788 * (2) Create a "large-enough" anonymous mmap region. 789 * 790 * The nice thing about option #2 is that we don't have to traverse 791 * all of the classes and methods twice. The risk is that we might 792 * not make the region large enough. Since the pages aren't mapped 793 * until used we can allocate a semi-absurd amount of memory without 794 * worrying about the effect on the rest of the system. 795 * 796 * The basic encoding on the largest jar file requires about 1MB of 797 * storage. We map out 4MB here. (TODO: guarantee that the last 798 * page of the mapping is marked invalid, so we reliably fail if 799 * we overrun.) 800 */ 801 if (sysCreatePrivateMap(4 * 1024 * 1024, &pBuilder->memMap) != 0) { 802 free(pBuilder); 803 return NULL; 804 } 805 806 /* 807 * Create the maps. 808 */ 809 size_t actual = writeMapsAllClasses(pDvmDex, (u1*)pBuilder->memMap.addr, 810 pBuilder->memMap.length); 811 if (actual == 0) { 812 dvmFreeRegisterMapBuilder(pBuilder); 813 return NULL; 814 } 815 816 LOGV("TOTAL size of register maps: %d", actual); 817 818 pBuilder->data = pBuilder->memMap.addr; 819 pBuilder->size = actual; 820 return pBuilder; 821 } 822 823 /* 824 * Free the builder. 825 */ 826 void dvmFreeRegisterMapBuilder(RegisterMapBuilder* pBuilder) 827 { 828 if (pBuilder == NULL) 829 return; 830 831 sysReleaseShmem(&pBuilder->memMap); 832 free(pBuilder); 833 } 834 835 836 /* 837 * Find the data for the specified class. 838 * 839 * If there's no register map data, or none for this class, we return NULL. 840 */ 841 const void* dvmRegisterMapGetClassData(const DexFile* pDexFile, u4 classIdx, 842 u4* pNumMaps) 843 { 844 const RegisterMapClassPool* pClassPool; 845 const RegisterMapMethodPool* pMethodPool; 846 847 pClassPool = (const RegisterMapClassPool*) pDexFile->pRegisterMapPool; 848 if (pClassPool == NULL) 849 return NULL; 850 851 if (classIdx >= pClassPool->numClasses) { 852 LOGE("bad class index (%d vs %d)", classIdx, pClassPool->numClasses); 853 dvmAbort(); 854 } 855 856 u4 classOffset = pClassPool->classDataOffset[classIdx]; 857 if (classOffset == 0) { 858 LOGV("+++ no map for classIdx=%d", classIdx); 859 return NULL; 860 } 861 862 pMethodPool = 863 (const RegisterMapMethodPool*) (((u1*) pClassPool) + classOffset); 864 if (pNumMaps != NULL) 865 *pNumMaps = pMethodPool->methodCount; 866 return pMethodPool->methodData; 867 } 868 869 /* 870 * This advances "*pPtr" and returns its original value. 871 */ 872 const RegisterMap* dvmRegisterMapGetNext(const void** pPtr) 873 { 874 const RegisterMap* pMap = (const RegisterMap*) *pPtr; 875 876 *pPtr = /*align32*/(((u1*) pMap) + computeRegisterMapSize(pMap)); 877 LOGVV("getNext: %p -> %p (f=%#x w=%d e=%d)", 878 pMap, *pPtr, pMap->format, pMap->regWidth, 879 dvmRegisterMapGetNumEntries(pMap)); 880 return pMap; 881 } 882 883 884 /* 885 * =========================================================================== 886 * Utility functions 887 * =========================================================================== 888 */ 889 890 /* 891 * Return the data for the specified address, or NULL if not found. 892 * 893 * The result must be released with dvmReleaseRegisterMapLine(). 894 */ 895 const u1* dvmRegisterMapGetLine(const RegisterMap* pMap, int addr) 896 { 897 int addrWidth, lineWidth; 898 u1 format = dvmRegisterMapGetFormat(pMap); 899 u2 numEntries = dvmRegisterMapGetNumEntries(pMap); 900 901 assert(numEntries > 0); 902 903 switch (format) { 904 case kRegMapFormatNone: 905 return NULL; 906 case kRegMapFormatCompact8: 907 addrWidth = 1; 908 break; 909 case kRegMapFormatCompact16: 910 addrWidth = 2; 911 break; 912 default: 913 LOGE("Unknown format %d", format); 914 dvmAbort(); 915 return NULL; 916 } 917 918 lineWidth = addrWidth + pMap->regWidth; 919 920 /* 921 * Find the appropriate entry. Many maps are very small, some are very 922 * large. 923 */ 924 static const int kSearchThreshold = 8; 925 const u1* data = NULL; 926 int lineAddr; 927 928 if (numEntries < kSearchThreshold) { 929 int i; 930 data = pMap->data; 931 for (i = numEntries; i > 0; i--) { 932 lineAddr = data[0]; 933 if (addrWidth > 1) 934 lineAddr |= data[1] << 8; 935 if (lineAddr == addr) 936 return data + addrWidth; 937 938 data += lineWidth; 939 } 940 assert(data == pMap->data + lineWidth * numEntries); 941 } else { 942 int hi, lo, mid; 943 944 lo = 0; 945 hi = numEntries -1; 946 947 while (hi >= lo) { 948 mid = (hi + lo) / 2; 949 data = pMap->data + lineWidth * mid; 950 951 lineAddr = data[0]; 952 if (addrWidth > 1) 953 lineAddr |= data[1] << 8; 954 955 if (addr > lineAddr) { 956 lo = mid + 1; 957 } else if (addr < lineAddr) { 958 hi = mid - 1; 959 } else { 960 return data + addrWidth; 961 } 962 } 963 } 964 965 return NULL; 966 } 967 968 /* 969 * Compare two register maps. 970 * 971 * Returns 0 if they're equal, nonzero if not. 972 */ 973 static int compareMaps(const RegisterMap* pMap1, const RegisterMap* pMap2) 974 { 975 size_t size1, size2; 976 977 size1 = computeRegisterMapSize(pMap1); 978 size2 = computeRegisterMapSize(pMap2); 979 if (size1 != size2) { 980 LOGI("compareMaps: size mismatch (%zd vs %zd)", size1, size2); 981 return -1; 982 } 983 984 if (memcmp(pMap1, pMap2, size1) != 0) { 985 LOGI("compareMaps: content mismatch"); 986 return -1; 987 } 988 989 return 0; 990 } 991 992 993 /* 994 * Get the expanded form of the register map associated with the method. 995 * 996 * If the map is already in one of the uncompressed formats, we return 997 * immediately. Otherwise, we expand the map and replace method's register 998 * map pointer, freeing it if it was allocated on the heap. 999 * 1000 * NOTE: this function is not synchronized; external locking is mandatory 1001 * (unless we're in the zygote, where single-threaded access is guaranteed). 1002 */ 1003 const RegisterMap* dvmGetExpandedRegisterMap0(Method* method) 1004 { 1005 const RegisterMap* curMap = method->registerMap; 1006 RegisterMap* newMap; 1007 1008 if (curMap == NULL) 1009 return NULL; 1010 1011 /* sanity check to ensure this isn't called w/o external locking */ 1012 /* (if we use this at a time other than during GC, fix/remove this test) */ 1013 if (true) { 1014 if (!gDvm.zygote && dvmTryLockMutex(&gDvm.gcHeapLock) == 0) { 1015 LOGE("GLITCH: dvmGetExpandedRegisterMap not called at GC time"); 1016 dvmAbort(); 1017 } 1018 } 1019 1020 RegisterMapFormat format = dvmRegisterMapGetFormat(curMap); 1021 switch (format) { 1022 case kRegMapFormatCompact8: 1023 case kRegMapFormatCompact16: 1024 if (REGISTER_MAP_VERBOSE) { 1025 if (dvmRegisterMapGetOnHeap(curMap)) { 1026 LOGD("RegMap: already expanded: %s.%s", 1027 method->clazz->descriptor, method->name); 1028 } else { 1029 LOGD("RegMap: stored w/o compression: %s.%s", 1030 method->clazz->descriptor, method->name); 1031 } 1032 } 1033 return curMap; 1034 case kRegMapFormatDifferential: 1035 newMap = uncompressMapDifferential(curMap); 1036 break; 1037 default: 1038 LOGE("Unknown format %d in dvmGetExpandedRegisterMap", format); 1039 dvmAbort(); 1040 newMap = NULL; // make gcc happy 1041 } 1042 1043 if (newMap == NULL) { 1044 LOGE("Map failed to uncompress (fmt=%d) %s.%s", 1045 format, method->clazz->descriptor, method->name); 1046 return NULL; 1047 } 1048 1049 #ifdef REGISTER_MAP_STATS 1050 /* 1051 * Gather and display some stats. 1052 */ 1053 { 1054 MapStats* pStats = (MapStats*) gDvm.registerMapStats; 1055 pStats->numExpandedMaps++; 1056 pStats->totalExpandedMapSize += computeRegisterMapSize(newMap); 1057 LOGD("RMAP: count=%d size=%d", 1058 pStats->numExpandedMaps, pStats->totalExpandedMapSize); 1059 } 1060 #endif 1061 1062 IF_LOGV() { 1063 char* desc = dexProtoCopyMethodDescriptor(&method->prototype); 1064 LOGV("Expanding map -> %s.%s:%s", 1065 method->clazz->descriptor, method->name, desc); 1066 free(desc); 1067 } 1068 1069 /* 1070 * Update method, and free compressed map if it was sitting on the heap. 1071 */ 1072 dvmSetRegisterMap(method, newMap); 1073 1074 if (dvmRegisterMapGetOnHeap(curMap)) 1075 dvmFreeRegisterMap((RegisterMap*) curMap); 1076 1077 return newMap; 1078 } 1079 1080 1081 /* 1082 * =========================================================================== 1083 * Map compression 1084 * =========================================================================== 1085 */ 1086 1087 /* 1088 Notes on map compression 1089 1090 The idea is to create a compressed form that will be uncompressed before 1091 use, with the output possibly saved in a cache. This means we can use an 1092 approach that is unsuited for random access if we choose. 1093 1094 In the event that a map simply does not work with our compression scheme, 1095 it's reasonable to store the map without compression. In the future we 1096 may want to have more than one compression scheme, and try each in turn, 1097 retaining the best. (We certainly want to keep the uncompressed form if it 1098 turns out to be smaller or even slightly larger than the compressed form.) 1099 1100 Each entry consists of an address and a bit vector. Adjacent entries are 1101 strongly correlated, suggesting differential encoding. 1102 1103 1104 Ideally we would avoid outputting adjacent entries with identical 1105 bit vectors. However, the register values at a given address do not 1106 imply anything about the set of valid registers at subsequent addresses. 1107 We therefore cannot omit an entry. 1108 1109 If the thread stack has a PC at an address without a corresponding 1110 entry in the register map, we must conservatively scan the registers in 1111 that thread. This can happen when single-stepping in the debugger, 1112 because the debugger is allowed to invoke arbitrary methods when 1113 a thread is stopped at a breakpoint. If we can guarantee that a GC 1114 thread scan will never happen while the debugger has that thread stopped, 1115 then we can lift this restriction and simply omit entries that don't 1116 change the bit vector from its previous state. 1117 1118 Each entry advances the address value by at least 1 (measured in 16-bit 1119 "code units"). Looking at the bootclasspath entries, advancing by 2 units 1120 is most common. Advances by 1 unit are far less common than advances by 1121 2 units, but more common than 5, and things fall off rapidly. Gaps of 1122 up to 220 code units appear in some computationally intensive bits of code, 1123 but are exceedingly rare. 1124 1125 If we sum up the number of transitions in a couple of ranges in framework.jar: 1126 [1,4]: 188998 of 218922 gaps (86.3%) 1127 [1,7]: 211647 of 218922 gaps (96.7%) 1128 Using a 3-bit delta, with one value reserved as an escape code, should 1129 yield good results for the address. 1130 1131 These results would change dramatically if we reduced the set of GC 1132 points by e.g. removing instructions like integer divide that are only 1133 present because they can throw and cause an allocation. 1134 1135 We also need to include an "initial gap", because the first few instructions 1136 in a method may not be GC points. 1137 1138 1139 By observation, many entries simply repeat the previous bit vector, or 1140 change only one or two bits. (This is with type-precise information; 1141 the rate of change of bits will be different if live-precise information 1142 is factored in). 1143 1144 Looking again at adjacent entries in framework.jar: 1145 0 bits changed: 63.0% 1146 1 bit changed: 32.2% 1147 After that it falls off rapidly, e.g. the number of entries with 2 bits 1148 changed is usually less than 1/10th of the number of entries with 1 bit 1149 changed. A solution that allows us to encode 0- or 1- bit changes 1150 efficiently will do well. 1151 1152 We still need to handle cases where a large number of bits change. We 1153 probably want a way to drop in a full copy of the bit vector when it's 1154 smaller than the representation of multiple bit changes. 1155 1156 1157 The bit-change information can be encoded as an index that tells the 1158 decoder to toggle the state. We want to encode the index in as few bits 1159 as possible, but we need to allow for fairly wide vectors (e.g. we have a 1160 method with 175 registers). We can deal with this in a couple of ways: 1161 (1) use an encoding that assumes few registers and has an escape code 1162 for larger numbers of registers; or (2) use different encodings based 1163 on how many total registers the method has. The choice depends to some 1164 extent on whether methods with large numbers of registers tend to modify 1165 the first 16 regs more often than the others. 1166 1167 The last N registers hold method arguments. If the bytecode is expected 1168 to be examined in a debugger, "dx" ensures that the contents of these 1169 registers won't change. Depending upon the encoding format, we may be 1170 able to take advantage of this. We still have to encode the initial 1171 state, but we know we'll never have to output a bit change for the last 1172 N registers. 1173 1174 Considering only methods with 16 or more registers, the "target octant" 1175 for register changes looks like this: 1176 [ 43.1%, 16.4%, 6.5%, 6.2%, 7.4%, 8.8%, 9.7%, 1.8% ] 1177 As expected, there are fewer changes at the end of the list where the 1178 arguments are kept, and more changes at the start of the list because 1179 register values smaller than 16 can be used in compact Dalvik instructions 1180 and hence are favored for frequently-used values. In general, the first 1181 octant is considerably more active than later entries, the last octant 1182 is much less active, and the rest are all about the same. 1183 1184 Looking at all bit changes in all methods, 94% are to registers 0-15. The 1185 encoding will benefit greatly by favoring the low-numbered registers. 1186 1187 1188 Some of the smaller methods have identical maps, and space could be 1189 saved by simply including a pointer to an earlier definition. This would 1190 be best accomplished by specifying a "pointer" format value, followed by 1191 a 3-byte (or ULEB128) offset. Implementing this would probably involve 1192 generating a hash value for each register map and maintaining a hash table. 1193 1194 In some cases there are repeating patterns in the bit vector that aren't 1195 adjacent. These could benefit from a dictionary encoding. This doesn't 1196 really become useful until the methods reach a certain size though, 1197 and managing the dictionary may incur more overhead than we want. 1198 1199 Large maps can be compressed significantly. The trouble is that, when 1200 we need to use them, we have to uncompress them onto the heap. We may 1201 get a better trade-off between storage size and heap usage by refusing to 1202 compress large maps, so that they can be memory mapped and used directly. 1203 (OTOH, only about 2% of the maps will ever actually be used.) 1204 1205 1206 ----- differential format ----- 1207 1208 // common header 1209 +00 1B format 1210 +01 1B regWidth 1211 +02 2B numEntries (little-endian) 1212 +04 nB length in bytes of the data that follows, in ULEB128 format 1213 (not strictly necessary; allows determination of size w/o full parse) 1214 +05+ 1B initial address (0-127), high bit set if max addr >= 256 1215 +06+ nB initial value for bit vector 1216 1217 // for each entry 1218 +00: CCCCBAAA 1219 1220 AAA: address difference. Values from 0 to 6 indicate an increment of 1 1221 to 7. A value of 7 indicates that the address difference is large, 1222 and the next byte is a ULEB128-encoded difference value. 1223 1224 B: determines the meaning of CCCC. 1225 1226 CCCC: if B is 0, this is the number of the bit to toggle (0-15). 1227 If B is 1, this is a count of the number of changed bits (1-14). A value 1228 of 0 means that no bits were changed, and a value of 15 indicates 1229 that enough bits were changed that it required less space to output 1230 the entire bit vector. 1231 1232 +01: (optional) ULEB128-encoded address difference 1233 1234 +01+: (optional) one or more ULEB128-encoded bit numbers, OR the entire 1235 bit vector. 1236 1237 The most common situation is an entry whose address has changed by 2-4 1238 code units, has no changes or just a single bit change, and the changed 1239 register is less than 16. We should therefore be able to encode a large 1240 number of entries with a single byte, which is half the size of the 1241 Compact8 encoding method. 1242 */ 1243 1244 /* 1245 * Compute some stats on an uncompressed register map. 1246 */ 1247 #ifdef REGISTER_MAP_STATS 1248 static void computeMapStats(RegisterMap* pMap, const Method* method) 1249 { 1250 MapStats* pStats = (MapStats*) gDvm.registerMapStats; 1251 const u1 format = dvmRegisterMapGetFormat(pMap); 1252 const u2 numEntries = dvmRegisterMapGetNumEntries(pMap); 1253 const u1* rawMap = pMap->data; 1254 const u1* prevData = NULL; 1255 int ent, addr, prevAddr = -1; 1256 1257 for (ent = 0; ent < numEntries; ent++) { 1258 switch (format) { 1259 case kRegMapFormatCompact8: 1260 addr = *rawMap++; 1261 break; 1262 case kRegMapFormatCompact16: 1263 addr = *rawMap++; 1264 addr |= (*rawMap++) << 8; 1265 break; 1266 default: 1267 /* shouldn't happen */ 1268 LOGE("GLITCH: bad format (%d)", format); 1269 dvmAbort(); 1270 } 1271 1272 const u1* dataStart = rawMap; 1273 1274 pStats->totalGcPointCount++; 1275 1276 /* 1277 * Gather "gap size" stats, i.e. the difference in addresses between 1278 * successive GC points. 1279 */ 1280 if (prevData != NULL) { 1281 assert(prevAddr >= 0); 1282 int addrDiff = addr - prevAddr; 1283 1284 if (addrDiff < 0) { 1285 LOGE("GLITCH: address went backward (0x%04x->0x%04x, %s.%s)", 1286 prevAddr, addr, method->clazz->descriptor, method->name); 1287 } else if (addrDiff > kMaxGcPointGap) { 1288 if (REGISTER_MAP_VERBOSE) { 1289 LOGI("HEY: addrDiff is %d, max %d (0x%04x->0x%04x %s.%s)", 1290 addrDiff, kMaxGcPointGap, prevAddr, addr, 1291 method->clazz->descriptor, method->name); 1292 } 1293 /* skip this one */ 1294 } else { 1295 pStats->gcPointGap[addrDiff]++; 1296 } 1297 pStats->gcGapCount++; 1298 1299 1300 /* 1301 * Compare bit vectors in adjacent entries. We want to count 1302 * up the number of bits that differ (to see if we frequently 1303 * change 0 or 1 bits) and get a sense for which part of the 1304 * vector changes the most often (near the start, middle, end). 1305 * 1306 * We only do the vector position quantization if we have at 1307 * least 16 registers in the method. 1308 */ 1309 int numDiff = 0; 1310 float div = (float) kNumUpdatePosns / method->registersSize; 1311 int regByte; 1312 for (regByte = 0; regByte < pMap->regWidth; regByte++) { 1313 int prev, cur, bit; 1314 1315 prev = prevData[regByte]; 1316 cur = dataStart[regByte]; 1317 1318 for (bit = 0; bit < 8; bit++) { 1319 if (((prev >> bit) & 1) != ((cur >> bit) & 1)) { 1320 numDiff++; 1321 1322 int bitNum = regByte * 8 + bit; 1323 1324 if (bitNum < 16) 1325 pStats->updateLT16++; 1326 else 1327 pStats->updateGE16++; 1328 1329 if (method->registersSize < 16) 1330 continue; 1331 1332 if (bitNum >= method->registersSize) { 1333 /* stuff off the end should be zero in both */ 1334 LOGE("WEIRD: bit=%d (%d/%d), prev=%02x cur=%02x", 1335 bit, regByte, method->registersSize, 1336 prev, cur); 1337 assert(false); 1338 } 1339 int idx = (int) (bitNum * div); 1340 if (!(idx >= 0 && idx < kNumUpdatePosns)) { 1341 LOGE("FAIL: bitNum=%d (of %d) div=%.3f idx=%d", 1342 bitNum, method->registersSize, div, idx); 1343 assert(false); 1344 } 1345 pStats->updatePosn[idx]++; 1346 } 1347 } 1348 } 1349 1350 if (numDiff > kMaxDiffBits) { 1351 if (REGISTER_MAP_VERBOSE) { 1352 LOGI("WOW: numDiff is %d, max %d", numDiff, kMaxDiffBits); 1353 } 1354 } else { 1355 pStats->numDiffBits[numDiff]++; 1356 } 1357 } 1358 1359 /* advance to start of next line */ 1360 rawMap += pMap->regWidth; 1361 1362 prevAddr = addr; 1363 prevData = dataStart; 1364 } 1365 } 1366 #endif 1367 1368 /* 1369 * Compute the difference between two bit vectors. 1370 * 1371 * If "lebOutBuf" is non-NULL, we output the bit indices in ULEB128 format 1372 * as we go. Otherwise, we just generate the various counts. 1373 * 1374 * The bit vectors are compared byte-by-byte, so any unused bits at the 1375 * end must be zero. 1376 * 1377 * Returns the number of bytes required to hold the ULEB128 output. 1378 * 1379 * If "pFirstBitChanged" or "pNumBitsChanged" are non-NULL, they will 1380 * receive the index of the first changed bit and the number of changed 1381 * bits, respectively. 1382 */ 1383 static int computeBitDiff(const u1* bits1, const u1* bits2, int byteWidth, 1384 int* pFirstBitChanged, int* pNumBitsChanged, u1* lebOutBuf) 1385 { 1386 int numBitsChanged = 0; 1387 int firstBitChanged = -1; 1388 int lebSize = 0; 1389 int byteNum; 1390 1391 /* 1392 * Run through the vectors, first comparing them at the byte level. This 1393 * will yield a fairly quick result if nothing has changed between them. 1394 */ 1395 for (byteNum = 0; byteNum < byteWidth; byteNum++) { 1396 u1 byte1 = *bits1++; 1397 u1 byte2 = *bits2++; 1398 if (byte1 != byte2) { 1399 /* 1400 * Walk through the byte, identifying the changed bits. 1401 */ 1402 int bitNum; 1403 for (bitNum = 0; bitNum < 8; bitNum++) { 1404 if (((byte1 >> bitNum) & 0x01) != ((byte2 >> bitNum) & 0x01)) { 1405 int bitOffset = (byteNum << 3) + bitNum; 1406 1407 if (firstBitChanged < 0) 1408 firstBitChanged = bitOffset; 1409 numBitsChanged++; 1410 1411 if (lebOutBuf == NULL) { 1412 lebSize += unsignedLeb128Size(bitOffset); 1413 } else { 1414 u1* curBuf = lebOutBuf; 1415 lebOutBuf = writeUnsignedLeb128(lebOutBuf, bitOffset); 1416 lebSize += lebOutBuf - curBuf; 1417 } 1418 } 1419 } 1420 } 1421 } 1422 1423 if (numBitsChanged > 0) 1424 assert(firstBitChanged >= 0); 1425 1426 if (pFirstBitChanged != NULL) 1427 *pFirstBitChanged = firstBitChanged; 1428 if (pNumBitsChanged != NULL) 1429 *pNumBitsChanged = numBitsChanged; 1430 1431 return lebSize; 1432 } 1433 1434 /* 1435 * Compress the register map with differential encoding. 1436 * 1437 * "meth" is only needed for debug output. 1438 * 1439 * On success, returns a newly-allocated RegisterMap. If the map is not 1440 * compatible for some reason, or fails to get smaller, this will return NULL. 1441 */ 1442 static RegisterMap* compressMapDifferential(const RegisterMap* pMap, 1443 const Method* meth) 1444 { 1445 RegisterMap* pNewMap = NULL; 1446 int origSize = computeRegisterMapSize(pMap); 1447 u1* tmpPtr; 1448 int addrWidth, regWidth, numEntries; 1449 bool debug = false; 1450 1451 if (false && 1452 strcmp(meth->clazz->descriptor, "Landroid/text/StaticLayout;") == 0 && 1453 strcmp(meth->name, "generate") == 0) 1454 { 1455 debug = true; 1456 } 1457 1458 u1 format = dvmRegisterMapGetFormat(pMap); 1459 switch (format) { 1460 case kRegMapFormatCompact8: 1461 addrWidth = 1; 1462 break; 1463 case kRegMapFormatCompact16: 1464 addrWidth = 2; 1465 break; 1466 default: 1467 LOGE("ERROR: can't compress map with format=%d", format); 1468 return NULL; 1469 } 1470 1471 regWidth = dvmRegisterMapGetRegWidth(pMap); 1472 numEntries = dvmRegisterMapGetNumEntries(pMap); 1473 1474 if (debug) { 1475 LOGI("COMPRESS: %s.%s aw=%d rw=%d ne=%d", 1476 meth->clazz->descriptor, meth->name, 1477 addrWidth, regWidth, numEntries); 1478 dumpRegisterMap(pMap, -1); 1479 } 1480 1481 if (numEntries <= 1) { 1482 LOGV("Can't compress map with 0 or 1 entries"); 1483 return NULL; 1484 } 1485 1486 /* 1487 * We don't know how large the compressed data will be. It's possible 1488 * for it to expand and become larger than the original. The header 1489 * itself is variable-sized, so we generate everything into a temporary 1490 * buffer and then copy it to form-fitting storage once we know how big 1491 * it will be (and that it's smaller than the original). 1492 * 1493 * If we use a size that is equal to the size of the input map plus 1494 * a value longer than a single entry can possibly expand to, we need 1495 * only check for overflow at the end of each entry. The worst case 1496 * for a single line is (1 + <ULEB8 address> + <full copy of vector>). 1497 * Addresses are 16 bits, so that's (1 + 3 + regWidth). 1498 * 1499 * The initial address offset and bit vector will take up less than 1500 * or equal to the amount of space required when uncompressed -- large 1501 * initial offsets are rejected. 1502 */ 1503 UniquePtr<u1[]> tmpBuf(new u1[origSize + (1 + 3 + regWidth)]); 1504 if (tmpBuf.get() == NULL) 1505 return NULL; 1506 1507 tmpPtr = tmpBuf.get(); 1508 1509 const u1* mapData = pMap->data; 1510 const u1* prevBits; 1511 u2 addr, prevAddr; 1512 1513 addr = *mapData++; 1514 if (addrWidth > 1) 1515 addr |= (*mapData++) << 8; 1516 1517 if (addr >= 128) { 1518 LOGV("Can't compress map with starting address >= 128"); 1519 return NULL; 1520 } 1521 1522 /* 1523 * Start by writing the initial address and bit vector data. The high 1524 * bit of the initial address is used to indicate the required address 1525 * width (which the decoder can't otherwise determine without parsing 1526 * the compressed data). 1527 */ 1528 *tmpPtr++ = addr | (addrWidth > 1 ? 0x80 : 0x00); 1529 memcpy(tmpPtr, mapData, regWidth); 1530 1531 prevBits = mapData; 1532 prevAddr = addr; 1533 1534 tmpPtr += regWidth; 1535 mapData += regWidth; 1536 1537 /* 1538 * Loop over all following entries. 1539 */ 1540 for (int entry = 1; entry < numEntries; entry++) { 1541 int addrDiff; 1542 u1 key; 1543 1544 /* 1545 * Pull out the address and figure out how to encode it. 1546 */ 1547 addr = *mapData++; 1548 if (addrWidth > 1) 1549 addr |= (*mapData++) << 8; 1550 1551 if (debug) 1552 LOGI(" addr=0x%04x ent=%d (aw=%d)", addr, entry, addrWidth); 1553 1554 addrDiff = addr - prevAddr; 1555 assert(addrDiff > 0); 1556 if (addrDiff < 8) { 1557 /* small difference, encode in 3 bits */ 1558 key = addrDiff -1; /* set 00000AAA */ 1559 if (debug) 1560 LOGI(" : small %d, key=0x%02x", addrDiff, key); 1561 } else { 1562 /* large difference, output escape code */ 1563 key = 0x07; /* escape code for AAA */ 1564 if (debug) 1565 LOGI(" : large %d, key=0x%02x", addrDiff, key); 1566 } 1567 1568 int numBitsChanged, firstBitChanged, lebSize; 1569 1570 lebSize = computeBitDiff(prevBits, mapData, regWidth, 1571 &firstBitChanged, &numBitsChanged, NULL); 1572 1573 if (debug) { 1574 LOGI(" : diff fbc=%d nbc=%d ls=%d (rw=%d)", 1575 firstBitChanged, numBitsChanged, lebSize, regWidth); 1576 } 1577 1578 if (numBitsChanged == 0) { 1579 /* set B to 1 and CCCC to zero to indicate no bits were changed */ 1580 key |= 0x08; 1581 if (debug) LOGI(" : no bits changed"); 1582 } else if (numBitsChanged == 1 && firstBitChanged < 16) { 1583 /* set B to 0 and CCCC to the index of the changed bit */ 1584 key |= firstBitChanged << 4; 1585 if (debug) LOGI(" : 1 low bit changed"); 1586 } else if (numBitsChanged < 15 && lebSize < regWidth) { 1587 /* set B to 1 and CCCC to the number of bits */ 1588 key |= 0x08 | (numBitsChanged << 4); 1589 if (debug) LOGI(" : some bits changed"); 1590 } else { 1591 /* set B to 1 and CCCC to 0x0f so we store the entire vector */ 1592 key |= 0x08 | 0xf0; 1593 if (debug) LOGI(" : encode original"); 1594 } 1595 1596 /* 1597 * Encode output. Start with the key, follow with the address 1598 * diff (if it didn't fit in 3 bits), then the changed bit info. 1599 */ 1600 *tmpPtr++ = key; 1601 if ((key & 0x07) == 0x07) 1602 tmpPtr = writeUnsignedLeb128(tmpPtr, addrDiff); 1603 1604 if ((key & 0x08) != 0) { 1605 int bitCount = key >> 4; 1606 if (bitCount == 0) { 1607 /* nothing changed, no additional output required */ 1608 } else if (bitCount == 15) { 1609 /* full vector is most compact representation */ 1610 memcpy(tmpPtr, mapData, regWidth); 1611 tmpPtr += regWidth; 1612 } else { 1613 /* write bit indices in LEB128 format */ 1614 (void) computeBitDiff(prevBits, mapData, regWidth, 1615 NULL, NULL, tmpPtr); 1616 tmpPtr += lebSize; 1617 } 1618 } else { 1619 /* single-bit changed, value encoded in key byte */ 1620 } 1621 1622 prevBits = mapData; 1623 prevAddr = addr; 1624 mapData += regWidth; 1625 1626 /* 1627 * See if we've run past the original size. 1628 */ 1629 if (tmpPtr - tmpBuf.get() >= origSize) { 1630 if (debug) { 1631 LOGD("Compressed size >= original (%d vs %d): %s.%s", 1632 tmpPtr - tmpBuf.get(), origSize, 1633 meth->clazz->descriptor, meth->name); 1634 } 1635 return NULL; 1636 } 1637 } 1638 1639 /* 1640 * Create a RegisterMap with the contents. 1641 * 1642 * TODO: consider using a threshold other than merely ">=". We would 1643 * get poorer compression but potentially use less native heap space. 1644 */ 1645 static const int kHeaderSize = offsetof(RegisterMap, data); 1646 int newDataSize = tmpPtr - tmpBuf.get(); 1647 int newMapSize; 1648 1649 newMapSize = kHeaderSize + unsignedLeb128Size(newDataSize) + newDataSize; 1650 if (newMapSize >= origSize) { 1651 if (debug) { 1652 LOGD("Final comp size >= original (%d vs %d): %s.%s", 1653 newMapSize, origSize, meth->clazz->descriptor, meth->name); 1654 } 1655 return NULL; 1656 } 1657 1658 pNewMap = (RegisterMap*) malloc(newMapSize); 1659 if (pNewMap == NULL) 1660 return NULL; 1661 dvmRegisterMapSetFormat(pNewMap, kRegMapFormatDifferential); 1662 dvmRegisterMapSetOnHeap(pNewMap, true); 1663 dvmRegisterMapSetRegWidth(pNewMap, regWidth); 1664 dvmRegisterMapSetNumEntries(pNewMap, numEntries); 1665 1666 tmpPtr = pNewMap->data; 1667 tmpPtr = writeUnsignedLeb128(tmpPtr, newDataSize); 1668 memcpy(tmpPtr, tmpBuf.get(), newDataSize); 1669 1670 if (REGISTER_MAP_VERBOSE) { 1671 LOGD("Compression successful (%d -> %d) from aw=%d rw=%d ne=%d", 1672 computeRegisterMapSize(pMap), computeRegisterMapSize(pNewMap), 1673 addrWidth, regWidth, numEntries); 1674 } 1675 1676 return pNewMap; 1677 } 1678 1679 /* 1680 * Toggle the value of the "idx"th bit in "ptr". 1681 */ 1682 static inline void toggleBit(u1* ptr, int idx) 1683 { 1684 ptr[idx >> 3] ^= 1 << (idx & 0x07); 1685 } 1686 1687 /* 1688 * Expand a compressed map to an uncompressed form. 1689 * 1690 * Returns a newly-allocated RegisterMap on success, or NULL on failure. 1691 * 1692 * TODO: consider using the linear allocator or a custom allocator with 1693 * LRU replacement for these instead of the native heap. 1694 */ 1695 static RegisterMap* uncompressMapDifferential(const RegisterMap* pMap) 1696 { 1697 static const int kHeaderSize = offsetof(RegisterMap, data); 1698 u1 format = dvmRegisterMapGetFormat(pMap); 1699 RegisterMapFormat newFormat; 1700 int regWidth, numEntries, newAddrWidth, newMapSize; 1701 1702 if (format != kRegMapFormatDifferential) { 1703 LOGE("Not differential (%d)", format); 1704 return NULL; 1705 } 1706 1707 regWidth = dvmRegisterMapGetRegWidth(pMap); 1708 numEntries = dvmRegisterMapGetNumEntries(pMap); 1709 1710 /* get the data size; we can check this at the end */ 1711 const u1* srcPtr = pMap->data; 1712 int expectedSrcLen = readUnsignedLeb128(&srcPtr); 1713 const u1* srcStart = srcPtr; 1714 1715 /* get the initial address and the 16-bit address flag */ 1716 int addr = *srcPtr & 0x7f; 1717 if ((*srcPtr & 0x80) == 0) { 1718 newFormat = kRegMapFormatCompact8; 1719 newAddrWidth = 1; 1720 } else { 1721 newFormat = kRegMapFormatCompact16; 1722 newAddrWidth = 2; 1723 } 1724 srcPtr++; 1725 1726 /* now we know enough to allocate the new map */ 1727 if (REGISTER_MAP_VERBOSE) { 1728 LOGI("Expanding to map aw=%d rw=%d ne=%d", 1729 newAddrWidth, regWidth, numEntries); 1730 } 1731 newMapSize = kHeaderSize + (newAddrWidth + regWidth) * numEntries; 1732 RegisterMap* pNewMap = (RegisterMap*) malloc(newMapSize); 1733 1734 if (pNewMap == NULL) 1735 return NULL; 1736 1737 dvmRegisterMapSetFormat(pNewMap, newFormat); 1738 dvmRegisterMapSetOnHeap(pNewMap, true); 1739 dvmRegisterMapSetRegWidth(pNewMap, regWidth); 1740 dvmRegisterMapSetNumEntries(pNewMap, numEntries); 1741 1742 /* 1743 * Write the start address and initial bits to the new map. 1744 */ 1745 u1* dstPtr = pNewMap->data; 1746 1747 *dstPtr++ = addr & 0xff; 1748 if (newAddrWidth > 1) 1749 *dstPtr++ = (u1) (addr >> 8); 1750 1751 memcpy(dstPtr, srcPtr, regWidth); 1752 1753 int prevAddr = addr; 1754 const u1* prevBits = dstPtr; /* point at uncompressed data */ 1755 1756 dstPtr += regWidth; 1757 srcPtr += regWidth; 1758 1759 /* 1760 * Walk through, uncompressing one line at a time. 1761 */ 1762 int entry; 1763 for (entry = 1; entry < numEntries; entry++) { 1764 int addrDiff; 1765 u1 key; 1766 1767 key = *srcPtr++; 1768 1769 /* get the address */ 1770 if ((key & 0x07) == 7) { 1771 /* address diff follows in ULEB128 */ 1772 addrDiff = readUnsignedLeb128(&srcPtr); 1773 } else { 1774 addrDiff = (key & 0x07) +1; 1775 } 1776 1777 addr = prevAddr + addrDiff; 1778 *dstPtr++ = addr & 0xff; 1779 if (newAddrWidth > 1) 1780 *dstPtr++ = (u1) (addr >> 8); 1781 1782 /* unpack the bits */ 1783 if ((key & 0x08) != 0) { 1784 int bitCount = (key >> 4); 1785 if (bitCount == 0) { 1786 /* no bits changed, just copy previous */ 1787 memcpy(dstPtr, prevBits, regWidth); 1788 } else if (bitCount == 15) { 1789 /* full copy of bit vector is present; ignore prevBits */ 1790 memcpy(dstPtr, srcPtr, regWidth); 1791 srcPtr += regWidth; 1792 } else { 1793 /* copy previous bits and modify listed indices */ 1794 memcpy(dstPtr, prevBits, regWidth); 1795 while (bitCount--) { 1796 int bitIndex = readUnsignedLeb128(&srcPtr); 1797 toggleBit(dstPtr, bitIndex); 1798 } 1799 } 1800 } else { 1801 /* copy previous bits and modify the specified one */ 1802 memcpy(dstPtr, prevBits, regWidth); 1803 1804 /* one bit, from 0-15 inclusive, was changed */ 1805 toggleBit(dstPtr, key >> 4); 1806 } 1807 1808 prevAddr = addr; 1809 prevBits = dstPtr; 1810 dstPtr += regWidth; 1811 } 1812 1813 if (dstPtr - (u1*) pNewMap != newMapSize) { 1814 LOGE("ERROR: output %d bytes, expected %d", 1815 dstPtr - (u1*) pNewMap, newMapSize); 1816 free(pNewMap); 1817 return NULL; 1818 } 1819 1820 if (srcPtr - srcStart != expectedSrcLen) { 1821 LOGE("ERROR: consumed %d bytes, expected %d", 1822 srcPtr - srcStart, expectedSrcLen); 1823 free(pNewMap); 1824 return NULL; 1825 } 1826 1827 if (REGISTER_MAP_VERBOSE) { 1828 LOGD("Expansion successful (%d -> %d)", 1829 computeRegisterMapSize(pMap), computeRegisterMapSize(pNewMap)); 1830 } 1831 1832 return pNewMap; 1833 } 1834