Home | History | Annotate | Download | only in genrb
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2000-2015, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *
      9 * File reslist.cpp
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   02/21/00    weiv        Creation.
     15 *******************************************************************************
     16 */
     17 
     18 // Safer use of UnicodeString.
     19 #ifndef UNISTR_FROM_CHAR_EXPLICIT
     20 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
     21 #endif
     22 
     23 // Less important, but still a good idea.
     24 #ifndef UNISTR_FROM_STRING_EXPLICIT
     25 #   define UNISTR_FROM_STRING_EXPLICIT explicit
     26 #endif
     27 
     28 #include <assert.h>
     29 #include <stdio.h>
     30 #include "unicode/localpointer.h"
     31 #include "reslist.h"
     32 #include "unewdata.h"
     33 #include "unicode/ures.h"
     34 #include "unicode/putil.h"
     35 #include "errmsg.h"
     36 
     37 #include "uarrsort.h"
     38 #include "uelement.h"
     39 #include "uhash.h"
     40 #include "uinvchar.h"
     41 #include "ustr_imp.h"
     42 #include "unicode/utf16.h"
/*
 * Align binary data at a 16-byte offset from the start of the resource bundle,
 * to be safe for any data type it may contain.
 * BinaryResource::handlePreWrite() and BinaryResource::handleWrite() use this
 * value to compute the padding that precedes the 4-byte length word.
 */
#define BIN_ALIGNMENT 16
     48 
// This numeric constant must be at least 1.
// If StringResource.fNumUnitsSaved == 0 then the string occurs only once,
// and it makes no sense to move it to the pool bundle.
// The larger the threshold for fNumUnitsSaved
// the smaller the savings, and the smaller the pool bundle.
// We trade some total size reduction to reduce the pool bundle a bit,
// so that one can reasonably save data size by
// removing bundle files without rebuilding the pool bundle.
// This can also help to keep the pool and total (pool+local) string indexes
// within 16 bits, that is, within range of Table16 and Array16 containers.
// The default of 10 applies only if the build has not predefined the macro.
#ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING
#   define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10
#endif
     62 
     63 U_NAMESPACE_USE
     64 
/* File-level tool options; each is changed only through its setter below. */
/* Copyright-inclusion flag, see setIncludeCopyright()/getIncludeCopyright(). */
static UBool gIncludeCopyright = FALSE;
/* Pool-bundle flag, see setUsePoolBundle(). */
static UBool gUsePoolBundle = FALSE;
/* TRUE until setFormatVersion() is called for the first time. */
static UBool gIsDefaultFormatVersion = TRUE;
/* Major formatVersion of the output; many code paths branch on "> 1" or "== 1". */
static int32_t gFormatVersion = 3;
     69 
/*
 * How do we store string values?
 * SRBRoot::fStringsForm is compared against these constants,
 * for example in StringResource::handlePreflightStrings().
 */
enum {
    STRINGS_UTF16_V1,   /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */
    STRINGS_UTF16_V2    /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */
};
     75 
/*
 * Upper length limit (in UChars) for v2 strings stored without an explicit length;
 * see StringResource::handlePreflightStrings() for the additional conditions.
 */
static const int32_t MAX_IMPLICIT_STRING_LENGTH = 40;  /* do not store the length explicitly for such strings */

/* NOTE(review): presumably the stand-in ResFile used when no pool bundle is in effect -- confirm against its users. */
static const ResFile kNoPoolBundle;
     79 
/*
 * res_none() returns the address of kNoResource,
 * for use in non-error cases when no resource is to be added to the bundle.
 * (NULL is used in error cases.)
 * The add() methods of the containers compare against this address
 * and ignore the object, so it is never linked into a container.
 */
static SResource kNoResource;  // TODO: const
     86 
/*
 * Data header description for the resource bundle output.
 * NOTE(review): the per-field notes follow the public UDataInfo layout in udata.h -- confirm.
 */
static UDataInfo dataInfo= {
    sizeof(UDataInfo),            /* size of this structure */
    0,                            /* reserved */

    U_IS_BIG_ENDIAN,              /* endianness of the build platform */
    U_CHARSET_FAMILY,             /* charset family of the build platform */
    sizeof(UChar),                /* size of a UChar */
    0,                            /* reserved */

    {0x52, 0x65, 0x73, 0x42},     /* dataFormat="ResB" */
    {1, 3, 0, 0},                 /* formatVersion */
    {1, 4, 0, 0}                  /* dataVersion take a look at version inside parsed resb*/
};
    100 
/*
 * Full formatVersion values, one per supported major version.
 * Entry 0 is an all-zero placeholder so that the major version can be used as the index.
 */
static const UVersionInfo gFormatVersions[4] = {  /* indexed by a major-formatVersion integer */
    { 0, 0, 0, 0 },
    { 1, 3, 0, 0 },
    { 2, 0, 0, 0 },
    { 3, 0, 0, 0 }
};
// Remember to update genrb.h GENRB_VERSION when changing the data format.
// (Or maybe we should remove GENRB_VERSION and report the ICU version number?)
    109 
    110 static uint8_t calcPadding(uint32_t size) {
    111     /* returns space we need to pad */
    112     return (uint8_t) ((size % sizeof(uint32_t)) ? (sizeof(uint32_t) - (size % sizeof(uint32_t))) : 0);
    113 
    114 }
    115 
    116 void setIncludeCopyright(UBool val){
    117     gIncludeCopyright=val;
    118 }
    119 
    120 UBool getIncludeCopyright(void){
    121     return gIncludeCopyright;
    122 }
    123 
    124 void setFormatVersion(int32_t formatVersion) {
    125     gIsDefaultFormatVersion = FALSE;
    126     gFormatVersion = formatVersion;
    127 }
    128 
    129 int32_t getFormatVersion() {
    130     return gFormatVersion;
    131 }
    132 
    133 void setUsePoolBundle(UBool use) {
    134     gUsePoolBundle = use;
    135 }
    136 
    137 // TODO: return const pointer, or find another way to express "none"
    138 struct SResource* res_none() {
    139     return &kNoResource;
    140 }
    141 
    142 SResource::SResource()
    143         : fType(URES_NONE), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), fKey(-1), fKey16(-1),
    144           line(0), fNext(NULL) {
    145     ustr_init(&fComment);
    146 }
    147 
    148 SResource::SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment,
    149                      UErrorCode &errorCode)
    150         : fType(type), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1),
    151           fKey(bundle != NULL ? bundle->addTag(tag, errorCode) : -1), fKey16(-1),
    152           line(0), fNext(NULL) {
    153     ustr_init(&fComment);
    154     if(comment != NULL) {
    155         ustr_cpy(&fComment, comment, &errorCode);
    156     }
    157 }
    158 
    159 SResource::~SResource() {
    160     ustr_deinit(&fComment);
    161 }
    162 
    163 ContainerResource::~ContainerResource() {
    164     SResource *current = fFirst;
    165     while (current != NULL) {
    166         SResource *next = current->fNext;
    167         delete current;
    168         current = next;
    169     }
    170 }
    171 
    172 TableResource::~TableResource() {}
    173 
    174 // TODO: clarify that containers adopt new items, even in error cases; use LocalPointer
    175 void TableResource::add(SResource *res, int linenumber, UErrorCode &errorCode) {
    176     if (U_FAILURE(errorCode) || res == NULL || res == &kNoResource) {
    177         return;
    178     }
    179 
    180     /* remember this linenumber to report to the user if there is a duplicate key */
    181     res->line = linenumber;
    182 
    183     /* here we need to traverse the list */
    184     ++fCount;
    185 
    186     /* is the list still empty? */
    187     if (fFirst == NULL) {
    188         fFirst = res;
    189         res->fNext = NULL;
    190         return;
    191     }
    192 
    193     const char *resKeyString = fRoot->fKeys + res->fKey;
    194 
    195     SResource *current = fFirst;
    196 
    197     SResource *prev = NULL;
    198     while (current != NULL) {
    199         const char *currentKeyString = fRoot->fKeys + current->fKey;
    200         int diff;
    201         /*
    202          * formatVersion 1: compare key strings in native-charset order
    203          * formatVersion 2 and up: compare key strings in ASCII order
    204          */
    205         if (gFormatVersion == 1 || U_CHARSET_FAMILY == U_ASCII_FAMILY) {
    206             diff = uprv_strcmp(currentKeyString, resKeyString);
    207         } else {
    208             diff = uprv_compareInvCharsAsAscii(currentKeyString, resKeyString);
    209         }
    210         if (diff < 0) {
    211             prev    = current;
    212             current = current->fNext;
    213         } else if (diff > 0) {
    214             /* we're either in front of the list, or in the middle */
    215             if (prev == NULL) {
    216                 /* front of the list */
    217                 fFirst = res;
    218             } else {
    219                 /* middle of the list */
    220                 prev->fNext = res;
    221             }
    222 
    223             res->fNext = current;
    224             return;
    225         } else {
    226             /* Key already exists! ERROR! */
    227             error(linenumber, "duplicate key '%s' in table, first appeared at line %d", currentKeyString, current->line);
    228             errorCode = U_UNSUPPORTED_ERROR;
    229             return;
    230         }
    231     }
    232 
    233     /* end of list */
    234     prev->fNext = res;
    235     res->fNext  = NULL;
    236 }
    237 
    238 ArrayResource::~ArrayResource() {}
    239 
    240 void ArrayResource::add(SResource *res) {
    241     if (res != NULL && res != &kNoResource) {
    242         if (fFirst == NULL) {
    243             fFirst = res;
    244         } else {
    245             fLast->fNext = res;
    246         }
    247         fLast = res;
    248         ++fCount;
    249     }
    250 }
    251 
    252 PseudoListResource::~PseudoListResource() {}
    253 
    254 void PseudoListResource::add(SResource *res) {
    255     if (res != NULL && res != &kNoResource) {
    256         res->fNext = fFirst;
    257         fFirst = res;
    258         ++fCount;
    259     }
    260 }
    261 
    262 StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type,
    263                                        const UChar *value, int32_t len,
    264                                        const UString* comment, UErrorCode &errorCode)
    265         : SResource(bundle, tag, type, comment, errorCode) {
    266     if (len == 0 && gFormatVersion > 1) {
    267         fRes = URES_MAKE_EMPTY_RESOURCE(type);
    268         fWritten = TRUE;
    269         return;
    270     }
    271 
    272     fString.setTo(value, len);
    273     fString.getTerminatedBuffer();  // Some code relies on NUL-termination.
    274     if (U_SUCCESS(errorCode) && fString.isBogus()) {
    275         errorCode = U_MEMORY_ALLOCATION_ERROR;
    276     }
    277 }
    278 
    279 StringBaseResource::StringBaseResource(SRBRoot *bundle, int8_t type,
    280                                        const icu::UnicodeString &value, UErrorCode &errorCode)
    281         : SResource(bundle, NULL, type, NULL, errorCode), fString(value) {
    282     if (value.isEmpty() && gFormatVersion > 1) {
    283         fRes = URES_MAKE_EMPTY_RESOURCE(type);
    284         fWritten = TRUE;
    285         return;
    286     }
    287 
    288     fString.getTerminatedBuffer();  // Some code relies on NUL-termination.
    289     if (U_SUCCESS(errorCode) && fString.isBogus()) {
    290         errorCode = U_MEMORY_ALLOCATION_ERROR;
    291     }
    292 }
    293 
    294 // Pool bundle string, alias the buffer. Guaranteed NUL-terminated and not empty.
    295 StringBaseResource::StringBaseResource(int8_t type, const UChar *value, int32_t len,
    296                                        UErrorCode &errorCode)
    297         : SResource(NULL, NULL, type, NULL, errorCode), fString(TRUE, value, len) {
    298     assert(len > 0);
    299     assert(!fString.isBogus());
    300 }
    301 
    302 StringBaseResource::~StringBaseResource() {}
    303 
    304 static int32_t U_CALLCONV
    305 string_hash(const UElement key) {
    306     const StringResource *res = static_cast<const StringResource *>(key.pointer);
    307     return res->fString.hashCode();
    308 }
    309 
    310 static UBool U_CALLCONV
    311 string_comp(const UElement key1, const UElement key2) {
    312     const StringResource *res1 = static_cast<const StringResource *>(key1.pointer);
    313     const StringResource *res2 = static_cast<const StringResource *>(key2.pointer);
    314     return res1->fString == res2->fString;
    315 }
    316 
    317 StringResource::~StringResource() {}
    318 
    319 AliasResource::~AliasResource() {}
    320 
    321 IntResource::IntResource(SRBRoot *bundle, const char *tag, int32_t value,
    322                          const UString* comment, UErrorCode &errorCode)
    323         : SResource(bundle, tag, URES_INT, comment, errorCode) {
    324     fValue = value;
    325     fRes = URES_MAKE_RESOURCE(URES_INT, value & RES_MAX_OFFSET);
    326     fWritten = TRUE;
    327 }
    328 
    329 IntResource::~IntResource() {}
    330 
    331 IntVectorResource::IntVectorResource(SRBRoot *bundle, const char *tag,
    332                   const UString* comment, UErrorCode &errorCode)
    333         : SResource(bundle, tag, URES_INT_VECTOR, comment, errorCode),
    334           fCount(0), fArray(new uint32_t[RESLIST_MAX_INT_VECTOR]) {
    335     if (fArray == NULL) {
    336         errorCode = U_MEMORY_ALLOCATION_ERROR;
    337         return;
    338     }
    339 }
    340 
    341 IntVectorResource::~IntVectorResource() {
    342     delete[] fArray;
    343 }
    344 
    345 void IntVectorResource::add(int32_t value, UErrorCode &errorCode) {
    346     if (U_SUCCESS(errorCode)) {
    347         fArray[fCount++] = value;
    348     }
    349 }
    350 
    351 BinaryResource::BinaryResource(SRBRoot *bundle, const char *tag,
    352                                uint32_t length, uint8_t *data, const char* fileName,
    353                                const UString* comment, UErrorCode &errorCode)
    354         : SResource(bundle, tag, URES_BINARY, comment, errorCode),
    355           fLength(length), fData(NULL), fFileName(NULL) {
    356     if (U_FAILURE(errorCode)) {
    357         return;
    358     }
    359     if (fileName != NULL && *fileName != 0){
    360         fFileName = new char[uprv_strlen(fileName)+1];
    361         if (fFileName == NULL) {
    362             errorCode = U_MEMORY_ALLOCATION_ERROR;
    363             return;
    364         }
    365         uprv_strcpy(fFileName, fileName);
    366     }
    367     if (length > 0) {
    368         fData = new uint8_t[length];
    369         if (fData == NULL) {
    370             errorCode = U_MEMORY_ALLOCATION_ERROR;
    371             return;
    372         }
    373         uprv_memcpy(fData, data, length);
    374     } else {
    375         if (gFormatVersion > 1) {
    376             fRes = URES_MAKE_EMPTY_RESOURCE(URES_BINARY);
    377             fWritten = TRUE;
    378         }
    379     }
    380 }
    381 
    382 BinaryResource::~BinaryResource() {
    383     delete[] fData;
    384     delete[] fFileName;
    385 }
    386 
    387 /* Writing Functions */
    388 
    389 void
    390 StringResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
    391                                        UErrorCode &errorCode) {
    392     assert(fSame == NULL);
    393     fSame = static_cast<StringResource *>(uhash_get(stringSet, this));
    394     if (fSame != NULL) {
    395         // This is a duplicate of a pool bundle string or of an earlier-visited string.
    396         if (++fSame->fNumCopies == 1) {
    397             assert(fSame->fWritten);
    398             int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(fSame->fRes);
    399             if (poolStringIndex >= bundle->fPoolStringIndexLimit) {
    400                 bundle->fPoolStringIndexLimit = poolStringIndex + 1;
    401             }
    402         }
    403         return;
    404     }
    405     /* Put this string into the set for finding duplicates. */
    406     fNumCopies = 1;
    407     uhash_put(stringSet, this, this, &errorCode);
    408 
    409     if (bundle->fStringsForm != STRINGS_UTF16_V1) {
    410         int32_t len = length();
    411         if (len <= MAX_IMPLICIT_STRING_LENGTH &&
    412                 !U16_IS_TRAIL(fString[0]) && fString.indexOf((UChar)0) < 0) {
    413             /*
    414              * This string will be stored without an explicit length.
    415              * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen().
    416              */
    417             fNumCharsForLength = 0;
    418         } else if (len <= 0x3ee) {
    419             fNumCharsForLength = 1;
    420         } else if (len <= 0xfffff) {
    421             fNumCharsForLength = 2;
    422         } else {
    423             fNumCharsForLength = 3;
    424         }
    425         bundle->f16BitStringsLength += fNumCharsForLength + len + 1;  /* +1 for the NUL */
    426     }
    427 }
    428 
    429 void
    430 ContainerResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
    431                                           UErrorCode &errorCode) {
    432     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    433         current->preflightStrings(bundle, stringSet, errorCode);
    434     }
    435 }
    436 
    437 void
    438 SResource::preflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) {
    439     if (U_FAILURE(errorCode)) {
    440         return;
    441     }
    442     if (fRes != RES_BOGUS) {
    443         /*
    444          * The resource item word was already precomputed, which means
    445          * no further data needs to be written.
    446          * This might be an integer, or an empty string/binary/etc.
    447          */
    448         return;
    449     }
    450     handlePreflightStrings(bundle, stringSet, errorCode);
    451 }
    452 
    453 void
    454 SResource::handlePreflightStrings(SRBRoot * /*bundle*/, UHashtable * /*stringSet*/,
    455                                   UErrorCode & /*errorCode*/) {
    456     /* Neither a string nor a container. */
    457 }
    458 
    459 int32_t
    460 SRBRoot::makeRes16(uint32_t resWord) const {
    461     if (resWord == 0) {
    462         return 0;  /* empty string */
    463     }
    464     uint32_t type = RES_GET_TYPE(resWord);
    465     int32_t offset = (int32_t)RES_GET_OFFSET(resWord);
    466     if (type == URES_STRING_V2) {
    467         assert(offset > 0);
    468         if (offset < fPoolStringIndexLimit) {
    469             if (offset < fPoolStringIndex16Limit) {
    470                 return offset;
    471             }
    472         } else {
    473             offset = offset - fPoolStringIndexLimit + fPoolStringIndex16Limit;
    474             if (offset <= 0xffff) {
    475                 return offset;
    476             }
    477         }
    478     }
    479     return -1;
    480 }
    481 
    482 int32_t
    483 SRBRoot::mapKey(int32_t oldpos) const {
    484     const KeyMapEntry *map = fKeyMap;
    485     if (map == NULL) {
    486         return oldpos;
    487     }
    488     int32_t i, start, limit;
    489 
    490     /* do a binary search for the old, pre-compactKeys() key offset */
    491     start = fUsePoolBundle->fKeysCount;
    492     limit = start + fKeysCount;
    493     while (start < limit - 1) {
    494         i = (start + limit) / 2;
    495         if (oldpos < map[i].oldpos) {
    496             limit = i;
    497         } else {
    498             start = i;
    499         }
    500     }
    501     assert(oldpos == map[start].oldpos);
    502     return map[start].newpos;
    503 }
    504 
    505 /*
    506  * Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings.
    507  * For unique UTF-16 v2 strings, write16() sees fRes != RES_BOGUS
    508  * and exits early.
    509  */
    510 void
    511 StringResource::handleWrite16(SRBRoot * /*bundle*/) {
    512     SResource *same;
    513     if ((same = fSame) != NULL) {
    514         /* This is a duplicate. */
    515         assert(same->fRes != RES_BOGUS && same->fWritten);
    516         fRes = same->fRes;
    517         fWritten = same->fWritten;
    518     }
    519 }
    520 
    521 void
    522 ContainerResource::writeAllRes16(SRBRoot *bundle) {
    523     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    524         bundle->f16BitUnits.append((UChar)current->fRes16);
    525     }
    526     fWritten = TRUE;
    527 }
    528 
    529 void
    530 ArrayResource::handleWrite16(SRBRoot *bundle) {
    531     if (fCount == 0 && gFormatVersion > 1) {
    532         fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY);
    533         fWritten = TRUE;
    534         return;
    535     }
    536 
    537     int32_t res16 = 0;
    538     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    539         current->write16(bundle);
    540         res16 |= current->fRes16;
    541     }
    542     if (fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) {
    543         fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnits.length());
    544         bundle->f16BitUnits.append((UChar)fCount);
    545         writeAllRes16(bundle);
    546     }
    547 }
    548 
    549 void
    550 TableResource::handleWrite16(SRBRoot *bundle) {
    551     if (fCount == 0 && gFormatVersion > 1) {
    552         fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
    553         fWritten = TRUE;
    554         return;
    555     }
    556     /* Find the smallest table type that fits the data. */
    557     int32_t key16 = 0;
    558     int32_t res16 = 0;
    559     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    560         current->write16(bundle);
    561         key16 |= current->fKey16;
    562         res16 |= current->fRes16;
    563     }
    564     if(fCount > (uint32_t)bundle->fMaxTableLength) {
    565         bundle->fMaxTableLength = fCount;
    566     }
    567     if (fCount <= 0xffff && key16 >= 0) {
    568         if (res16 >= 0 && gFormatVersion > 1) {
    569             /* 16-bit count, key offsets and values */
    570             fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnits.length());
    571             bundle->f16BitUnits.append((UChar)fCount);
    572             for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    573                 bundle->f16BitUnits.append((UChar)current->fKey16);
    574             }
    575             writeAllRes16(bundle);
    576         } else {
    577             /* 16-bit count, 16-bit key offsets, 32-bit values */
    578             fTableType = URES_TABLE;
    579         }
    580     } else {
    581         /* 32-bit count, key offsets and values */
    582         fTableType = URES_TABLE32;
    583     }
    584 }
    585 
    586 void
    587 PseudoListResource::handleWrite16(SRBRoot * /*bundle*/) {
    588     fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
    589     fWritten = TRUE;
    590 }
    591 
    592 void
    593 SResource::write16(SRBRoot *bundle) {
    594     if (fKey >= 0) {
    595         // A tagged resource has a non-negative key index into the parsed key strings.
    596         // compactKeys() built a map from parsed key index to the final key index.
    597         // After the mapping, negative key indexes are used for shared pool bundle keys.
    598         fKey = bundle->mapKey(fKey);
    599         // If the key index fits into a Key16 for a Table or Table16,
    600         // then set the fKey16 field accordingly.
    601         // Otherwise keep it at -1.
    602         if (fKey >= 0) {
    603             if (fKey < bundle->fLocalKeyLimit) {
    604                 fKey16 = fKey;
    605             }
    606         } else {
    607             int32_t poolKeyIndex = fKey & 0x7fffffff;
    608             if (poolKeyIndex <= 0xffff) {
    609                 poolKeyIndex += bundle->fLocalKeyLimit;
    610                 if (poolKeyIndex <= 0xffff) {
    611                     fKey16 = poolKeyIndex;
    612                 }
    613             }
    614         }
    615     }
    616     /*
    617      * fRes != RES_BOGUS:
    618      * The resource item word was already precomputed, which means
    619      * no further data needs to be written.
    620      * This might be an integer, or an empty or UTF-16 v2 string,
    621      * an empty binary, etc.
    622      */
    623     if (fRes == RES_BOGUS) {
    624         handleWrite16(bundle);
    625     }
    626     // Compute fRes16 for precomputed as well as just-computed fRes.
    627     fRes16 = bundle->makeRes16(fRes);
    628 }
    629 
    630 void
    631 SResource::handleWrite16(SRBRoot * /*bundle*/) {
    632     /* Only a few resource types write 16-bit units. */
    633 }
    634 
    635 /*
    636  * Only called for UTF-16 v1 strings, and for aliases.
    637  * For UTF-16 v2 strings, preWrite() sees fRes != RES_BOGUS
    638  * and exits early.
    639  */
    640 void
    641 StringBaseResource::handlePreWrite(uint32_t *byteOffset) {
    642     /* Write the UTF-16 v1 string. */
    643     fRes = URES_MAKE_RESOURCE(fType, *byteOffset >> 2);
    644     *byteOffset += 4 + (length() + 1) * U_SIZEOF_UCHAR;
    645 }
    646 
    647 void
    648 IntVectorResource::handlePreWrite(uint32_t *byteOffset) {
    649     if (fCount == 0 && gFormatVersion > 1) {
    650         fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR);
    651         fWritten = TRUE;
    652     } else {
    653         fRes = URES_MAKE_RESOURCE(URES_INT_VECTOR, *byteOffset >> 2);
    654         *byteOffset += (1 + fCount) * 4;
    655     }
    656 }
    657 
    658 void
    659 BinaryResource::handlePreWrite(uint32_t *byteOffset) {
    660     uint32_t pad       = 0;
    661     uint32_t dataStart = *byteOffset + sizeof(fLength);
    662 
    663     if (dataStart % BIN_ALIGNMENT) {
    664         pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
    665         *byteOffset += pad;  /* pad == 4 or 8 or 12 */
    666     }
    667     fRes = URES_MAKE_RESOURCE(URES_BINARY, *byteOffset >> 2);
    668     *byteOffset += 4 + fLength;
    669 }
    670 
    671 void
    672 ContainerResource::preWriteAllRes(uint32_t *byteOffset) {
    673     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    674         current->preWrite(byteOffset);
    675     }
    676 }
    677 
    678 void
    679 ArrayResource::handlePreWrite(uint32_t *byteOffset) {
    680     preWriteAllRes(byteOffset);
    681     fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2);
    682     *byteOffset += (1 + fCount) * 4;
    683 }
    684 
    685 void
    686 TableResource::handlePreWrite(uint32_t *byteOffset) {
    687     preWriteAllRes(byteOffset);
    688     if (fTableType == URES_TABLE) {
    689         /* 16-bit count, 16-bit key offsets, 32-bit values */
    690         fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2);
    691         *byteOffset += 2 + fCount * 6;
    692     } else {
    693         /* 32-bit count, key offsets and values */
    694         fRes = URES_MAKE_RESOURCE(URES_TABLE32, *byteOffset >> 2);
    695         *byteOffset += 4 + fCount * 8;
    696     }
    697 }
    698 
    699 void
    700 SResource::preWrite(uint32_t *byteOffset) {
    701     if (fRes != RES_BOGUS) {
    702         /*
    703          * The resource item word was already precomputed, which means
    704          * no further data needs to be written.
    705          * This might be an integer, or an empty or UTF-16 v2 string,
    706          * an empty binary, etc.
    707          */
    708         return;
    709     }
    710     handlePreWrite(byteOffset);
    711     *byteOffset += calcPadding(*byteOffset);
    712 }
    713 
    714 void
    715 SResource::handlePreWrite(uint32_t * /*byteOffset*/) {
    716     assert(FALSE);
    717 }
    718 
    719 /*
    720  * Only called for UTF-16 v1 strings, and for aliases. For UTF-16 v2 strings,
    721  * write() sees fWritten and exits early.
    722  */
    723 void
    724 StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
    725     /* Write the UTF-16 v1 string. */
    726     int32_t len = length();
    727     udata_write32(mem, len);
    728     udata_writeUString(mem, getBuffer(), len + 1);
    729     *byteOffset += 4 + (len + 1) * U_SIZEOF_UCHAR;
    730     fWritten = TRUE;
    731 }
    732 
    733 void
    734 ContainerResource::writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset) {
    735     uint32_t i = 0;
    736     for (SResource *current = fFirst; current != NULL; ++i, current = current->fNext) {
    737         current->write(mem, byteOffset);
    738     }
    739     assert(i == fCount);
    740 }
    741 
    742 void
    743 ContainerResource::writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset) {
    744     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    745         udata_write32(mem, current->fRes);
    746     }
    747     *byteOffset += fCount * 4;
    748 }
    749 
    750 void
    751 ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
    752     writeAllRes(mem, byteOffset);
    753     udata_write32(mem, fCount);
    754     *byteOffset += 4;
    755     writeAllRes32(mem, byteOffset);
    756 }
    757 
    758 void
    759 IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
    760     udata_write32(mem, fCount);
    761     for(uint32_t i = 0; i < fCount; ++i) {
    762       udata_write32(mem, fArray[i]);
    763     }
    764     *byteOffset += (1 + fCount) * 4;
    765 }
    766 
    767 void
    768 BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
    769     uint32_t pad       = 0;
    770     uint32_t dataStart = *byteOffset + sizeof(fLength);
    771 
    772     if (dataStart % BIN_ALIGNMENT) {
    773         pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
    774         udata_writePadding(mem, pad);  /* pad == 4 or 8 or 12 */
    775         *byteOffset += pad;
    776     }
    777 
    778     udata_write32(mem, fLength);
    779     if (fLength > 0) {
    780         udata_writeBlock(mem, fData, fLength);
    781     }
    782     *byteOffset += 4 + fLength;
    783 }
    784 
    785 void
    786 TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
    787     writeAllRes(mem, byteOffset);
    788     if(fTableType == URES_TABLE) {
    789         udata_write16(mem, (uint16_t)fCount);
    790         for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    791             udata_write16(mem, current->fKey16);
    792         }
    793         *byteOffset += (1 + fCount)* 2;
    794         if ((fCount & 1) == 0) {
    795             /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */
    796             udata_writePadding(mem, 2);
    797             *byteOffset += 2;
    798         }
    799     } else /* URES_TABLE32 */ {
    800         udata_write32(mem, fCount);
    801         for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    802             udata_write32(mem, (uint32_t)current->fKey);
    803         }
    804         *byteOffset += (1 + fCount)* 4;
    805     }
    806     writeAllRes32(mem, byteOffset);
    807 }
    808 
    809 void
    810 SResource::write(UNewDataMemory *mem, uint32_t *byteOffset) {
    811     if (fWritten) {
    812         assert(fRes != RES_BOGUS);
    813         return;
    814     }
    815     handleWrite(mem, byteOffset);
    816     uint8_t paddingSize = calcPadding(*byteOffset);
    817     if (paddingSize > 0) {
    818         udata_writePadding(mem, paddingSize);
    819         *byteOffset += paddingSize;
    820     }
    821     fWritten = TRUE;
    822 }
    823 
/*
 * Base-class implementation of the write hook called from SResource::write().
 * It ignores both arguments and asserts unconditionally, so reaching it in a
 * build with assertions enabled aborts.
 * NOTE(review): presumably every resource type that is actually written
 * overrides this (TableResource::handleWrite does) -- confirm against the
 * class declarations.
 */
void
SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/) {
    assert(FALSE);
}
    828 
    829 void SRBRoot::write(const char *outputDir, const char *outputPkg,
    830                     char *writtenFilename, int writtenFilenameLen,
    831                     UErrorCode &errorCode) {
    832     UNewDataMemory *mem        = NULL;
    833     uint32_t        byteOffset = 0;
    834     uint32_t        top, size;
    835     char            dataName[1024];
    836     int32_t         indexes[URES_INDEX_TOP];
    837 
    838     compactKeys(errorCode);
    839     /*
    840      * Add padding bytes to fKeys so that fKeysTop is 4-aligned.
    841      * Safe because the capacity is a multiple of 4.
    842      */
    843     while (fKeysTop & 3) {
    844         fKeys[fKeysTop++] = (char)0xaa;
    845     }
    846     /*
    847      * In URES_TABLE, use all local key offsets that fit into 16 bits,
    848      * and use the remaining 16-bit offsets for pool key offsets
    849      * if there are any.
    850      * If there are no local keys, then use the whole 16-bit space
    851      * for pool key offsets.
    852      * Note: This cannot be changed without changing the major formatVersion.
    853      */
    854     if (fKeysBottom < fKeysTop) {
    855         if (fKeysTop <= 0x10000) {
    856             fLocalKeyLimit = fKeysTop;
    857         } else {
    858             fLocalKeyLimit = 0x10000;
    859         }
    860     } else {
    861         fLocalKeyLimit = 0;
    862     }
    863 
    864     UHashtable *stringSet;
    865     if (gFormatVersion > 1) {
    866         stringSet = uhash_open(string_hash, string_comp, string_comp, &errorCode);
    867         if (U_SUCCESS(errorCode) &&
    868                 fUsePoolBundle != NULL && fUsePoolBundle->fStrings != NULL) {
    869             for (SResource *current = fUsePoolBundle->fStrings->fFirst;
    870                     current != NULL;
    871                     current = current->fNext) {
    872                 StringResource *sr = static_cast<StringResource *>(current);
    873                 sr->fNumCopies = 0;
    874                 sr->fNumUnitsSaved = 0;
    875                 uhash_put(stringSet, sr, sr, &errorCode);
    876             }
    877         }
    878         fRoot->preflightStrings(this, stringSet, errorCode);
    879     } else {
    880         stringSet = NULL;
    881     }
    882     if (fStringsForm == STRINGS_UTF16_V2 && f16BitStringsLength > 0) {
    883         compactStringsV2(stringSet, errorCode);
    884     }
    885     uhash_close(stringSet);
    886     if (U_FAILURE(errorCode)) {
    887         return;
    888     }
    889 
    890     int32_t formatVersion = gFormatVersion;
    891     if (fPoolStringIndexLimit != 0) {
    892         int32_t sum = fPoolStringIndexLimit + fLocalStringIndexLimit;
    893         if ((sum - 1) > RES_MAX_OFFSET) {
    894             errorCode = U_BUFFER_OVERFLOW_ERROR;
    895             return;
    896         }
    897         if (fPoolStringIndexLimit < 0x10000 && sum <= 0x10000) {
    898             // 16-bit indexes work for all pool + local strings.
    899             fPoolStringIndex16Limit = fPoolStringIndexLimit;
    900         } else {
    901             // Set the pool index threshold so that 16-bit indexes work
    902             // for some pool strings and some local strings.
    903             fPoolStringIndex16Limit = (int32_t)(
    904                     ((int64_t)fPoolStringIndexLimit * 0xffff) / sum);
    905         }
    906     } else if (gIsDefaultFormatVersion && formatVersion == 3 && !fIsPoolBundle) {
    907         // If we just default to formatVersion 3
    908         // but there are no pool bundle strings to share
    909         // and we do not write a pool bundle,
    910         // then write formatVersion 2 which is just as good.
    911         formatVersion = 2;
    912     }
    913 
    914     fRoot->write16(this);
    915     if (f16BitUnits.isBogus()) {
    916         errorCode = U_MEMORY_ALLOCATION_ERROR;
    917         return;
    918     }
    919     if (f16BitUnits.length() & 1) {
    920         f16BitUnits.append((UChar)0xaaaa);  /* pad to multiple of 4 bytes */
    921     }
    922     /* all keys have been mapped */
    923     uprv_free(fKeyMap);
    924     fKeyMap = NULL;
    925 
    926     byteOffset = fKeysTop + f16BitUnits.length() * 2;
    927     fRoot->preWrite(&byteOffset);
    928 
    929     /* total size including the root item */
    930     top = byteOffset;
    931 
    932     if (writtenFilename && writtenFilenameLen) {
    933         *writtenFilename = 0;
    934     }
    935 
    936     if (writtenFilename) {
    937        int32_t off = 0, len = 0;
    938        if (outputDir) {
    939            len = (int32_t)uprv_strlen(outputDir);
    940            if (len > writtenFilenameLen) {
    941                len = writtenFilenameLen;
    942            }
    943            uprv_strncpy(writtenFilename, outputDir, len);
    944        }
    945        if (writtenFilenameLen -= len) {
    946            off += len;
    947            writtenFilename[off] = U_FILE_SEP_CHAR;
    948            if (--writtenFilenameLen) {
    949                ++off;
    950                if(outputPkg != NULL)
    951                {
    952                    uprv_strcpy(writtenFilename+off, outputPkg);
    953                    off += (int32_t)uprv_strlen(outputPkg);
    954                    writtenFilename[off] = '_';
    955                    ++off;
    956                }
    957 
    958                len = (int32_t)uprv_strlen(fLocale);
    959                if (len > writtenFilenameLen) {
    960                    len = writtenFilenameLen;
    961                }
    962                uprv_strncpy(writtenFilename + off, fLocale, len);
    963                if (writtenFilenameLen -= len) {
    964                    off += len;
    965                    len = 5;
    966                    if (len > writtenFilenameLen) {
    967                        len = writtenFilenameLen;
    968                    }
    969                    uprv_strncpy(writtenFilename +  off, ".res", len);
    970                }
    971            }
    972        }
    973     }
    974 
    975     if(outputPkg)
    976     {
    977         uprv_strcpy(dataName, outputPkg);
    978         uprv_strcat(dataName, "_");
    979         uprv_strcat(dataName, fLocale);
    980     }
    981     else
    982     {
    983         uprv_strcpy(dataName, fLocale);
    984     }
    985 
    986     uprv_memcpy(dataInfo.formatVersion, gFormatVersions + formatVersion, sizeof(UVersionInfo));
    987 
    988     mem = udata_create(outputDir, "res", dataName,
    989                        &dataInfo, (gIncludeCopyright==TRUE)? U_COPYRIGHT_STRING:NULL, &errorCode);
    990     if(U_FAILURE(errorCode)){
    991         return;
    992     }
    993 
    994     /* write the root item */
    995     udata_write32(mem, fRoot->fRes);
    996 
    997     /*
    998      * formatVersion 1.1 (ICU 2.8):
    999      * write int32_t indexes[] after root and before the key strings
   1000      * to make it easier to parse resource bundles in icuswap or from Java etc.
   1001      */
   1002     uprv_memset(indexes, 0, sizeof(indexes));
   1003     indexes[URES_INDEX_LENGTH]=             fIndexLength;
   1004     indexes[URES_INDEX_KEYS_TOP]=           fKeysTop>>2;
   1005     indexes[URES_INDEX_RESOURCES_TOP]=      (int32_t)(top>>2);
   1006     indexes[URES_INDEX_BUNDLE_TOP]=         indexes[URES_INDEX_RESOURCES_TOP];
   1007     indexes[URES_INDEX_MAX_TABLE_LENGTH]=   fMaxTableLength;
   1008 
   1009     /*
   1010      * formatVersion 1.2 (ICU 3.6):
   1011      * write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set
   1012      * the memset() above initialized all indexes[] to 0
   1013      */
   1014     if (fNoFallback) {
   1015         indexes[URES_INDEX_ATTRIBUTES]=URES_ATT_NO_FALLBACK;
   1016     }
   1017     /*
   1018      * formatVersion 2.0 (ICU 4.4):
   1019      * more compact string value storage, optional pool bundle
   1020      */
   1021     if (URES_INDEX_16BIT_TOP < fIndexLength) {
   1022         indexes[URES_INDEX_16BIT_TOP] = (fKeysTop>>2) + (f16BitUnits.length()>>1);
   1023     }
   1024     if (URES_INDEX_POOL_CHECKSUM < fIndexLength) {
   1025         if (fIsPoolBundle) {
   1026             indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK;
   1027             uint32_t checksum = computeCRC((const char *)(fKeys + fKeysBottom),
   1028                                            (uint32_t)(fKeysTop - fKeysBottom), 0);
   1029             if (f16BitUnits.length() <= 1) {
   1030                 // no pool strings to checksum
   1031             } else if (U_IS_BIG_ENDIAN) {
   1032                 checksum = computeCRC((const char *)f16BitUnits.getBuffer(),
   1033                                       (uint32_t)f16BitUnits.length() * 2, checksum);
   1034             } else {
   1035                 // Swap to big-endian so we get the same checksum on all platforms
   1036                 // (except for charset family, due to the key strings).
   1037                 UnicodeString s(f16BitUnits);
   1038                 s.append((UChar)1);  // Ensure that we own this buffer.
   1039                 assert(!s.isBogus());
   1040                 uint16_t *p = (uint16_t *)s.getBuffer();
   1041                 for (int32_t count = f16BitUnits.length(); count > 0; --count) {
   1042                     uint16_t x = *p;
   1043                     *p++ = (uint16_t)((x << 8) | (x >> 8));
   1044                 }
   1045                 checksum = computeCRC((const char *)p,
   1046                                       (uint32_t)f16BitUnits.length() * 2, checksum);
   1047             }
   1048             indexes[URES_INDEX_POOL_CHECKSUM] = (int32_t)checksum;
   1049         } else if (gUsePoolBundle) {
   1050             indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE;
   1051             indexes[URES_INDEX_POOL_CHECKSUM] = fUsePoolBundle->fChecksum;
   1052         }
   1053     }
   1054     // formatVersion 3 (ICU 56):
   1055     // share string values via pool bundle strings
   1056     indexes[URES_INDEX_LENGTH] |= fPoolStringIndexLimit << 8;  // bits 23..0 -> 31..8
   1057     indexes[URES_INDEX_ATTRIBUTES] |= (fPoolStringIndexLimit >> 12) & 0xf000;  // bits 27..24 -> 15..12
   1058     indexes[URES_INDEX_ATTRIBUTES] |= fPoolStringIndex16Limit << 16;
   1059 
   1060     /* write the indexes[] */
   1061     udata_writeBlock(mem, indexes, fIndexLength*4);
   1062 
   1063     /* write the table key strings */
   1064     udata_writeBlock(mem, fKeys+fKeysBottom,
   1065                           fKeysTop-fKeysBottom);
   1066 
   1067     /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */
   1068     udata_writeBlock(mem, f16BitUnits.getBuffer(), f16BitUnits.length()*2);
   1069 
   1070     /* write all of the bundle contents: the root item and its children */
   1071     byteOffset = fKeysTop + f16BitUnits.length() * 2;
   1072     fRoot->write(mem, &byteOffset);
   1073     assert(byteOffset == top);
   1074 
   1075     size = udata_finish(mem, &errorCode);
   1076     if(top != size) {
   1077         fprintf(stderr, "genrb error: wrote %u bytes but counted %u\n",
   1078                 (int)size, (int)top);
   1079         errorCode = U_INTERNAL_PROGRAM_ERROR;
   1080     }
   1081 }
   1082 
   1083 /* Opening Functions */
   1084 
   1085 TableResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
   1086     LocalPointer<TableResource> res(new TableResource(bundle, tag, comment, *status), *status);
   1087     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1088 }
   1089 
   1090 ArrayResource* array_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
   1091     LocalPointer<ArrayResource> res(new ArrayResource(bundle, tag, comment, *status), *status);
   1092     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1093 }
   1094 
   1095 struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
   1096     LocalPointer<SResource> res(
   1097             new StringResource(bundle, tag, value, len, comment, *status), *status);
   1098     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1099 }
   1100 
   1101 struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
   1102     LocalPointer<SResource> res(
   1103             new AliasResource(bundle, tag, value, len, comment, *status), *status);
   1104     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1105 }
   1106 
   1107 IntVectorResource *intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
   1108     LocalPointer<IntVectorResource> res(
   1109             new IntVectorResource(bundle, tag, comment, *status), *status);
   1110     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1111 }
   1112 
   1113 struct SResource *int_open(struct SRBRoot *bundle, const char *tag, int32_t value, const struct UString* comment, UErrorCode *status) {
   1114     LocalPointer<SResource> res(new IntResource(bundle, tag, value, comment, *status), *status);
   1115     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1116 }
   1117 
   1118 struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t length, uint8_t *data, const char* fileName, const struct UString* comment, UErrorCode *status) {
   1119     LocalPointer<SResource> res(
   1120             new BinaryResource(bundle, tag, length, data, fileName, comment, *status), *status);
   1121     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1122 }
   1123 
   1124 SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCode)
   1125         : fRoot(NULL), fLocale(NULL), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE),
   1126           fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle),
   1127           fKeys(NULL), fKeyMap(NULL),
   1128           fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), fKeysCount(0), fLocalKeyLimit(0),
   1129           f16BitUnits(), f16BitStringsLength(0),
   1130           fUsePoolBundle(&kNoPoolBundle),
   1131           fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0),
   1132           fWritePoolBundle(NULL) {
   1133     if (U_FAILURE(errorCode)) {
   1134         return;
   1135     }
   1136 
   1137     if (gFormatVersion > 1) {
   1138         // f16BitUnits must start with a zero for empty resources.
   1139         // We might be able to omit it if there are no empty 16-bit resources.
   1140         f16BitUnits.append((UChar)0);
   1141     }
   1142 
   1143     fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE);
   1144     if (isPoolBundle) {
   1145         fRoot = new PseudoListResource(this, errorCode);
   1146     } else {
   1147         fRoot = new TableResource(this, NULL, comment, errorCode);
   1148     }
   1149     if (fKeys == NULL || fRoot == NULL || U_FAILURE(errorCode)) {
   1150         if (U_SUCCESS(errorCode)) {
   1151             errorCode = U_MEMORY_ALLOCATION_ERROR;
   1152         }
   1153         return;
   1154     }
   1155 
   1156     fKeysCapacity = KEY_SPACE_SIZE;
   1157     /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */
   1158     if (gUsePoolBundle || isPoolBundle) {
   1159         fIndexLength = URES_INDEX_POOL_CHECKSUM + 1;
   1160     } else if (gFormatVersion >= 2) {
   1161         fIndexLength = URES_INDEX_16BIT_TOP + 1;
   1162     } else /* formatVersion 1 */ {
   1163         fIndexLength = URES_INDEX_ATTRIBUTES + 1;
   1164     }
   1165     fKeysBottom = (1 /* root */ + fIndexLength) * 4;
   1166     uprv_memset(fKeys, 0, fKeysBottom);
   1167     fKeysTop = fKeysBottom;
   1168 
   1169     if (gFormatVersion == 1) {
   1170         fStringsForm = STRINGS_UTF16_V1;
   1171     } else {
   1172         fStringsForm = STRINGS_UTF16_V2;
   1173     }
   1174 }
   1175 
   1176 /* Closing Functions */
   1177 
/*
 * Destroys a resource object with delete; the *_open() functions in this
 * file allocate their results with new. Passing NULL is harmless because
 * deleting a null pointer does nothing.
 */
void res_close(struct SResource *res) {
    delete res;
}
   1181 
/*
 * Releases everything the bundle owns: the root resource (allocated with new
 * in the constructor) and the uprv_malloc()ed buffers for the locale name
 * (setLocale), the key strings (constructor/addKeyBytes) and the key map
 * (compactKeys).
 */
SRBRoot::~SRBRoot() {
    delete fRoot;
    uprv_free(fLocale);
    uprv_free(fKeys);
    uprv_free(fKeyMap);
}
   1188 
   1189 /* Misc Functions */
   1190 
   1191 void SRBRoot::setLocale(UChar *locale, UErrorCode &errorCode) {
   1192     if(U_FAILURE(errorCode)) {
   1193         return;
   1194     }
   1195 
   1196     uprv_free(fLocale);
   1197     fLocale = (char*) uprv_malloc(sizeof(char) * (u_strlen(locale)+1));
   1198     if(fLocale == NULL) {
   1199         errorCode = U_MEMORY_ALLOCATION_ERROR;
   1200         return;
   1201     }
   1202 
   1203     u_UCharsToChars(locale, fLocale, u_strlen(locale)+1);
   1204 }
   1205 
   1206 const char *
   1207 SRBRoot::getKeyString(int32_t key) const {
   1208     if (key < 0) {
   1209         return fUsePoolBundle->fKeys + (key & 0x7fffffff);
   1210     } else {
   1211         return fKeys + key;
   1212     }
   1213 }
   1214 
   1215 const char *
   1216 SResource::getKeyString(const SRBRoot *bundle) const {
   1217     if (fKey == -1) {
   1218         return NULL;
   1219     }
   1220     return bundle->getKeyString(fKey);
   1221 }
   1222 
   1223 const char *
   1224 SRBRoot::getKeyBytes(int32_t *pLength) const {
   1225     *pLength = fKeysTop - fKeysBottom;
   1226     return fKeys + fKeysBottom;
   1227 }
   1228 
   1229 int32_t
   1230 SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) {
   1231     int32_t keypos;
   1232 
   1233     if (U_FAILURE(errorCode)) {
   1234         return -1;
   1235     }
   1236     if (length < 0 || (keyBytes == NULL && length != 0)) {
   1237         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1238         return -1;
   1239     }
   1240     if (length == 0) {
   1241         return fKeysTop;
   1242     }
   1243 
   1244     keypos = fKeysTop;
   1245     fKeysTop += length;
   1246     if (fKeysTop >= fKeysCapacity) {
   1247         /* overflow - resize the keys buffer */
   1248         fKeysCapacity += KEY_SPACE_SIZE;
   1249         fKeys = static_cast<char *>(uprv_realloc(fKeys, fKeysCapacity));
   1250         if(fKeys == NULL) {
   1251             errorCode = U_MEMORY_ALLOCATION_ERROR;
   1252             return -1;
   1253         }
   1254     }
   1255 
   1256     uprv_memcpy(fKeys + keypos, keyBytes, length);
   1257 
   1258     return keypos;
   1259 }
   1260 
   1261 int32_t
   1262 SRBRoot::addTag(const char *tag, UErrorCode &errorCode) {
   1263     int32_t keypos;
   1264 
   1265     if (U_FAILURE(errorCode)) {
   1266         return -1;
   1267     }
   1268 
   1269     if (tag == NULL) {
   1270         /* no error: the root table and array items have no keys */
   1271         return -1;
   1272     }
   1273 
   1274     keypos = addKeyBytes(tag, (int32_t)(uprv_strlen(tag) + 1), errorCode);
   1275     if (U_SUCCESS(errorCode)) {
   1276         ++fKeysCount;
   1277     }
   1278     return keypos;
   1279 }
   1280 
   1281 static int32_t
   1282 compareInt32(int32_t lPos, int32_t rPos) {
   1283     /*
   1284      * Compare possibly-negative key offsets. Don't just return lPos - rPos
   1285      * because that is prone to negative-integer underflows.
   1286      */
   1287     if (lPos < rPos) {
   1288         return -1;
   1289     } else if (lPos > rPos) {
   1290         return 1;
   1291     } else {
   1292         return 0;
   1293     }
   1294 }
   1295 
   1296 static int32_t U_CALLCONV
   1297 compareKeySuffixes(const void *context, const void *l, const void *r) {
   1298     const struct SRBRoot *bundle=(const struct SRBRoot *)context;
   1299     int32_t lPos = ((const KeyMapEntry *)l)->oldpos;
   1300     int32_t rPos = ((const KeyMapEntry *)r)->oldpos;
   1301     const char *lStart = bundle->getKeyString(lPos);
   1302     const char *lLimit = lStart;
   1303     const char *rStart = bundle->getKeyString(rPos);
   1304     const char *rLimit = rStart;
   1305     int32_t diff;
   1306     while (*lLimit != 0) { ++lLimit; }
   1307     while (*rLimit != 0) { ++rLimit; }
   1308     /* compare keys in reverse character order */
   1309     while (lStart < lLimit && rStart < rLimit) {
   1310         diff = (int32_t)(uint8_t)*--lLimit - (int32_t)(uint8_t)*--rLimit;
   1311         if (diff != 0) {
   1312             return diff;
   1313         }
   1314     }
   1315     /* sort equal suffixes by descending key length */
   1316     diff = (int32_t)(rLimit - rStart) - (int32_t)(lLimit - lStart);
   1317     if (diff != 0) {
   1318         return diff;
   1319     }
   1320     /* Sort pool bundle keys first (negative oldpos), and otherwise keys in parsing order. */
   1321     return compareInt32(lPos, rPos);
   1322 }
   1323 
   1324 static int32_t U_CALLCONV
   1325 compareKeyNewpos(const void * /*context*/, const void *l, const void *r) {
   1326     return compareInt32(((const KeyMapEntry *)l)->newpos, ((const KeyMapEntry *)r)->newpos);
   1327 }
   1328 
   1329 static int32_t U_CALLCONV
   1330 compareKeyOldpos(const void * /*context*/, const void *l, const void *r) {
   1331     return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos);
   1332 }
   1333 
   1334 void
   1335 SRBRoot::compactKeys(UErrorCode &errorCode) {
   1336     KeyMapEntry *map;
   1337     char *keys;
   1338     int32_t i;
   1339     int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount;
   1340     if (U_FAILURE(errorCode) || fKeysCount == 0 || fKeyMap != NULL) {
   1341         return;
   1342     }
   1343     map = (KeyMapEntry *)uprv_malloc(keysCount * sizeof(KeyMapEntry));
   1344     if (map == NULL) {
   1345         errorCode = U_MEMORY_ALLOCATION_ERROR;
   1346         return;
   1347     }
   1348     keys = (char *)fUsePoolBundle->fKeys;
   1349     for (i = 0; i < fUsePoolBundle->fKeysCount; ++i) {
   1350         map[i].oldpos =
   1351             (int32_t)(keys - fUsePoolBundle->fKeys) | 0x80000000;  /* negative oldpos */
   1352         map[i].newpos = 0;
   1353         while (*keys != 0) { ++keys; }  /* skip the key */
   1354         ++keys;  /* skip the NUL */
   1355     }
   1356     keys = fKeys + fKeysBottom;
   1357     for (; i < keysCount; ++i) {
   1358         map[i].oldpos = (int32_t)(keys - fKeys);
   1359         map[i].newpos = 0;
   1360         while (*keys != 0) { ++keys; }  /* skip the key */
   1361         ++keys;  /* skip the NUL */
   1362     }
   1363     /* Sort the keys so that each one is immediately followed by all of its suffixes. */
   1364     uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
   1365                    compareKeySuffixes, this, FALSE, &errorCode);
   1366     /*
   1367      * Make suffixes point into earlier, longer strings that contain them
   1368      * and mark the old, now unused suffix bytes as deleted.
   1369      */
   1370     if (U_SUCCESS(errorCode)) {
   1371         keys = fKeys;
   1372         for (i = 0; i < keysCount;) {
   1373             /*
   1374              * This key is not a suffix of the previous one;
   1375              * keep this one and delete the following ones that are
   1376              * suffixes of this one.
   1377              */
   1378             const char *key;
   1379             const char *keyLimit;
   1380             int32_t j = i + 1;
   1381             map[i].newpos = map[i].oldpos;
   1382             if (j < keysCount && map[j].oldpos < 0) {
   1383                 /* Key string from the pool bundle, do not delete. */
   1384                 i = j;
   1385                 continue;
   1386             }
   1387             key = getKeyString(map[i].oldpos);
   1388             for (keyLimit = key; *keyLimit != 0; ++keyLimit) {}
   1389             for (; j < keysCount && map[j].oldpos >= 0; ++j) {
   1390                 const char *k;
   1391                 char *suffix;
   1392                 const char *suffixLimit;
   1393                 int32_t offset;
   1394                 suffix = keys + map[j].oldpos;
   1395                 for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {}
   1396                 offset = (int32_t)(keyLimit - key) - (suffixLimit - suffix);
   1397                 if (offset < 0) {
   1398                     break;  /* suffix cannot be longer than the original */
   1399                 }
   1400                 /* Is it a suffix of the earlier, longer key? */
   1401                 for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {}
   1402                 if (suffix == suffixLimit && *k == *suffixLimit) {
   1403                     map[j].newpos = map[i].oldpos + offset;  /* yes, point to the earlier key */
   1404                     /* mark the suffix as deleted */
   1405                     while (*suffix != 0) { *suffix++ = 1; }
   1406                     *suffix = 1;
   1407                 } else {
   1408                     break;  /* not a suffix, restart from here */
   1409                 }
   1410             }
   1411             i = j;
   1412         }
   1413         /*
   1414          * Re-sort by newpos, then modify the key characters array in-place
   1415          * to squeeze out unused bytes, and readjust the newpos offsets.
   1416          */
   1417         uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
   1418                        compareKeyNewpos, NULL, FALSE, &errorCode);
   1419         if (U_SUCCESS(errorCode)) {
   1420             int32_t oldpos, newpos, limit;
   1421             oldpos = newpos = fKeysBottom;
   1422             limit = fKeysTop;
   1423             /* skip key offsets that point into the pool bundle rather than this new bundle */
   1424             for (i = 0; i < keysCount && map[i].newpos < 0; ++i) {}
   1425             if (i < keysCount) {
   1426                 while (oldpos < limit) {
   1427                     if (keys[oldpos] == 1) {
   1428                         ++oldpos;  /* skip unused bytes */
   1429                     } else {
   1430                         /* adjust the new offsets for keys starting here */
   1431                         while (i < keysCount && map[i].newpos == oldpos) {
   1432                             map[i++].newpos = newpos;
   1433                         }
   1434                         /* move the key characters to their new position */
   1435                         keys[newpos++] = keys[oldpos++];
   1436                     }
   1437                 }
   1438                 assert(i == keysCount);
   1439             }
   1440             fKeysTop = newpos;
   1441             /* Re-sort once more, by old offsets for binary searching. */
   1442             uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
   1443                            compareKeyOldpos, NULL, FALSE, &errorCode);
   1444             if (U_SUCCESS(errorCode)) {
   1445                 /* key size reduction by limit - newpos */
   1446                 fKeyMap = map;
   1447                 map = NULL;
   1448             }
   1449         }
   1450     }
   1451     uprv_free(map);
   1452 }
   1453 
   1454 static int32_t U_CALLCONV
   1455 compareStringSuffixes(const void * /*context*/, const void *l, const void *r) {
   1456     const StringResource *left = *((const StringResource **)l);
   1457     const StringResource *right = *((const StringResource **)r);
   1458     const UChar *lStart = left->getBuffer();
   1459     const UChar *lLimit = lStart + left->length();
   1460     const UChar *rStart = right->getBuffer();
   1461     const UChar *rLimit = rStart + right->length();
   1462     int32_t diff;
   1463     /* compare keys in reverse character order */
   1464     while (lStart < lLimit && rStart < rLimit) {
   1465         diff = (int32_t)*--lLimit - (int32_t)*--rLimit;
   1466         if (diff != 0) {
   1467             return diff;
   1468         }
   1469     }
   1470     /* sort equal suffixes by descending string length */
   1471     return right->length() - left->length();
   1472 }
   1473 
   1474 static int32_t U_CALLCONV
   1475 compareStringLengths(const void * /*context*/, const void *l, const void *r) {
   1476     const StringResource *left = *((const StringResource **)l);
   1477     const StringResource *right = *((const StringResource **)r);
   1478     int32_t diff;
   1479     /* Make "is suffix of another string" compare greater than a non-suffix. */
   1480     diff = (int)(left->fSame != NULL) - (int)(right->fSame != NULL);
   1481     if (diff != 0) {
   1482         return diff;
   1483     }
   1484     /* sort by ascending string length */
   1485     diff = left->length() - right->length();
   1486     if (diff != 0) {
   1487         return diff;
   1488     }
   1489     // sort by descending size reduction
   1490     diff = right->fNumUnitsSaved - left->fNumUnitsSaved;
   1491     if (diff != 0) {
   1492         return diff;
   1493     }
   1494     // sort lexically
   1495     return left->fString.compare(right->fString);
   1496 }
   1497 
   1498 void
   1499 StringResource::writeUTF16v2(int32_t base, UnicodeString &dest) {
   1500     int32_t len = length();
   1501     fRes = URES_MAKE_RESOURCE(URES_STRING_V2, base + dest.length());
   1502     fWritten = TRUE;
   1503     switch(fNumCharsForLength) {
   1504     case 0:
   1505         break;
   1506     case 1:
   1507         dest.append((UChar)(0xdc00 + len));
   1508         break;
   1509     case 2:
   1510         dest.append((UChar)(0xdfef + (len >> 16)));
   1511         dest.append((UChar)len);
   1512         break;
   1513     case 3:
   1514         dest.append((UChar)0xdfff);
   1515         dest.append((UChar)(len >> 16));
   1516         dest.append((UChar)len);
   1517         break;
   1518     default:
   1519         break;  /* will not occur */
   1520     }
   1521     dest.append(fString);
   1522     dest.append((UChar)0);
   1523 }
   1524 
   1525 void
   1526 SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) {
            // Purpose: decide, for every StringResource that is a key of stringSet,
            // whether its text is appended to f16BitUnits or whether it can point
            // into a longer string that ends with the same text (suffix sharing).
            //
            // stringSet: hash table whose keys are StringResource pointers.
            // errorCode: in/out. Nothing is done if it already indicates a failure.
            //            It is set to U_MEMORY_ALLOCATION_ERROR here when f16BitUnits
            //            turns bogus or a pool StringResource cannot be allocated;
            //            failures from uprv_sortArray() are passed through.
            //
            // Steps:
            //   1. Collect the strings and sort them so that suffixes follow the
            //      string that contains them.
            //   2. Link each suffix to its containing string (fSame/fSuffixOffset).
            //   3. Re-sort by length, suffixes last.
            //   4. Write: a pool bundle keeps only sufficiently shared strings;
            //      any other bundle writes its non-suffix strings and then resolves
            //      the suffix strings against them.
   1527     if (U_FAILURE(errorCode)) {
   1528         return;
   1529     }
   1530     // Store the StringResource pointers in an array for
   1531     // easy sorting and processing.
   1532     // We enumerate a set of strings, so there are no duplicates.
   1533     int32_t count = uhash_count(stringSet);
            // The array is owned by the LocalArray; an allocation failure is
            // presumably reported through errorCode, which is checked right below.
   1534     LocalArray<StringResource *> array(new StringResource *[count], errorCode);
   1535     if (U_FAILURE(errorCode)) {
   1536         return;
   1537     }
   1538     for (int32_t pos = UHASH_FIRST, i = 0; i < count; ++i) {
   1539         array[i] = (StringResource *)uhash_nextElement(stringSet, &pos)->key.pointer;
   1540     }
   1541     /* Sort the strings so that each one is immediately followed by all of its suffixes. */
            // NOTE(review): the element size is spelled sizeof(struct SResource **)
            // although the array elements are StringResource *; both are object
            // pointers and presumably have the same size -- confirm.
   1542     uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
   1543                    compareStringSuffixes, NULL, FALSE, &errorCode);
   1544     if (U_FAILURE(errorCode)) {
   1545         return;
   1546     }
   1547     /*
   1548      * Make suffixes point into earlier, longer strings that contain them.
   1549      * Temporarily use fSame and fSuffixOffset for suffix strings to
   1550      * refer to the remaining ones.
   1551      */
   1552     for (int32_t i = 0; i < count;) {
   1553         /*
   1554          * This string is not a suffix of the previous one;
   1555          * write this one and subsume the following ones that are
   1556          * suffixes of this one.
   1557          */
   1558         StringResource *res = array[i];
                // Start with the units saved by storing this string once instead
                // of once per copy; shared suffixes add to this value below.
   1559         res->fNumUnitsSaved = (res->fNumCopies - 1) * res->get16BitStringsLength();
   1560         // Whole duplicates of pool strings are already accounted for in fPoolStringIndexLimit,
   1561         // see StringResource::handlePreflightStrings().
   1562         int32_t j;
   1563         for (j = i + 1; j < count; ++j) {
   1564             StringResource *suffixRes = array[j];
   1565             /* Is it a suffix of the earlier, longer string? */
   1566             if (res->fString.endsWith(suffixRes->fString)) {
   1567                 assert(res->length() != suffixRes->length());  // Set strings are unique.
   1568                 if (suffixRes->fWritten) {
   1569                     // Pool string, skip.
   1570                 } else if (suffixRes->fNumCharsForLength == 0) {
   1571                     /* yes, point to the earlier string */
   1572                     suffixRes->fSame = res;
   1573                     suffixRes->fSuffixOffset = res->length() - suffixRes->length();
   1574                     if (res->fWritten) {
   1575                         // Suffix-share res which is a pool string.
   1576                         // Compute the resource word and collect the maximum.
   1577                         suffixRes->fRes =
   1578                                 res->fRes + res->fNumCharsForLength + suffixRes->fSuffixOffset;
   1579                         int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(suffixRes->fRes);
   1580                         if (poolStringIndex >= fPoolStringIndexLimit) {
   1581                             fPoolStringIndexLimit = poolStringIndex + 1;
   1582                         }
   1583                         suffixRes->fWritten = TRUE;
   1584                     }
                            // Every copy of the suffix is saved because none of them
                            // needs storage of its own.
   1585                     res->fNumUnitsSaved += suffixRes->fNumCopies * suffixRes->get16BitStringsLength();
   1586                 } else {
   1587                     /* write the suffix by itself if we need explicit length */
   1588                 }
   1589             } else {
   1590                 break;  /* not a suffix, restart from here */
   1591             }
   1592         }
                // Continue with the first entry that is not a suffix of res
                // (or stop when j reached count).
   1593         i = j;
   1594     }
   1595     /*
   1596      * Re-sort the strings by ascending length (except suffixes last)
   1597      * to optimize for URES_TABLE16 and URES_ARRAY16:
   1598      * Keep as many as possible within reach of 16-bit offsets.
   1599      */
   1600     uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
   1601                    compareStringLengths, NULL, FALSE, &errorCode);
   1602     if (U_FAILURE(errorCode)) {
   1603         return;
   1604     }
   1605     if (fIsPoolBundle) {
   1606         // Write strings that are sufficiently shared.
   1607         // Avoid writing other strings.
   1608         int32_t numStringsWritten = 0;
   1609         int32_t numUnitsSaved = 0;
   1610         int32_t numUnitsNotSaved = 0;
   1611         for (int32_t i = 0; i < count; ++i) {
   1612             StringResource *res = array[i];
   1613             // Maximum pool string index when suffix-sharing the last character.
   1614             int32_t maxStringIndex =
   1615                     f16BitUnits.length() + res->fNumCharsForLength + res->length() - 1;
   1616             if (res->fNumUnitsSaved >= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING &&
   1617                     maxStringIndex < RES_MAX_OFFSET) {
   1618                 res->writeUTF16v2(0, f16BitUnits);
   1619                 ++numStringsWritten;
   1620                 numUnitsSaved += res->fNumUnitsSaved;
   1621             } else {
                        // Below the sharing threshold or out of offset range:
                        // give the string the empty-string resource word and mark
                        // it as handled so that it is not written to this bundle.
   1622                 numUnitsNotSaved += res->fNumUnitsSaved;
   1623                 res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_STRING);
   1624                 res->fWritten = TRUE;
   1625             }
   1626         }
   1627         if (f16BitUnits.isBogus()) {
   1628             errorCode = U_MEMORY_ALLOCATION_ERROR;
   1629         }
                // NOTE(review): unlike the non-pool branch below there is no early
                // return after the isBogus() check, so the statistics are still
                // printed after a failure -- confirm that this is intended.
   1630         if (getShowWarning()) {  // not quiet
   1631             printf("number of shared strings: %d\n", (int)numStringsWritten);
   1632             printf("16-bit units for strings: %6d = %6d bytes\n",
   1633                    (int)f16BitUnits.length(), (int)f16BitUnits.length() * 2);
   1634             printf("16-bit units saved:       %6d = %6d bytes\n",
   1635                    (int)numUnitsSaved, (int)numUnitsSaved * 2);
   1636             printf("16-bit units not saved:   %6d = %6d bytes\n",
   1637                    (int)numUnitsNotSaved, (int)numUnitsNotSaved * 2);
   1638         }
   1639     } else {
   1640         assert(fPoolStringIndexLimit <= fUsePoolBundle->fStringIndexLimit);
   1641         /* Write the non-suffix strings. */
                // The loop stops at the first entry with fSame != NULL; the length
                // sort above is documented to place the suffix strings last.
                // i is declared outside the loop because the loops below reuse it.
   1642         int32_t i;
   1643         for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
   1644             StringResource *res = array[i];
   1645             if (!res->fWritten) {
                        // The string is appended at the current end of f16BitUnits;
                        // keep fLocalStringIndexLimit above that start index.
   1646                 int32_t localStringIndex = f16BitUnits.length();
   1647                 if (localStringIndex >= fLocalStringIndexLimit) {
   1648                     fLocalStringIndexLimit = localStringIndex + 1;
   1649                 }
   1650                 res->writeUTF16v2(fPoolStringIndexLimit, f16BitUnits);
   1651             }
   1652         }
   1653         if (f16BitUnits.isBogus()) {
   1654             errorCode = U_MEMORY_ALLOCATION_ERROR;
   1655             return;
   1656         }
                // When a pool bundle is being collected (format version 3 or later),
                // add a new StringResource with the same fString to its root list
                // for every non-suffix string. This loop restarts i at 0 and, unless
                // it breaks, ends at the same index as the loop above.
   1657         if (fWritePoolBundle != NULL && gFormatVersion >= 3) {
   1658             PseudoListResource *poolStrings =
   1659                     static_cast<PseudoListResource *>(fWritePoolBundle->fRoot);
   1660             for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
   1661                 assert(!array[i]->fString.isEmpty());
   1662                 StringResource *poolString =
   1663                         new StringResource(fWritePoolBundle, array[i]->fString, errorCode);
                        // NOTE(review): only a NULL result is handled; errorCode as
                        // set by the constructor is not inspected before add(), and
                        // after the break execution continues with the suffix loop
                        // below, starting at the index that failed.
   1664                 if (poolString == NULL) {
   1665                     errorCode = U_MEMORY_ALLOCATION_ERROR;
   1666                     break;
   1667                 }
   1668                 poolStrings->add(poolString);
   1669             }
   1670         }
   1671         /* Write the suffix strings. Make each point to the real string. */
   1672         for (; i < count; ++i) {
   1673             StringResource *res = array[i];
   1674             if (res->fWritten) {
   1675                 continue;
   1676             }
   1677             StringResource *same = res->fSame;
   1678             assert(res->length() != same->length());  // Set strings are unique.
                    // Resource word of the containing string, advanced past its
                    // fNumCharsForLength length units and past the leading units
                    // that the suffix does not share.
   1679             res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset;
   1680             int32_t localStringIndex = (int32_t)RES_GET_OFFSET(res->fRes) - fPoolStringIndexLimit;
   1681             // Suffixes of pool strings have been set already.
   1682             assert(localStringIndex >= 0);
   1683             if (localStringIndex >= fLocalStringIndexLimit) {
   1684                 fLocalStringIndexLimit = localStringIndex + 1;
   1685             }
   1686             res->fWritten = TRUE;
   1687         }
   1688     }
   1689     // +1 to account for the initial zero in f16BitUnits
   1690     assert(f16BitUnits.length() <= (f16BitStringsLength + 1));
   1691 }
   1692