Home | History | Annotate | Download | only in genrb
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2000-2015, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *
     11 * File reslist.cpp
     12 *
     13 * Modification History:
     14 *
     15 *   Date        Name        Description
     16 *   02/21/00    weiv        Creation.
     17 *******************************************************************************
     18 */
     19 
     20 // Safer use of UnicodeString.
     21 #ifndef UNISTR_FROM_CHAR_EXPLICIT
     22 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
     23 #endif
     24 
     25 // Less important, but still a good idea.
     26 #ifndef UNISTR_FROM_STRING_EXPLICIT
     27 #   define UNISTR_FROM_STRING_EXPLICIT explicit
     28 #endif
     29 
     30 #include <assert.h>
     31 #include <stdio.h>
     32 #include "unicode/localpointer.h"
     33 #include "reslist.h"
     34 #include "unewdata.h"
     35 #include "unicode/ures.h"
     36 #include "unicode/putil.h"
     37 #include "errmsg.h"
     38 
     39 #include "uarrsort.h"
     40 #include "uelement.h"
     41 #include "uhash.h"
     42 #include "uinvchar.h"
     43 #include "ustr_imp.h"
     44 #include "unicode/utf16.h"
     45 /*
     46  * Align binary data at a 16-byte offset from the start of the resource bundle,
     47  * to be safe for any data type it may contain.
     48  */
     49 #define BIN_ALIGNMENT 16
     50 
     51 // This numeric constant must be at least 1.
     52 // If StringResource.fNumUnitsSaved == 0 then the string occurs only once,
     53 // and it makes no sense to move it to the pool bundle.
     54 // The larger the threshold for fNumUnitsSaved
     55 // the smaller the savings, and the smaller the pool bundle.
     56 // We trade some total size reduction to reduce the pool bundle a bit,
     57 // so that one can reasonably save data size by
     58 // removing bundle files without rebuilding the pool bundle.
     59 // This can also help to keep the pool and total (pool+local) string indexes
     60 // within 16 bits, that is, within range of Table16 and Array16 containers.
     61 #ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING
     62 #   define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10
     63 #endif
     64 
     65 U_NAMESPACE_USE
     66 
     67 static UBool gIncludeCopyright = FALSE;
     68 static UBool gUsePoolBundle = FALSE;
     69 static UBool gIsDefaultFormatVersion = TRUE;
     70 static int32_t gFormatVersion = 3;
     71 
     72 /* How do we store string values? */
     73 enum {
     74     STRINGS_UTF16_V1,   /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */
     75     STRINGS_UTF16_V2    /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */
     76 };
     77 
     78 static const int32_t MAX_IMPLICIT_STRING_LENGTH = 40;  /* do not store the length explicitly for such strings */
     79 
     80 static const ResFile kNoPoolBundle;
     81 
     82 /*
     83  * res_none() returns the address of kNoResource,
     84  * for use in non-error cases when no resource is to be added to the bundle.
     85  * (NULL is used in error cases.)
     86  */
     87 static SResource kNoResource;  // TODO: const
     88 
     89 static UDataInfo dataInfo= {
     90     sizeof(UDataInfo),
     91     0,
     92 
     93     U_IS_BIG_ENDIAN,
     94     U_CHARSET_FAMILY,
     95     sizeof(UChar),
     96     0,
     97 
     98     {0x52, 0x65, 0x73, 0x42},     /* dataFormat="ResB" */
     99     {1, 3, 0, 0},                 /* formatVersion */
    100     {1, 4, 0, 0}                  /* dataVersion take a look at version inside parsed resb*/
    101 };
    102 
    103 static const UVersionInfo gFormatVersions[4] = {  /* indexed by a major-formatVersion integer */
    104     { 0, 0, 0, 0 },
    105     { 1, 3, 0, 0 },
    106     { 2, 0, 0, 0 },
    107     { 3, 0, 0, 0 }
    108 };
    109 // Remember to update genrb.h GENRB_VERSION when changing the data format.
    110 // (Or maybe we should remove GENRB_VERSION and report the ICU version number?)
    111 
    112 static uint8_t calcPadding(uint32_t size) {
    113     /* returns space we need to pad */
    114     return (uint8_t) ((size % sizeof(uint32_t)) ? (sizeof(uint32_t) - (size % sizeof(uint32_t))) : 0);
    115 
    116 }
    117 
    118 void setIncludeCopyright(UBool val){
    119     gIncludeCopyright=val;
    120 }
    121 
    122 UBool getIncludeCopyright(void){
    123     return gIncludeCopyright;
    124 }
    125 
    126 void setFormatVersion(int32_t formatVersion) {
    127     gIsDefaultFormatVersion = FALSE;
    128     gFormatVersion = formatVersion;
    129 }
    130 
    131 int32_t getFormatVersion() {
    132     return gFormatVersion;
    133 }
    134 
    135 void setUsePoolBundle(UBool use) {
    136     gUsePoolBundle = use;
    137 }
    138 
    139 // TODO: return const pointer, or find another way to express "none"
    140 struct SResource* res_none() {
    141     return &kNoResource;
    142 }
    143 
    144 SResource::SResource()
    145         : fType(URES_NONE), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), fKey(-1), fKey16(-1),
    146           line(0), fNext(NULL) {
    147     ustr_init(&fComment);
    148 }
    149 
    150 SResource::SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment,
    151                      UErrorCode &errorCode)
    152         : fType(type), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1),
    153           fKey(bundle != NULL ? bundle->addTag(tag, errorCode) : -1), fKey16(-1),
    154           line(0), fNext(NULL) {
    155     ustr_init(&fComment);
    156     if(comment != NULL) {
    157         ustr_cpy(&fComment, comment, &errorCode);
    158     }
    159 }
    160 
    161 SResource::~SResource() {
    162     ustr_deinit(&fComment);
    163 }
    164 
    165 ContainerResource::~ContainerResource() {
    166     SResource *current = fFirst;
    167     while (current != NULL) {
    168         SResource *next = current->fNext;
    169         delete current;
    170         current = next;
    171     }
    172 }
    173 
    174 TableResource::~TableResource() {}
    175 
    176 // TODO: clarify that containers adopt new items, even in error cases; use LocalPointer
    177 void TableResource::add(SResource *res, int linenumber, UErrorCode &errorCode) {
    178     if (U_FAILURE(errorCode) || res == NULL || res == &kNoResource) {
    179         return;
    180     }
    181 
    182     /* remember this linenumber to report to the user if there is a duplicate key */
    183     res->line = linenumber;
    184 
    185     /* here we need to traverse the list */
    186     ++fCount;
    187 
    188     /* is the list still empty? */
    189     if (fFirst == NULL) {
    190         fFirst = res;
    191         res->fNext = NULL;
    192         return;
    193     }
    194 
    195     const char *resKeyString = fRoot->fKeys + res->fKey;
    196 
    197     SResource *current = fFirst;
    198 
    199     SResource *prev = NULL;
    200     while (current != NULL) {
    201         const char *currentKeyString = fRoot->fKeys + current->fKey;
    202         int diff;
    203         /*
    204          * formatVersion 1: compare key strings in native-charset order
    205          * formatVersion 2 and up: compare key strings in ASCII order
    206          */
    207         if (gFormatVersion == 1 || U_CHARSET_FAMILY == U_ASCII_FAMILY) {
    208             diff = uprv_strcmp(currentKeyString, resKeyString);
    209         } else {
    210             diff = uprv_compareInvCharsAsAscii(currentKeyString, resKeyString);
    211         }
    212         if (diff < 0) {
    213             prev    = current;
    214             current = current->fNext;
    215         } else if (diff > 0) {
    216             /* we're either in front of the list, or in the middle */
    217             if (prev == NULL) {
    218                 /* front of the list */
    219                 fFirst = res;
    220             } else {
    221                 /* middle of the list */
    222                 prev->fNext = res;
    223             }
    224 
    225             res->fNext = current;
    226             return;
    227         } else {
    228             /* Key already exists! ERROR! */
    229             error(linenumber, "duplicate key '%s' in table, first appeared at line %d", currentKeyString, current->line);
    230             errorCode = U_UNSUPPORTED_ERROR;
    231             return;
    232         }
    233     }
    234 
    235     /* end of list */
    236     prev->fNext = res;
    237     res->fNext  = NULL;
    238 }
    239 
    240 ArrayResource::~ArrayResource() {}
    241 
    242 void ArrayResource::add(SResource *res) {
    243     if (res != NULL && res != &kNoResource) {
    244         if (fFirst == NULL) {
    245             fFirst = res;
    246         } else {
    247             fLast->fNext = res;
    248         }
    249         fLast = res;
    250         ++fCount;
    251     }
    252 }
    253 
    254 PseudoListResource::~PseudoListResource() {}
    255 
    256 void PseudoListResource::add(SResource *res) {
    257     if (res != NULL && res != &kNoResource) {
    258         res->fNext = fFirst;
    259         fFirst = res;
    260         ++fCount;
    261     }
    262 }
    263 
    264 StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type,
    265                                        const UChar *value, int32_t len,
    266                                        const UString* comment, UErrorCode &errorCode)
    267         : SResource(bundle, tag, type, comment, errorCode) {
    268     if (len == 0 && gFormatVersion > 1) {
    269         fRes = URES_MAKE_EMPTY_RESOURCE(type);
    270         fWritten = TRUE;
    271         return;
    272     }
    273 
    274     fString.setTo(value, len);
    275     fString.getTerminatedBuffer();  // Some code relies on NUL-termination.
    276     if (U_SUCCESS(errorCode) && fString.isBogus()) {
    277         errorCode = U_MEMORY_ALLOCATION_ERROR;
    278     }
    279 }
    280 
    281 StringBaseResource::StringBaseResource(SRBRoot *bundle, int8_t type,
    282                                        const icu::UnicodeString &value, UErrorCode &errorCode)
    283         : SResource(bundle, NULL, type, NULL, errorCode), fString(value) {
    284     if (value.isEmpty() && gFormatVersion > 1) {
    285         fRes = URES_MAKE_EMPTY_RESOURCE(type);
    286         fWritten = TRUE;
    287         return;
    288     }
    289 
    290     fString.getTerminatedBuffer();  // Some code relies on NUL-termination.
    291     if (U_SUCCESS(errorCode) && fString.isBogus()) {
    292         errorCode = U_MEMORY_ALLOCATION_ERROR;
    293     }
    294 }
    295 
    296 // Pool bundle string, alias the buffer. Guaranteed NUL-terminated and not empty.
    297 StringBaseResource::StringBaseResource(int8_t type, const UChar *value, int32_t len,
    298                                        UErrorCode &errorCode)
    299         : SResource(NULL, NULL, type, NULL, errorCode), fString(TRUE, value, len) {
    300     assert(len > 0);
    301     assert(!fString.isBogus());
    302 }
    303 
    304 StringBaseResource::~StringBaseResource() {}
    305 
    306 static int32_t U_CALLCONV
    307 string_hash(const UElement key) {
    308     const StringResource *res = static_cast<const StringResource *>(key.pointer);
    309     return res->fString.hashCode();
    310 }
    311 
    312 static UBool U_CALLCONV
    313 string_comp(const UElement key1, const UElement key2) {
    314     const StringResource *res1 = static_cast<const StringResource *>(key1.pointer);
    315     const StringResource *res2 = static_cast<const StringResource *>(key2.pointer);
    316     return res1->fString == res2->fString;
    317 }
    318 
    319 StringResource::~StringResource() {}
    320 
    321 AliasResource::~AliasResource() {}
    322 
    323 IntResource::IntResource(SRBRoot *bundle, const char *tag, int32_t value,
    324                          const UString* comment, UErrorCode &errorCode)
    325         : SResource(bundle, tag, URES_INT, comment, errorCode) {
    326     fValue = value;
    327     fRes = URES_MAKE_RESOURCE(URES_INT, value & RES_MAX_OFFSET);
    328     fWritten = TRUE;
    329 }
    330 
    331 IntResource::~IntResource() {}
    332 
    333 IntVectorResource::IntVectorResource(SRBRoot *bundle, const char *tag,
    334                   const UString* comment, UErrorCode &errorCode)
    335         : SResource(bundle, tag, URES_INT_VECTOR, comment, errorCode),
    336           fCount(0), fArray(new uint32_t[RESLIST_MAX_INT_VECTOR]) {
    337     if (fArray == NULL) {
    338         errorCode = U_MEMORY_ALLOCATION_ERROR;
    339         return;
    340     }
    341 }
    342 
    343 IntVectorResource::~IntVectorResource() {
    344     delete[] fArray;
    345 }
    346 
    347 void IntVectorResource::add(int32_t value, UErrorCode &errorCode) {
    348     if (U_SUCCESS(errorCode)) {
    349         fArray[fCount++] = value;
    350     }
    351 }
    352 
    353 BinaryResource::BinaryResource(SRBRoot *bundle, const char *tag,
    354                                uint32_t length, uint8_t *data, const char* fileName,
    355                                const UString* comment, UErrorCode &errorCode)
    356         : SResource(bundle, tag, URES_BINARY, comment, errorCode),
    357           fLength(length), fData(NULL), fFileName(NULL) {
    358     if (U_FAILURE(errorCode)) {
    359         return;
    360     }
    361     if (fileName != NULL && *fileName != 0){
    362         fFileName = new char[uprv_strlen(fileName)+1];
    363         if (fFileName == NULL) {
    364             errorCode = U_MEMORY_ALLOCATION_ERROR;
    365             return;
    366         }
    367         uprv_strcpy(fFileName, fileName);
    368     }
    369     if (length > 0) {
    370         fData = new uint8_t[length];
    371         if (fData == NULL) {
    372             errorCode = U_MEMORY_ALLOCATION_ERROR;
    373             return;
    374         }
    375         uprv_memcpy(fData, data, length);
    376     } else {
    377         if (gFormatVersion > 1) {
    378             fRes = URES_MAKE_EMPTY_RESOURCE(URES_BINARY);
    379             fWritten = TRUE;
    380         }
    381     }
    382 }
    383 
    384 BinaryResource::~BinaryResource() {
    385     delete[] fData;
    386     delete[] fFileName;
    387 }
    388 
    389 /* Writing Functions */
    390 
    391 void
    392 StringResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
    393                                        UErrorCode &errorCode) {
    394     assert(fSame == NULL);
    395     fSame = static_cast<StringResource *>(uhash_get(stringSet, this));
    396     if (fSame != NULL) {
    397         // This is a duplicate of a pool bundle string or of an earlier-visited string.
    398         if (++fSame->fNumCopies == 1) {
    399             assert(fSame->fWritten);
    400             int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(fSame->fRes);
    401             if (poolStringIndex >= bundle->fPoolStringIndexLimit) {
    402                 bundle->fPoolStringIndexLimit = poolStringIndex + 1;
    403             }
    404         }
    405         return;
    406     }
    407     /* Put this string into the set for finding duplicates. */
    408     fNumCopies = 1;
    409     uhash_put(stringSet, this, this, &errorCode);
    410 
    411     if (bundle->fStringsForm != STRINGS_UTF16_V1) {
    412         int32_t len = length();
    413         if (len <= MAX_IMPLICIT_STRING_LENGTH &&
    414                 !U16_IS_TRAIL(fString[0]) && fString.indexOf((UChar)0) < 0) {
    415             /*
    416              * This string will be stored without an explicit length.
    417              * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen().
    418              */
    419             fNumCharsForLength = 0;
    420         } else if (len <= 0x3ee) {
    421             fNumCharsForLength = 1;
    422         } else if (len <= 0xfffff) {
    423             fNumCharsForLength = 2;
    424         } else {
    425             fNumCharsForLength = 3;
    426         }
    427         bundle->f16BitStringsLength += fNumCharsForLength + len + 1;  /* +1 for the NUL */
    428     }
    429 }
    430 
    431 void
    432 ContainerResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
    433                                           UErrorCode &errorCode) {
    434     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    435         current->preflightStrings(bundle, stringSet, errorCode);
    436     }
    437 }
    438 
    439 void
    440 SResource::preflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) {
    441     if (U_FAILURE(errorCode)) {
    442         return;
    443     }
    444     if (fRes != RES_BOGUS) {
    445         /*
    446          * The resource item word was already precomputed, which means
    447          * no further data needs to be written.
    448          * This might be an integer, or an empty string/binary/etc.
    449          */
    450         return;
    451     }
    452     handlePreflightStrings(bundle, stringSet, errorCode);
    453 }
    454 
    455 void
    456 SResource::handlePreflightStrings(SRBRoot * /*bundle*/, UHashtable * /*stringSet*/,
    457                                   UErrorCode & /*errorCode*/) {
    458     /* Neither a string nor a container. */
    459 }
    460 
    461 int32_t
    462 SRBRoot::makeRes16(uint32_t resWord) const {
    463     if (resWord == 0) {
    464         return 0;  /* empty string */
    465     }
    466     uint32_t type = RES_GET_TYPE(resWord);
    467     int32_t offset = (int32_t)RES_GET_OFFSET(resWord);
    468     if (type == URES_STRING_V2) {
    469         assert(offset > 0);
    470         if (offset < fPoolStringIndexLimit) {
    471             if (offset < fPoolStringIndex16Limit) {
    472                 return offset;
    473             }
    474         } else {
    475             offset = offset - fPoolStringIndexLimit + fPoolStringIndex16Limit;
    476             if (offset <= 0xffff) {
    477                 return offset;
    478             }
    479         }
    480     }
    481     return -1;
    482 }
    483 
    484 int32_t
    485 SRBRoot::mapKey(int32_t oldpos) const {
    486     const KeyMapEntry *map = fKeyMap;
    487     if (map == NULL) {
    488         return oldpos;
    489     }
    490     int32_t i, start, limit;
    491 
    492     /* do a binary search for the old, pre-compactKeys() key offset */
    493     start = fUsePoolBundle->fKeysCount;
    494     limit = start + fKeysCount;
    495     while (start < limit - 1) {
    496         i = (start + limit) / 2;
    497         if (oldpos < map[i].oldpos) {
    498             limit = i;
    499         } else {
    500             start = i;
    501         }
    502     }
    503     assert(oldpos == map[start].oldpos);
    504     return map[start].newpos;
    505 }
    506 
    507 /*
    508  * Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings.
    509  * For unique UTF-16 v2 strings, write16() sees fRes != RES_BOGUS
    510  * and exits early.
    511  */
    512 void
    513 StringResource::handleWrite16(SRBRoot * /*bundle*/) {
    514     SResource *same;
    515     if ((same = fSame) != NULL) {
    516         /* This is a duplicate. */
    517         assert(same->fRes != RES_BOGUS && same->fWritten);
    518         fRes = same->fRes;
    519         fWritten = same->fWritten;
    520     }
    521 }
    522 
    523 void
    524 ContainerResource::writeAllRes16(SRBRoot *bundle) {
    525     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    526         bundle->f16BitUnits.append((UChar)current->fRes16);
    527     }
    528     fWritten = TRUE;
    529 }
    530 
    531 void
    532 ArrayResource::handleWrite16(SRBRoot *bundle) {
    533     if (fCount == 0 && gFormatVersion > 1) {
    534         fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY);
    535         fWritten = TRUE;
    536         return;
    537     }
    538 
    539     int32_t res16 = 0;
    540     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    541         current->write16(bundle);
    542         res16 |= current->fRes16;
    543     }
    544     if (fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) {
    545         fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnits.length());
    546         bundle->f16BitUnits.append((UChar)fCount);
    547         writeAllRes16(bundle);
    548     }
    549 }
    550 
    551 void
    552 TableResource::handleWrite16(SRBRoot *bundle) {
    553     if (fCount == 0 && gFormatVersion > 1) {
    554         fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
    555         fWritten = TRUE;
    556         return;
    557     }
    558     /* Find the smallest table type that fits the data. */
    559     int32_t key16 = 0;
    560     int32_t res16 = 0;
    561     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    562         current->write16(bundle);
    563         key16 |= current->fKey16;
    564         res16 |= current->fRes16;
    565     }
    566     if(fCount > (uint32_t)bundle->fMaxTableLength) {
    567         bundle->fMaxTableLength = fCount;
    568     }
    569     if (fCount <= 0xffff && key16 >= 0) {
    570         if (res16 >= 0 && gFormatVersion > 1) {
    571             /* 16-bit count, key offsets and values */
    572             fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnits.length());
    573             bundle->f16BitUnits.append((UChar)fCount);
    574             for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    575                 bundle->f16BitUnits.append((UChar)current->fKey16);
    576             }
    577             writeAllRes16(bundle);
    578         } else {
    579             /* 16-bit count, 16-bit key offsets, 32-bit values */
    580             fTableType = URES_TABLE;
    581         }
    582     } else {
    583         /* 32-bit count, key offsets and values */
    584         fTableType = URES_TABLE32;
    585     }
    586 }
    587 
    588 void
    589 PseudoListResource::handleWrite16(SRBRoot * /*bundle*/) {
    590     fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
    591     fWritten = TRUE;
    592 }
    593 
    594 void
    595 SResource::write16(SRBRoot *bundle) {
    596     if (fKey >= 0) {
    597         // A tagged resource has a non-negative key index into the parsed key strings.
    598         // compactKeys() built a map from parsed key index to the final key index.
    599         // After the mapping, negative key indexes are used for shared pool bundle keys.
    600         fKey = bundle->mapKey(fKey);
    601         // If the key index fits into a Key16 for a Table or Table16,
    602         // then set the fKey16 field accordingly.
    603         // Otherwise keep it at -1.
    604         if (fKey >= 0) {
    605             if (fKey < bundle->fLocalKeyLimit) {
    606                 fKey16 = fKey;
    607             }
    608         } else {
    609             int32_t poolKeyIndex = fKey & 0x7fffffff;
    610             if (poolKeyIndex <= 0xffff) {
    611                 poolKeyIndex += bundle->fLocalKeyLimit;
    612                 if (poolKeyIndex <= 0xffff) {
    613                     fKey16 = poolKeyIndex;
    614                 }
    615             }
    616         }
    617     }
    618     /*
    619      * fRes != RES_BOGUS:
    620      * The resource item word was already precomputed, which means
    621      * no further data needs to be written.
    622      * This might be an integer, or an empty or UTF-16 v2 string,
    623      * an empty binary, etc.
    624      */
    625     if (fRes == RES_BOGUS) {
    626         handleWrite16(bundle);
    627     }
    628     // Compute fRes16 for precomputed as well as just-computed fRes.
    629     fRes16 = bundle->makeRes16(fRes);
    630 }
    631 
    632 void
    633 SResource::handleWrite16(SRBRoot * /*bundle*/) {
    634     /* Only a few resource types write 16-bit units. */
    635 }
    636 
    637 /*
    638  * Only called for UTF-16 v1 strings, and for aliases.
    639  * For UTF-16 v2 strings, preWrite() sees fRes != RES_BOGUS
    640  * and exits early.
    641  */
    642 void
    643 StringBaseResource::handlePreWrite(uint32_t *byteOffset) {
    644     /* Write the UTF-16 v1 string. */
    645     fRes = URES_MAKE_RESOURCE(fType, *byteOffset >> 2);
    646     *byteOffset += 4 + (length() + 1) * U_SIZEOF_UCHAR;
    647 }
    648 
    649 void
    650 IntVectorResource::handlePreWrite(uint32_t *byteOffset) {
    651     if (fCount == 0 && gFormatVersion > 1) {
    652         fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR);
    653         fWritten = TRUE;
    654     } else {
    655         fRes = URES_MAKE_RESOURCE(URES_INT_VECTOR, *byteOffset >> 2);
    656         *byteOffset += (1 + fCount) * 4;
    657     }
    658 }
    659 
    660 void
    661 BinaryResource::handlePreWrite(uint32_t *byteOffset) {
    662     uint32_t pad       = 0;
    663     uint32_t dataStart = *byteOffset + sizeof(fLength);
    664 
    665     if (dataStart % BIN_ALIGNMENT) {
    666         pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
    667         *byteOffset += pad;  /* pad == 4 or 8 or 12 */
    668     }
    669     fRes = URES_MAKE_RESOURCE(URES_BINARY, *byteOffset >> 2);
    670     *byteOffset += 4 + fLength;
    671 }
    672 
    673 void
    674 ContainerResource::preWriteAllRes(uint32_t *byteOffset) {
    675     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    676         current->preWrite(byteOffset);
    677     }
    678 }
    679 
    680 void
    681 ArrayResource::handlePreWrite(uint32_t *byteOffset) {
    682     preWriteAllRes(byteOffset);
    683     fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2);
    684     *byteOffset += (1 + fCount) * 4;
    685 }
    686 
    687 void
    688 TableResource::handlePreWrite(uint32_t *byteOffset) {
    689     preWriteAllRes(byteOffset);
    690     if (fTableType == URES_TABLE) {
    691         /* 16-bit count, 16-bit key offsets, 32-bit values */
    692         fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2);
    693         *byteOffset += 2 + fCount * 6;
    694     } else {
    695         /* 32-bit count, key offsets and values */
    696         fRes = URES_MAKE_RESOURCE(URES_TABLE32, *byteOffset >> 2);
    697         *byteOffset += 4 + fCount * 8;
    698     }
    699 }
    700 
    701 void
    702 SResource::preWrite(uint32_t *byteOffset) {
    703     if (fRes != RES_BOGUS) {
    704         /*
    705          * The resource item word was already precomputed, which means
    706          * no further data needs to be written.
    707          * This might be an integer, or an empty or UTF-16 v2 string,
    708          * an empty binary, etc.
    709          */
    710         return;
    711     }
    712     handlePreWrite(byteOffset);
    713     *byteOffset += calcPadding(*byteOffset);
    714 }
    715 
    716 void
    717 SResource::handlePreWrite(uint32_t * /*byteOffset*/) {
    718     assert(FALSE);
    719 }
    720 
    721 /*
    722  * Only called for UTF-16 v1 strings, and for aliases. For UTF-16 v2 strings,
    723  * write() sees fWritten and exits early.
    724  */
    725 void
    726 StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
    727     /* Write the UTF-16 v1 string. */
    728     int32_t len = length();
    729     udata_write32(mem, len);
    730     udata_writeUString(mem, getBuffer(), len + 1);
    731     *byteOffset += 4 + (len + 1) * U_SIZEOF_UCHAR;
    732     fWritten = TRUE;
    733 }
    734 
    735 void
    736 ContainerResource::writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset) {
    737     uint32_t i = 0;
    738     for (SResource *current = fFirst; current != NULL; ++i, current = current->fNext) {
    739         current->write(mem, byteOffset);
    740     }
    741     assert(i == fCount);
    742 }
    743 
    744 void
    745 ContainerResource::writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset) {
    746     for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    747         udata_write32(mem, current->fRes);
    748     }
    749     *byteOffset += fCount * 4;
    750 }
    751 
    752 void
    753 ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
    754     writeAllRes(mem, byteOffset);
    755     udata_write32(mem, fCount);
    756     *byteOffset += 4;
    757     writeAllRes32(mem, byteOffset);
    758 }
    759 
    760 void
    761 IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
    762     udata_write32(mem, fCount);
    763     for(uint32_t i = 0; i < fCount; ++i) {
    764       udata_write32(mem, fArray[i]);
    765     }
    766     *byteOffset += (1 + fCount) * 4;
    767 }
    768 
    769 void
    770 BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
    771     uint32_t pad       = 0;
    772     uint32_t dataStart = *byteOffset + sizeof(fLength);
    773 
    774     if (dataStart % BIN_ALIGNMENT) {
    775         pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT);
    776         udata_writePadding(mem, pad);  /* pad == 4 or 8 or 12 */
    777         *byteOffset += pad;
    778     }
    779 
    780     udata_write32(mem, fLength);
    781     if (fLength > 0) {
    782         udata_writeBlock(mem, fData, fLength);
    783     }
    784     *byteOffset += 4 + fLength;
    785 }
    786 
    787 void
    788 TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
    789     writeAllRes(mem, byteOffset);
    790     if(fTableType == URES_TABLE) {
    791         udata_write16(mem, (uint16_t)fCount);
    792         for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    793             udata_write16(mem, current->fKey16);
    794         }
    795         *byteOffset += (1 + fCount)* 2;
    796         if ((fCount & 1) == 0) {
    797             /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */
    798             udata_writePadding(mem, 2);
    799             *byteOffset += 2;
    800         }
    801     } else /* URES_TABLE32 */ {
    802         udata_write32(mem, fCount);
    803         for (SResource *current = fFirst; current != NULL; current = current->fNext) {
    804             udata_write32(mem, (uint32_t)current->fKey);
    805         }
    806         *byteOffset += (1 + fCount)* 4;
    807     }
    808     writeAllRes32(mem, byteOffset);
    809 }
    810 
    811 void
    812 SResource::write(UNewDataMemory *mem, uint32_t *byteOffset) {
    813     if (fWritten) {
    814         assert(fRes != RES_BOGUS);
    815         return;
    816     }
    817     handleWrite(mem, byteOffset);
    818     uint8_t paddingSize = calcPadding(*byteOffset);
    819     if (paddingSize > 0) {
    820         udata_writePadding(mem, paddingSize);
    821         *byteOffset += paddingSize;
    822     }
    823     fWritten = TRUE;
    824 }
    825 
    826 void
    827 SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/) {
    828     assert(FALSE);
    829 }
    830 
    831 void SRBRoot::write(const char *outputDir, const char *outputPkg,
    832                     char *writtenFilename, int writtenFilenameLen,
    833                     UErrorCode &errorCode) {
    834     UNewDataMemory *mem        = NULL;
    835     uint32_t        byteOffset = 0;
    836     uint32_t        top, size;
    837     char            dataName[1024];
    838     int32_t         indexes[URES_INDEX_TOP];
    839 
    840     compactKeys(errorCode);
    841     /*
    842      * Add padding bytes to fKeys so that fKeysTop is 4-aligned.
    843      * Safe because the capacity is a multiple of 4.
    844      */
    845     while (fKeysTop & 3) {
    846         fKeys[fKeysTop++] = (char)0xaa;
    847     }
    848     /*
    849      * In URES_TABLE, use all local key offsets that fit into 16 bits,
    850      * and use the remaining 16-bit offsets for pool key offsets
    851      * if there are any.
    852      * If there are no local keys, then use the whole 16-bit space
    853      * for pool key offsets.
    854      * Note: This cannot be changed without changing the major formatVersion.
    855      */
    856     if (fKeysBottom < fKeysTop) {
    857         if (fKeysTop <= 0x10000) {
    858             fLocalKeyLimit = fKeysTop;
    859         } else {
    860             fLocalKeyLimit = 0x10000;
    861         }
    862     } else {
    863         fLocalKeyLimit = 0;
    864     }
    865 
    866     UHashtable *stringSet;
    867     if (gFormatVersion > 1) {
    868         stringSet = uhash_open(string_hash, string_comp, string_comp, &errorCode);
    869         if (U_SUCCESS(errorCode) &&
    870                 fUsePoolBundle != NULL && fUsePoolBundle->fStrings != NULL) {
    871             for (SResource *current = fUsePoolBundle->fStrings->fFirst;
    872                     current != NULL;
    873                     current = current->fNext) {
    874                 StringResource *sr = static_cast<StringResource *>(current);
    875                 sr->fNumCopies = 0;
    876                 sr->fNumUnitsSaved = 0;
    877                 uhash_put(stringSet, sr, sr, &errorCode);
    878             }
    879         }
    880         fRoot->preflightStrings(this, stringSet, errorCode);
    881     } else {
    882         stringSet = NULL;
    883     }
    884     if (fStringsForm == STRINGS_UTF16_V2 && f16BitStringsLength > 0) {
    885         compactStringsV2(stringSet, errorCode);
    886     }
    887     uhash_close(stringSet);
    888     if (U_FAILURE(errorCode)) {
    889         return;
    890     }
    891 
    892     int32_t formatVersion = gFormatVersion;
    893     if (fPoolStringIndexLimit != 0) {
    894         int32_t sum = fPoolStringIndexLimit + fLocalStringIndexLimit;
    895         if ((sum - 1) > RES_MAX_OFFSET) {
    896             errorCode = U_BUFFER_OVERFLOW_ERROR;
    897             return;
    898         }
    899         if (fPoolStringIndexLimit < 0x10000 && sum <= 0x10000) {
    900             // 16-bit indexes work for all pool + local strings.
    901             fPoolStringIndex16Limit = fPoolStringIndexLimit;
    902         } else {
    903             // Set the pool index threshold so that 16-bit indexes work
    904             // for some pool strings and some local strings.
    905             fPoolStringIndex16Limit = (int32_t)(
    906                     ((int64_t)fPoolStringIndexLimit * 0xffff) / sum);
    907         }
    908     } else if (gIsDefaultFormatVersion && formatVersion == 3 && !fIsPoolBundle) {
    909         // If we just default to formatVersion 3
    910         // but there are no pool bundle strings to share
    911         // and we do not write a pool bundle,
    912         // then write formatVersion 2 which is just as good.
    913         formatVersion = 2;
    914     }
    915 
    916     fRoot->write16(this);
    917     if (f16BitUnits.isBogus()) {
    918         errorCode = U_MEMORY_ALLOCATION_ERROR;
    919         return;
    920     }
    921     if (f16BitUnits.length() & 1) {
    922         f16BitUnits.append((UChar)0xaaaa);  /* pad to multiple of 4 bytes */
    923     }
    924     /* all keys have been mapped */
    925     uprv_free(fKeyMap);
    926     fKeyMap = NULL;
    927 
    928     byteOffset = fKeysTop + f16BitUnits.length() * 2;
    929     fRoot->preWrite(&byteOffset);
    930 
    931     /* total size including the root item */
    932     top = byteOffset;
    933 
    934     if (writtenFilename && writtenFilenameLen) {
    935         *writtenFilename = 0;
    936     }
    937 
    938     if (writtenFilename) {
    939        int32_t off = 0, len = 0;
    940        if (outputDir) {
    941            len = (int32_t)uprv_strlen(outputDir);
    942            if (len > writtenFilenameLen) {
    943                len = writtenFilenameLen;
    944            }
    945            uprv_strncpy(writtenFilename, outputDir, len);
    946        }
    947        if (writtenFilenameLen -= len) {
    948            off += len;
    949            writtenFilename[off] = U_FILE_SEP_CHAR;
    950            if (--writtenFilenameLen) {
    951                ++off;
    952                if(outputPkg != NULL)
    953                {
    954                    uprv_strcpy(writtenFilename+off, outputPkg);
    955                    off += (int32_t)uprv_strlen(outputPkg);
    956                    writtenFilename[off] = '_';
    957                    ++off;
    958                }
    959 
    960                len = (int32_t)uprv_strlen(fLocale);
    961                if (len > writtenFilenameLen) {
    962                    len = writtenFilenameLen;
    963                }
    964                uprv_strncpy(writtenFilename + off, fLocale, len);
    965                if (writtenFilenameLen -= len) {
    966                    off += len;
    967                    len = 5;
    968                    if (len > writtenFilenameLen) {
    969                        len = writtenFilenameLen;
    970                    }
    971                    uprv_strncpy(writtenFilename +  off, ".res", len);
    972                }
    973            }
    974        }
    975     }
    976 
    977     if(outputPkg)
    978     {
    979         uprv_strcpy(dataName, outputPkg);
    980         uprv_strcat(dataName, "_");
    981         uprv_strcat(dataName, fLocale);
    982     }
    983     else
    984     {
    985         uprv_strcpy(dataName, fLocale);
    986     }
    987 
    988     uprv_memcpy(dataInfo.formatVersion, gFormatVersions + formatVersion, sizeof(UVersionInfo));
    989 
    990     mem = udata_create(outputDir, "res", dataName,
    991                        &dataInfo, (gIncludeCopyright==TRUE)? U_COPYRIGHT_STRING:NULL, &errorCode);
    992     if(U_FAILURE(errorCode)){
    993         return;
    994     }
    995 
    996     /* write the root item */
    997     udata_write32(mem, fRoot->fRes);
    998 
    999     /*
   1000      * formatVersion 1.1 (ICU 2.8):
   1001      * write int32_t indexes[] after root and before the key strings
   1002      * to make it easier to parse resource bundles in icuswap or from Java etc.
   1003      */
   1004     uprv_memset(indexes, 0, sizeof(indexes));
   1005     indexes[URES_INDEX_LENGTH]=             fIndexLength;
   1006     indexes[URES_INDEX_KEYS_TOP]=           fKeysTop>>2;
   1007     indexes[URES_INDEX_RESOURCES_TOP]=      (int32_t)(top>>2);
   1008     indexes[URES_INDEX_BUNDLE_TOP]=         indexes[URES_INDEX_RESOURCES_TOP];
   1009     indexes[URES_INDEX_MAX_TABLE_LENGTH]=   fMaxTableLength;
   1010 
   1011     /*
   1012      * formatVersion 1.2 (ICU 3.6):
   1013      * write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set
   1014      * the memset() above initialized all indexes[] to 0
   1015      */
   1016     if (fNoFallback) {
   1017         indexes[URES_INDEX_ATTRIBUTES]=URES_ATT_NO_FALLBACK;
   1018     }
   1019     /*
   1020      * formatVersion 2.0 (ICU 4.4):
   1021      * more compact string value storage, optional pool bundle
   1022      */
   1023     if (URES_INDEX_16BIT_TOP < fIndexLength) {
   1024         indexes[URES_INDEX_16BIT_TOP] = (fKeysTop>>2) + (f16BitUnits.length()>>1);
   1025     }
   1026     if (URES_INDEX_POOL_CHECKSUM < fIndexLength) {
   1027         if (fIsPoolBundle) {
   1028             indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK;
   1029             uint32_t checksum = computeCRC((const char *)(fKeys + fKeysBottom),
   1030                                            (uint32_t)(fKeysTop - fKeysBottom), 0);
   1031             if (f16BitUnits.length() <= 1) {
   1032                 // no pool strings to checksum
   1033             } else if (U_IS_BIG_ENDIAN) {
   1034                 checksum = computeCRC((const char *)f16BitUnits.getBuffer(),
   1035                                       (uint32_t)f16BitUnits.length() * 2, checksum);
   1036             } else {
   1037                 // Swap to big-endian so we get the same checksum on all platforms
   1038                 // (except for charset family, due to the key strings).
   1039                 UnicodeString s(f16BitUnits);
   1040                 s.append((UChar)1);  // Ensure that we own this buffer.
   1041                 assert(!s.isBogus());
   1042                 uint16_t *p = (uint16_t *)s.getBuffer();
   1043                 for (int32_t count = f16BitUnits.length(); count > 0; --count) {
   1044                     uint16_t x = *p;
   1045                     *p++ = (uint16_t)((x << 8) | (x >> 8));
   1046                 }
   1047                 checksum = computeCRC((const char *)p,
   1048                                       (uint32_t)f16BitUnits.length() * 2, checksum);
   1049             }
   1050             indexes[URES_INDEX_POOL_CHECKSUM] = (int32_t)checksum;
   1051         } else if (gUsePoolBundle) {
   1052             indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE;
   1053             indexes[URES_INDEX_POOL_CHECKSUM] = fUsePoolBundle->fChecksum;
   1054         }
   1055     }
   1056     // formatVersion 3 (ICU 56):
   1057     // share string values via pool bundle strings
   1058     indexes[URES_INDEX_LENGTH] |= fPoolStringIndexLimit << 8;  // bits 23..0 -> 31..8
   1059     indexes[URES_INDEX_ATTRIBUTES] |= (fPoolStringIndexLimit >> 12) & 0xf000;  // bits 27..24 -> 15..12
   1060     indexes[URES_INDEX_ATTRIBUTES] |= fPoolStringIndex16Limit << 16;
   1061 
   1062     /* write the indexes[] */
   1063     udata_writeBlock(mem, indexes, fIndexLength*4);
   1064 
   1065     /* write the table key strings */
   1066     udata_writeBlock(mem, fKeys+fKeysBottom,
   1067                           fKeysTop-fKeysBottom);
   1068 
   1069     /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */
   1070     udata_writeBlock(mem, f16BitUnits.getBuffer(), f16BitUnits.length()*2);
   1071 
   1072     /* write all of the bundle contents: the root item and its children */
   1073     byteOffset = fKeysTop + f16BitUnits.length() * 2;
   1074     fRoot->write(mem, &byteOffset);
   1075     assert(byteOffset == top);
   1076 
   1077     size = udata_finish(mem, &errorCode);
   1078     if(top != size) {
   1079         fprintf(stderr, "genrb error: wrote %u bytes but counted %u\n",
   1080                 (int)size, (int)top);
   1081         errorCode = U_INTERNAL_PROGRAM_ERROR;
   1082     }
   1083 }
   1084 
   1085 /* Opening Functions */
   1086 
   1087 TableResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
   1088     LocalPointer<TableResource> res(new TableResource(bundle, tag, comment, *status), *status);
   1089     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1090 }
   1091 
   1092 ArrayResource* array_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
   1093     LocalPointer<ArrayResource> res(new ArrayResource(bundle, tag, comment, *status), *status);
   1094     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1095 }
   1096 
   1097 struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
   1098     LocalPointer<SResource> res(
   1099             new StringResource(bundle, tag, value, len, comment, *status), *status);
   1100     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1101 }
   1102 
   1103 struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
   1104     LocalPointer<SResource> res(
   1105             new AliasResource(bundle, tag, value, len, comment, *status), *status);
   1106     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1107 }
   1108 
   1109 IntVectorResource *intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
   1110     LocalPointer<IntVectorResource> res(
   1111             new IntVectorResource(bundle, tag, comment, *status), *status);
   1112     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1113 }
   1114 
   1115 struct SResource *int_open(struct SRBRoot *bundle, const char *tag, int32_t value, const struct UString* comment, UErrorCode *status) {
   1116     LocalPointer<SResource> res(new IntResource(bundle, tag, value, comment, *status), *status);
   1117     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1118 }
   1119 
   1120 struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t length, uint8_t *data, const char* fileName, const struct UString* comment, UErrorCode *status) {
   1121     LocalPointer<SResource> res(
   1122             new BinaryResource(bundle, tag, length, data, fileName, comment, *status), *status);
   1123     return U_SUCCESS(*status) ? res.orphan() : NULL;
   1124 }
   1125 
   1126 SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCode)
   1127         : fRoot(NULL), fLocale(NULL), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE),
   1128           fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle),
   1129           fKeys(NULL), fKeyMap(NULL),
   1130           fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), fKeysCount(0), fLocalKeyLimit(0),
   1131           f16BitUnits(), f16BitStringsLength(0),
   1132           fUsePoolBundle(&kNoPoolBundle),
   1133           fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0),
   1134           fWritePoolBundle(NULL) {
   1135     if (U_FAILURE(errorCode)) {
   1136         return;
   1137     }
   1138 
   1139     if (gFormatVersion > 1) {
   1140         // f16BitUnits must start with a zero for empty resources.
   1141         // We might be able to omit it if there are no empty 16-bit resources.
   1142         f16BitUnits.append((UChar)0);
   1143     }
   1144 
   1145     fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE);
   1146     if (isPoolBundle) {
   1147         fRoot = new PseudoListResource(this, errorCode);
   1148     } else {
   1149         fRoot = new TableResource(this, NULL, comment, errorCode);
   1150     }
   1151     if (fKeys == NULL || fRoot == NULL || U_FAILURE(errorCode)) {
   1152         if (U_SUCCESS(errorCode)) {
   1153             errorCode = U_MEMORY_ALLOCATION_ERROR;
   1154         }
   1155         return;
   1156     }
   1157 
   1158     fKeysCapacity = KEY_SPACE_SIZE;
   1159     /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */
   1160     if (gUsePoolBundle || isPoolBundle) {
   1161         fIndexLength = URES_INDEX_POOL_CHECKSUM + 1;
   1162     } else if (gFormatVersion >= 2) {
   1163         fIndexLength = URES_INDEX_16BIT_TOP + 1;
   1164     } else /* formatVersion 1 */ {
   1165         fIndexLength = URES_INDEX_ATTRIBUTES + 1;
   1166     }
   1167     fKeysBottom = (1 /* root */ + fIndexLength) * 4;
   1168     uprv_memset(fKeys, 0, fKeysBottom);
   1169     fKeysTop = fKeysBottom;
   1170 
   1171     if (gFormatVersion == 1) {
   1172         fStringsForm = STRINGS_UTF16_V1;
   1173     } else {
   1174         fStringsForm = STRINGS_UTF16_V2;
   1175     }
   1176 }
   1177 
   1178 /* Closing Functions */
   1179 
   1180 void res_close(struct SResource *res) {
   1181     delete res;
   1182 }
   1183 
   1184 SRBRoot::~SRBRoot() {
   1185     delete fRoot;
   1186     uprv_free(fLocale);
   1187     uprv_free(fKeys);
   1188     uprv_free(fKeyMap);
   1189 }
   1190 
   1191 /* Misc Functions */
   1192 
   1193 void SRBRoot::setLocale(UChar *locale, UErrorCode &errorCode) {
   1194     if(U_FAILURE(errorCode)) {
   1195         return;
   1196     }
   1197 
   1198     uprv_free(fLocale);
   1199     fLocale = (char*) uprv_malloc(sizeof(char) * (u_strlen(locale)+1));
   1200     if(fLocale == NULL) {
   1201         errorCode = U_MEMORY_ALLOCATION_ERROR;
   1202         return;
   1203     }
   1204 
   1205     u_UCharsToChars(locale, fLocale, u_strlen(locale)+1);
   1206 }
   1207 
   1208 const char *
   1209 SRBRoot::getKeyString(int32_t key) const {
   1210     if (key < 0) {
   1211         return fUsePoolBundle->fKeys + (key & 0x7fffffff);
   1212     } else {
   1213         return fKeys + key;
   1214     }
   1215 }
   1216 
   1217 const char *
   1218 SResource::getKeyString(const SRBRoot *bundle) const {
   1219     if (fKey == -1) {
   1220         return NULL;
   1221     }
   1222     return bundle->getKeyString(fKey);
   1223 }
   1224 
   1225 const char *
   1226 SRBRoot::getKeyBytes(int32_t *pLength) const {
   1227     *pLength = fKeysTop - fKeysBottom;
   1228     return fKeys + fKeysBottom;
   1229 }
   1230 
   1231 int32_t
   1232 SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) {
   1233     int32_t keypos;
   1234 
   1235     if (U_FAILURE(errorCode)) {
   1236         return -1;
   1237     }
   1238     if (length < 0 || (keyBytes == NULL && length != 0)) {
   1239         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1240         return -1;
   1241     }
   1242     if (length == 0) {
   1243         return fKeysTop;
   1244     }
   1245 
   1246     keypos = fKeysTop;
   1247     fKeysTop += length;
   1248     if (fKeysTop >= fKeysCapacity) {
   1249         /* overflow - resize the keys buffer */
   1250         fKeysCapacity += KEY_SPACE_SIZE;
   1251         fKeys = static_cast<char *>(uprv_realloc(fKeys, fKeysCapacity));
   1252         if(fKeys == NULL) {
   1253             errorCode = U_MEMORY_ALLOCATION_ERROR;
   1254             return -1;
   1255         }
   1256     }
   1257 
   1258     uprv_memcpy(fKeys + keypos, keyBytes, length);
   1259 
   1260     return keypos;
   1261 }
   1262 
   1263 int32_t
   1264 SRBRoot::addTag(const char *tag, UErrorCode &errorCode) {
   1265     int32_t keypos;
   1266 
   1267     if (U_FAILURE(errorCode)) {
   1268         return -1;
   1269     }
   1270 
   1271     if (tag == NULL) {
   1272         /* no error: the root table and array items have no keys */
   1273         return -1;
   1274     }
   1275 
   1276     keypos = addKeyBytes(tag, (int32_t)(uprv_strlen(tag) + 1), errorCode);
   1277     if (U_SUCCESS(errorCode)) {
   1278         ++fKeysCount;
   1279     }
   1280     return keypos;
   1281 }
   1282 
   1283 static int32_t
   1284 compareInt32(int32_t lPos, int32_t rPos) {
   1285     /*
   1286      * Compare possibly-negative key offsets. Don't just return lPos - rPos
   1287      * because that is prone to negative-integer underflows.
   1288      */
   1289     if (lPos < rPos) {
   1290         return -1;
   1291     } else if (lPos > rPos) {
   1292         return 1;
   1293     } else {
   1294         return 0;
   1295     }
   1296 }
   1297 
   1298 static int32_t U_CALLCONV
   1299 compareKeySuffixes(const void *context, const void *l, const void *r) {
   1300     const struct SRBRoot *bundle=(const struct SRBRoot *)context;
   1301     int32_t lPos = ((const KeyMapEntry *)l)->oldpos;
   1302     int32_t rPos = ((const KeyMapEntry *)r)->oldpos;
   1303     const char *lStart = bundle->getKeyString(lPos);
   1304     const char *lLimit = lStart;
   1305     const char *rStart = bundle->getKeyString(rPos);
   1306     const char *rLimit = rStart;
   1307     int32_t diff;
   1308     while (*lLimit != 0) { ++lLimit; }
   1309     while (*rLimit != 0) { ++rLimit; }
   1310     /* compare keys in reverse character order */
   1311     while (lStart < lLimit && rStart < rLimit) {
   1312         diff = (int32_t)(uint8_t)*--lLimit - (int32_t)(uint8_t)*--rLimit;
   1313         if (diff != 0) {
   1314             return diff;
   1315         }
   1316     }
   1317     /* sort equal suffixes by descending key length */
   1318     diff = (int32_t)(rLimit - rStart) - (int32_t)(lLimit - lStart);
   1319     if (diff != 0) {
   1320         return diff;
   1321     }
   1322     /* Sort pool bundle keys first (negative oldpos), and otherwise keys in parsing order. */
   1323     return compareInt32(lPos, rPos);
   1324 }
   1325 
   1326 static int32_t U_CALLCONV
   1327 compareKeyNewpos(const void * /*context*/, const void *l, const void *r) {
   1328     return compareInt32(((const KeyMapEntry *)l)->newpos, ((const KeyMapEntry *)r)->newpos);
   1329 }
   1330 
   1331 static int32_t U_CALLCONV
   1332 compareKeyOldpos(const void * /*context*/, const void *l, const void *r) {
   1333     return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos);
   1334 }
   1335 
   1336 void
   1337 SRBRoot::compactKeys(UErrorCode &errorCode) {
   1338     KeyMapEntry *map;
   1339     char *keys;
   1340     int32_t i;
   1341     int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount;
   1342     if (U_FAILURE(errorCode) || fKeysCount == 0 || fKeyMap != NULL) {
   1343         return;
   1344     }
   1345     map = (KeyMapEntry *)uprv_malloc(keysCount * sizeof(KeyMapEntry));
   1346     if (map == NULL) {
   1347         errorCode = U_MEMORY_ALLOCATION_ERROR;
   1348         return;
   1349     }
   1350     keys = (char *)fUsePoolBundle->fKeys;
   1351     for (i = 0; i < fUsePoolBundle->fKeysCount; ++i) {
   1352         map[i].oldpos =
   1353             (int32_t)(keys - fUsePoolBundle->fKeys) | 0x80000000;  /* negative oldpos */
   1354         map[i].newpos = 0;
   1355         while (*keys != 0) { ++keys; }  /* skip the key */
   1356         ++keys;  /* skip the NUL */
   1357     }
   1358     keys = fKeys + fKeysBottom;
   1359     for (; i < keysCount; ++i) {
   1360         map[i].oldpos = (int32_t)(keys - fKeys);
   1361         map[i].newpos = 0;
   1362         while (*keys != 0) { ++keys; }  /* skip the key */
   1363         ++keys;  /* skip the NUL */
   1364     }
   1365     /* Sort the keys so that each one is immediately followed by all of its suffixes. */
   1366     uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
   1367                    compareKeySuffixes, this, FALSE, &errorCode);
   1368     /*
   1369      * Make suffixes point into earlier, longer strings that contain them
   1370      * and mark the old, now unused suffix bytes as deleted.
   1371      */
   1372     if (U_SUCCESS(errorCode)) {
   1373         keys = fKeys;
   1374         for (i = 0; i < keysCount;) {
   1375             /*
   1376              * This key is not a suffix of the previous one;
   1377              * keep this one and delete the following ones that are
   1378              * suffixes of this one.
   1379              */
   1380             const char *key;
   1381             const char *keyLimit;
   1382             int32_t j = i + 1;
   1383             map[i].newpos = map[i].oldpos;
   1384             if (j < keysCount && map[j].oldpos < 0) {
   1385                 /* Key string from the pool bundle, do not delete. */
   1386                 i = j;
   1387                 continue;
   1388             }
   1389             key = getKeyString(map[i].oldpos);
   1390             for (keyLimit = key; *keyLimit != 0; ++keyLimit) {}
   1391             for (; j < keysCount && map[j].oldpos >= 0; ++j) {
   1392                 const char *k;
   1393                 char *suffix;
   1394                 const char *suffixLimit;
   1395                 int32_t offset;
   1396                 suffix = keys + map[j].oldpos;
   1397                 for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {}
   1398                 offset = (int32_t)(keyLimit - key) - (suffixLimit - suffix);
   1399                 if (offset < 0) {
   1400                     break;  /* suffix cannot be longer than the original */
   1401                 }
   1402                 /* Is it a suffix of the earlier, longer key? */
   1403                 for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {}
   1404                 if (suffix == suffixLimit && *k == *suffixLimit) {
   1405                     map[j].newpos = map[i].oldpos + offset;  /* yes, point to the earlier key */
   1406                     /* mark the suffix as deleted */
   1407                     while (*suffix != 0) { *suffix++ = 1; }
   1408                     *suffix = 1;
   1409                 } else {
   1410                     break;  /* not a suffix, restart from here */
   1411                 }
   1412             }
   1413             i = j;
   1414         }
   1415         /*
   1416          * Re-sort by newpos, then modify the key characters array in-place
   1417          * to squeeze out unused bytes, and readjust the newpos offsets.
   1418          */
   1419         uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
   1420                        compareKeyNewpos, NULL, FALSE, &errorCode);
   1421         if (U_SUCCESS(errorCode)) {
   1422             int32_t oldpos, newpos, limit;
   1423             oldpos = newpos = fKeysBottom;
   1424             limit = fKeysTop;
   1425             /* skip key offsets that point into the pool bundle rather than this new bundle */
   1426             for (i = 0; i < keysCount && map[i].newpos < 0; ++i) {}
   1427             if (i < keysCount) {
   1428                 while (oldpos < limit) {
   1429                     if (keys[oldpos] == 1) {
   1430                         ++oldpos;  /* skip unused bytes */
   1431                     } else {
   1432                         /* adjust the new offsets for keys starting here */
   1433                         while (i < keysCount && map[i].newpos == oldpos) {
   1434                             map[i++].newpos = newpos;
   1435                         }
   1436                         /* move the key characters to their new position */
   1437                         keys[newpos++] = keys[oldpos++];
   1438                     }
   1439                 }
   1440                 assert(i == keysCount);
   1441             }
   1442             fKeysTop = newpos;
   1443             /* Re-sort once more, by old offsets for binary searching. */
   1444             uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
   1445                            compareKeyOldpos, NULL, FALSE, &errorCode);
   1446             if (U_SUCCESS(errorCode)) {
   1447                 /* key size reduction by limit - newpos */
   1448                 fKeyMap = map;
   1449                 map = NULL;
   1450             }
   1451         }
   1452     }
   1453     uprv_free(map);
   1454 }
   1455 
   1456 static int32_t U_CALLCONV
   1457 compareStringSuffixes(const void * /*context*/, const void *l, const void *r) {
   1458     const StringResource *left = *((const StringResource **)l);
   1459     const StringResource *right = *((const StringResource **)r);
   1460     const UChar *lStart = left->getBuffer();
   1461     const UChar *lLimit = lStart + left->length();
   1462     const UChar *rStart = right->getBuffer();
   1463     const UChar *rLimit = rStart + right->length();
   1464     int32_t diff;
   1465     /* compare keys in reverse character order */
   1466     while (lStart < lLimit && rStart < rLimit) {
   1467         diff = (int32_t)*--lLimit - (int32_t)*--rLimit;
   1468         if (diff != 0) {
   1469             return diff;
   1470         }
   1471     }
   1472     /* sort equal suffixes by descending string length */
   1473     return right->length() - left->length();
   1474 }
   1475 
   1476 static int32_t U_CALLCONV
   1477 compareStringLengths(const void * /*context*/, const void *l, const void *r) {
   1478     const StringResource *left = *((const StringResource **)l);
   1479     const StringResource *right = *((const StringResource **)r);
   1480     int32_t diff;
   1481     /* Make "is suffix of another string" compare greater than a non-suffix. */
   1482     diff = (int)(left->fSame != NULL) - (int)(right->fSame != NULL);
   1483     if (diff != 0) {
   1484         return diff;
   1485     }
   1486     /* sort by ascending string length */
   1487     diff = left->length() - right->length();
   1488     if (diff != 0) {
   1489         return diff;
   1490     }
   1491     // sort by descending size reduction
   1492     diff = right->fNumUnitsSaved - left->fNumUnitsSaved;
   1493     if (diff != 0) {
   1494         return diff;
   1495     }
   1496     // sort lexically
   1497     return left->fString.compare(right->fString);
   1498 }
   1499 
   1500 void
   1501 StringResource::writeUTF16v2(int32_t base, UnicodeString &dest) {
   1502     int32_t len = length();
   1503     fRes = URES_MAKE_RESOURCE(URES_STRING_V2, base + dest.length());
   1504     fWritten = TRUE;
   1505     switch(fNumCharsForLength) {
   1506     case 0:
   1507         break;
   1508     case 1:
   1509         dest.append((UChar)(0xdc00 + len));
   1510         break;
   1511     case 2:
   1512         dest.append((UChar)(0xdfef + (len >> 16)));
   1513         dest.append((UChar)len);
   1514         break;
   1515     case 3:
   1516         dest.append((UChar)0xdfff);
   1517         dest.append((UChar)(len >> 16));
   1518         dest.append((UChar)len);
   1519         break;
   1520     default:
   1521         break;  /* will not occur */
   1522     }
   1523     dest.append(fString);
   1524     dest.append((UChar)0);
   1525 }
   1526 
   1527 void
   1528 SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) {
            // Packs the strings of stringSet into f16BitUnits.
            // Steps: (1) copy the set's StringResource keys into an array;
            // (2) sort with compareStringSuffixes and link every eligible suffix
            //     string to the longer string that ends with it;
            // (3) re-sort with compareStringLengths and write the strings.
            // A pool bundle (fIsPoolBundle) writes only sufficiently shared strings;
            // any other bundle writes its still-unwritten non-suffix strings and
            // then resolves the suffix strings to offsets inside them.
            // stringSet: hash table whose keys are StringResource pointers.
            // errorCode: in/out; nothing is done if it already indicates failure,
            //            and it is set to U_MEMORY_ALLOCATION_ERROR when
            //            f16BitUnits turns out bogus or a pool string cannot be allocated.
   1529     if (U_FAILURE(errorCode)) {
   1530         return;
   1531     }
   1532     // Store the StringResource pointers in an array for
   1533     // easy sorting and processing.
   1534     // We enumerate a set of strings, so there are no duplicates.
   1535     int32_t count = uhash_count(stringSet);
            // NOTE(review): LocalArray presumably takes ownership of the new[] array and
            // reports a NULL allocation through errorCode -- confirm against localpointer.h.
   1536     LocalArray<StringResource *> array(new StringResource *[count], errorCode);
   1537     if (U_FAILURE(errorCode)) {
   1538         return;
   1539     }
            // pos is the hash table iteration cursor; each element's key is one StringResource.
   1540     for (int32_t pos = UHASH_FIRST, i = 0; i < count; ++i) {
   1541         array[i] = (StringResource *)uhash_nextElement(stringSet, &pos)->key.pointer;
   1542     }
   1543     /* Sort the strings so that each one is immediately followed by all of its suffixes. */
            // The item size passed to the sort is the size of one pointer;
            // the array elements are StringResource pointers.
   1544     uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
   1545                    compareStringSuffixes, NULL, FALSE, &errorCode);
   1546     if (U_FAILURE(errorCode)) {
   1547         return;
   1548     }
   1549     /*
   1550      * Make suffixes point into earlier, longer strings that contain them.
   1551      * Temporarily use fSame and fSuffixOffset for suffix strings to
   1552      * refer to the remaining ones.
   1553      */
   1554     for (int32_t i = 0; i < count;) {
   1555         /*
   1556          * This string is not a suffix of the previous one;
   1557          * write this one and subsume the following ones that are
   1558          * suffixes of this one.
   1559          */
   1560         StringResource *res = array[i];
                // Start with the units saved by the copies of this string beyond the first one.
   1561         res->fNumUnitsSaved = (res->fNumCopies - 1) * res->get16BitStringsLength();
   1562         // Whole duplicates of pool strings are already accounted for in fPoolStringIndexLimit,
   1563         // see StringResource::handlePreflightStrings().
   1564         int32_t j;
   1565         for (j = i + 1; j < count; ++j) {
   1566             StringResource *suffixRes = array[j];
   1567             /* Is it a suffix of the earlier, longer string? */
   1568             if (res->fString.endsWith(suffixRes->fString)) {
   1569                 assert(res->length() != suffixRes->length());  // Set strings are unique.
   1570                 if (suffixRes->fWritten) {
   1571                     // Pool string, skip.
   1572                 } else if (suffixRes->fNumCharsForLength == 0) {
   1573                     /* yes, point to the earlier string */
                            // Only strings without explicit length units are shared this way;
                            // fSuffixOffset is the number of units of res that precede the suffix.
   1574                     suffixRes->fSame = res;
   1575                     suffixRes->fSuffixOffset = res->length() - suffixRes->length();
   1576                     if (res->fWritten) {
   1577                         // Suffix-share res which is a pool string.
   1578                         // Compute the resource word and collect the maximum.
                                // The suffix starts after res's length units and after
                                // the leading part of res that it does not share.
   1579                         suffixRes->fRes =
   1580                                 res->fRes + res->fNumCharsForLength + suffixRes->fSuffixOffset;
   1581                         int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(suffixRes->fRes);
   1582                         if (poolStringIndex >= fPoolStringIndexLimit) {
   1583                             fPoolStringIndexLimit = poolStringIndex + 1;
   1584                         }
   1585                         suffixRes->fWritten = TRUE;
   1586                     }
                            // All copies of the suffix string count as units saved by res.
   1587                     res->fNumUnitsSaved += suffixRes->fNumCopies * suffixRes->get16BitStringsLength();
   1588                 } else {
   1589                     /* write the suffix by itself if we need explicit length */
   1590                 }
   1591             } else {
   1592                 break;  /* not a suffix, restart from here */
   1593             }
   1594         }
                // Continue the outer loop with the first string that is not a suffix of res.
   1595         i = j;
   1596     }
   1597     /*
   1598      * Re-sort the strings by ascending length (except suffixes last)
   1599      * to optimize for URES_TABLE16 and URES_ARRAY16:
   1600      * Keep as many as possible within reach of 16-bit offsets.
   1601      */
   1602     uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
   1603                    compareStringLengths, NULL, FALSE, &errorCode);
   1604     if (U_FAILURE(errorCode)) {
   1605         return;
   1606     }
   1607     if (fIsPoolBundle) {
   1608         // Write strings that are sufficiently shared.
   1609         // Avoid writing other strings.
                // The three counters below are only used for the statistics printed at the end.
   1610         int32_t numStringsWritten = 0;
   1611         int32_t numUnitsSaved = 0;
   1612         int32_t numUnitsNotSaved = 0;
   1613         for (int32_t i = 0; i < count; ++i) {
   1614             StringResource *res = array[i];
   1615             // Maximum pool string index when suffix-sharing the last character.
   1616             int32_t maxStringIndex =
   1617                     f16BitUnits.length() + res->fNumCharsForLength + res->length() - 1;
                    // Write the string only if it saves enough units and every index
                    // into it stays below RES_MAX_OFFSET.
   1618             if (res->fNumUnitsSaved >= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING &&
   1619                     maxStringIndex < RES_MAX_OFFSET) {
   1620                 res->writeUTF16v2(0, f16BitUnits);
   1621                 ++numStringsWritten;
   1622                 numUnitsSaved += res->fNumUnitsSaved;
   1623             } else {
                        // Not written to the pool: give it an empty URES_STRING resource word
                        // and flag it as written.
   1624                 numUnitsNotSaved += res->fNumUnitsSaved;
   1625                 res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_STRING);
   1626                 res->fWritten = TRUE;
   1627             }
   1628         }
                // Note: no early return here; the statistics below are printed even on failure.
   1629         if (f16BitUnits.isBogus()) {
   1630             errorCode = U_MEMORY_ALLOCATION_ERROR;
   1631         }
   1632         if (getShowWarning()) {  // not quiet
   1633             printf("number of shared strings: %d\n", (int)numStringsWritten);
   1634             printf("16-bit units for strings: %6d = %6d bytes\n",
   1635                    (int)f16BitUnits.length(), (int)f16BitUnits.length() * 2);
   1636             printf("16-bit units saved:       %6d = %6d bytes\n",
   1637                    (int)numUnitsSaved, (int)numUnitsSaved * 2);
   1638             printf("16-bit units not saved:   %6d = %6d bytes\n",
   1639                    (int)numUnitsNotSaved, (int)numUnitsNotSaved * 2);
   1640         }
   1641     } else {
   1642         assert(fPoolStringIndexLimit <= fUsePoolBundle->fStringIndexLimit);
   1643         /* Write the non-suffix strings. */
                // i is declared outside the loops because the suffix loop further down
                // continues from the index where the non-suffix strings end.
   1644         int32_t i;
                // The loop stops at the first suffix string; the re-sort put suffixes last.
   1645         for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
   1646             StringResource *res = array[i];
   1647             if (!res->fWritten) {
                        // Keep fLocalStringIndexLimit at one more than the highest
                        // local string index used so far.
   1648                 int32_t localStringIndex = f16BitUnits.length();
   1649                 if (localStringIndex >= fLocalStringIndexLimit) {
   1650                     fLocalStringIndexLimit = localStringIndex + 1;
   1651                 }
                        // fPoolStringIndexLimit is passed as the base of the resource offset;
                        // the suffix loop below subtracts it again to get a local index.
   1652                 res->writeUTF16v2(fPoolStringIndexLimit, f16BitUnits);
   1653             }
   1654         }
   1655         if (f16BitUnits.isBogus()) {
   1656             errorCode = U_MEMORY_ALLOCATION_ERROR;
   1657             return;
   1658         }
                // With format version 3 or higher, also add a new StringResource with the
                // same text to the root list of fWritePoolBundle for every non-suffix string.
   1659         if (fWritePoolBundle != NULL && gFormatVersion >= 3) {
   1660             PseudoListResource *poolStrings =
   1661                     static_cast<PseudoListResource *>(fWritePoolBundle->fRoot);
                    // Restarting at 0 with the same condition leaves i at the first
                    // suffix string again when the loop runs to completion.
   1662             for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
   1663                 assert(!array[i]->fString.isEmpty());
   1664                 StringResource *poolString =
   1665                         new StringResource(fWritePoolBundle, array[i]->fString, errorCode);
   1666                 if (poolString == NULL) {
                            // NOTE(review): after this break the suffix loop below still runs,
                            // starting at this i -- confirm that this is intended.
   1667                     errorCode = U_MEMORY_ALLOCATION_ERROR;
   1668                     break;
   1669                 }
   1670                 poolStrings->add(poolString);
   1671             }
   1672         }
   1673         /* Write the suffix strings. Make each point to the real string. */
                // No loop initializer: continues from the i left by the loops above.
   1674         for (; i < count; ++i) {
   1675             StringResource *res = array[i];
   1676             if (res->fWritten) {
                        // Already has its resource word, for example as a suffix of a pool string.
   1677                 continue;
   1678             }
   1679             StringResource *same = res->fSame;
   1680             assert(res->length() != same->length());  // Set strings are unique.
                    // Same formula as for suffixes of pool strings: skip the containing
                    // string's length units and the part of it that precedes the suffix.
   1681             res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset;
   1682             int32_t localStringIndex = (int32_t)RES_GET_OFFSET(res->fRes) - fPoolStringIndexLimit;
   1683             // Suffixes of pool strings have been set already.
   1684             assert(localStringIndex >= 0);
   1685             if (localStringIndex >= fLocalStringIndexLimit) {
   1686                 fLocalStringIndexLimit = localStringIndex + 1;
   1687             }
   1688             res->fWritten = TRUE;
   1689         }
   1690     }
   1691     // +1 to account for the initial zero in f16BitUnits
   1692     assert(f16BitUnits.length() <= (f16BitStringsLength + 1));
   1693 }
   1694