1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2000-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * 11 * File reslist.cpp 12 * 13 * Modification History: 14 * 15 * Date Name Description 16 * 02/21/00 weiv Creation. 17 ******************************************************************************* 18 */ 19 20 // Safer use of UnicodeString. 21 #ifndef UNISTR_FROM_CHAR_EXPLICIT 22 # define UNISTR_FROM_CHAR_EXPLICIT explicit 23 #endif 24 25 // Less important, but still a good idea. 26 #ifndef UNISTR_FROM_STRING_EXPLICIT 27 # define UNISTR_FROM_STRING_EXPLICIT explicit 28 #endif 29 30 #include <assert.h> 31 #include <stdio.h> 32 #include "unicode/localpointer.h" 33 #include "reslist.h" 34 #include "unewdata.h" 35 #include "unicode/ures.h" 36 #include "unicode/putil.h" 37 #include "errmsg.h" 38 39 #include "uarrsort.h" 40 #include "uelement.h" 41 #include "uhash.h" 42 #include "uinvchar.h" 43 #include "ustr_imp.h" 44 #include "unicode/utf16.h" 45 /* 46 * Align binary data at a 16-byte offset from the start of the resource bundle, 47 * to be safe for any data type it may contain. 48 */ 49 #define BIN_ALIGNMENT 16 50 51 // This numeric constant must be at least 1. 52 // If StringResource.fNumUnitsSaved == 0 then the string occurs only once, 53 // and it makes no sense to move it to the pool bundle. 54 // The larger the threshold for fNumUnitsSaved 55 // the smaller the savings, and the smaller the pool bundle. 56 // We trade some total size reduction to reduce the pool bundle a bit, 57 // so that one can reasonably save data size by 58 // removing bundle files without rebuilding the pool bundle. 59 // This can also help to keep the pool and total (pool+local) string indexes 60 // within 16 bits, that is, within range of Table16 and Array16 containers. 61 #ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 62 # define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10 63 #endif 64 65 U_NAMESPACE_USE 66 67 static UBool gIncludeCopyright = FALSE; 68 static UBool gUsePoolBundle = FALSE; 69 static UBool gIsDefaultFormatVersion = TRUE; 70 static int32_t gFormatVersion = 3; 71 72 /* How do we store string values? */ 73 enum { 74 STRINGS_UTF16_V1, /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */ 75 STRINGS_UTF16_V2 /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */ 76 }; 77 78 static const int32_t MAX_IMPLICIT_STRING_LENGTH = 40; /* do not store the length explicitly for such strings */ 79 80 static const ResFile kNoPoolBundle; 81 82 /* 83 * res_none() returns the address of kNoResource, 84 * for use in non-error cases when no resource is to be added to the bundle. 85 * (NULL is used in error cases.) 86 */ 87 static SResource kNoResource; // TODO: const 88 89 static UDataInfo dataInfo= { 90 sizeof(UDataInfo), 91 0, 92 93 U_IS_BIG_ENDIAN, 94 U_CHARSET_FAMILY, 95 sizeof(UChar), 96 0, 97 98 {0x52, 0x65, 0x73, 0x42}, /* dataFormat="ResB" */ 99 {1, 3, 0, 0}, /* formatVersion */ 100 {1, 4, 0, 0} /* dataVersion take a look at version inside parsed resb*/ 101 }; 102 103 static const UVersionInfo gFormatVersions[4] = { /* indexed by a major-formatVersion integer */ 104 { 0, 0, 0, 0 }, 105 { 1, 3, 0, 0 }, 106 { 2, 0, 0, 0 }, 107 { 3, 0, 0, 0 } 108 }; 109 // Remember to update genrb.h GENRB_VERSION when changing the data format. 110 // (Or maybe we should remove GENRB_VERSION and report the ICU version number?) 111 112 static uint8_t calcPadding(uint32_t size) { 113 /* returns space we need to pad */ 114 return (uint8_t) ((size % sizeof(uint32_t)) ? (sizeof(uint32_t) - (size % sizeof(uint32_t))) : 0); 115 116 } 117 118 void setIncludeCopyright(UBool val){ 119 gIncludeCopyright=val; 120 } 121 122 UBool getIncludeCopyright(void){ 123 return gIncludeCopyright; 124 } 125 126 void setFormatVersion(int32_t formatVersion) { 127 gIsDefaultFormatVersion = FALSE; 128 gFormatVersion = formatVersion; 129 } 130 131 int32_t getFormatVersion() { 132 return gFormatVersion; 133 } 134 135 void setUsePoolBundle(UBool use) { 136 gUsePoolBundle = use; 137 } 138 139 // TODO: return const pointer, or find another way to express "none" 140 struct SResource* res_none() { 141 return &kNoResource; 142 } 143 144 SResource::SResource() 145 : fType(URES_NONE), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), fKey(-1), fKey16(-1), 146 line(0), fNext(NULL) { 147 ustr_init(&fComment); 148 } 149 150 SResource::SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment, 151 UErrorCode &errorCode) 152 : fType(type), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), 153 fKey(bundle != NULL ? bundle->addTag(tag, errorCode) : -1), fKey16(-1), 154 line(0), fNext(NULL) { 155 ustr_init(&fComment); 156 if(comment != NULL) { 157 ustr_cpy(&fComment, comment, &errorCode); 158 } 159 } 160 161 SResource::~SResource() { 162 ustr_deinit(&fComment); 163 } 164 165 ContainerResource::~ContainerResource() { 166 SResource *current = fFirst; 167 while (current != NULL) { 168 SResource *next = current->fNext; 169 delete current; 170 current = next; 171 } 172 } 173 174 TableResource::~TableResource() {} 175 176 // TODO: clarify that containers adopt new items, even in error cases; use LocalPointer 177 void TableResource::add(SResource *res, int linenumber, UErrorCode &errorCode) { 178 if (U_FAILURE(errorCode) || res == NULL || res == &kNoResource) { 179 return; 180 } 181 182 /* remember this linenumber to report to the user if there is a duplicate key */ 183 res->line = linenumber; 184 185 /* here we need to traverse the list */ 186 ++fCount; 187 188 /* is the list still empty? */ 189 if (fFirst == NULL) { 190 fFirst = res; 191 res->fNext = NULL; 192 return; 193 } 194 195 const char *resKeyString = fRoot->fKeys + res->fKey; 196 197 SResource *current = fFirst; 198 199 SResource *prev = NULL; 200 while (current != NULL) { 201 const char *currentKeyString = fRoot->fKeys + current->fKey; 202 int diff; 203 /* 204 * formatVersion 1: compare key strings in native-charset order 205 * formatVersion 2 and up: compare key strings in ASCII order 206 */ 207 if (gFormatVersion == 1 || U_CHARSET_FAMILY == U_ASCII_FAMILY) { 208 diff = uprv_strcmp(currentKeyString, resKeyString); 209 } else { 210 diff = uprv_compareInvCharsAsAscii(currentKeyString, resKeyString); 211 } 212 if (diff < 0) { 213 prev = current; 214 current = current->fNext; 215 } else if (diff > 0) { 216 /* we're either in front of the list, or in the middle */ 217 if (prev == NULL) { 218 /* front of the list */ 219 fFirst = res; 220 } else { 221 /* middle of the list */ 222 prev->fNext = res; 223 } 224 225 res->fNext = current; 226 return; 227 } else { 228 /* Key already exists! ERROR! */ 229 error(linenumber, "duplicate key '%s' in table, first appeared at line %d", currentKeyString, current->line); 230 errorCode = U_UNSUPPORTED_ERROR; 231 return; 232 } 233 } 234 235 /* end of list */ 236 prev->fNext = res; 237 res->fNext = NULL; 238 } 239 240 ArrayResource::~ArrayResource() {} 241 242 void ArrayResource::add(SResource *res) { 243 if (res != NULL && res != &kNoResource) { 244 if (fFirst == NULL) { 245 fFirst = res; 246 } else { 247 fLast->fNext = res; 248 } 249 fLast = res; 250 ++fCount; 251 } 252 } 253 254 PseudoListResource::~PseudoListResource() {} 255 256 void PseudoListResource::add(SResource *res) { 257 if (res != NULL && res != &kNoResource) { 258 res->fNext = fFirst; 259 fFirst = res; 260 ++fCount; 261 } 262 } 263 264 StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type, 265 const UChar *value, int32_t len, 266 const UString* comment, UErrorCode &errorCode) 267 : SResource(bundle, tag, type, comment, errorCode) { 268 if (len == 0 && gFormatVersion > 1) { 269 fRes = URES_MAKE_EMPTY_RESOURCE(type); 270 fWritten = TRUE; 271 return; 272 } 273 274 fString.setTo(value, len); 275 fString.getTerminatedBuffer(); // Some code relies on NUL-termination. 276 if (U_SUCCESS(errorCode) && fString.isBogus()) { 277 errorCode = U_MEMORY_ALLOCATION_ERROR; 278 } 279 } 280 281 StringBaseResource::StringBaseResource(SRBRoot *bundle, int8_t type, 282 const icu::UnicodeString &value, UErrorCode &errorCode) 283 : SResource(bundle, NULL, type, NULL, errorCode), fString(value) { 284 if (value.isEmpty() && gFormatVersion > 1) { 285 fRes = URES_MAKE_EMPTY_RESOURCE(type); 286 fWritten = TRUE; 287 return; 288 } 289 290 fString.getTerminatedBuffer(); // Some code relies on NUL-termination. 291 if (U_SUCCESS(errorCode) && fString.isBogus()) { 292 errorCode = U_MEMORY_ALLOCATION_ERROR; 293 } 294 } 295 296 // Pool bundle string, alias the buffer. Guaranteed NUL-terminated and not empty. 297 StringBaseResource::StringBaseResource(int8_t type, const UChar *value, int32_t len, 298 UErrorCode &errorCode) 299 : SResource(NULL, NULL, type, NULL, errorCode), fString(TRUE, value, len) { 300 assert(len > 0); 301 assert(!fString.isBogus()); 302 } 303 304 StringBaseResource::~StringBaseResource() {} 305 306 static int32_t U_CALLCONV 307 string_hash(const UElement key) { 308 const StringResource *res = static_cast<const StringResource *>(key.pointer); 309 return res->fString.hashCode(); 310 } 311 312 static UBool U_CALLCONV 313 string_comp(const UElement key1, const UElement key2) { 314 const StringResource *res1 = static_cast<const StringResource *>(key1.pointer); 315 const StringResource *res2 = static_cast<const StringResource *>(key2.pointer); 316 return res1->fString == res2->fString; 317 } 318 319 StringResource::~StringResource() {} 320 321 AliasResource::~AliasResource() {} 322 323 IntResource::IntResource(SRBRoot *bundle, const char *tag, int32_t value, 324 const UString* comment, UErrorCode &errorCode) 325 : SResource(bundle, tag, URES_INT, comment, errorCode) { 326 fValue = value; 327 fRes = URES_MAKE_RESOURCE(URES_INT, value & RES_MAX_OFFSET); 328 fWritten = TRUE; 329 } 330 331 IntResource::~IntResource() {} 332 333 IntVectorResource::IntVectorResource(SRBRoot *bundle, const char *tag, 334 const UString* comment, UErrorCode &errorCode) 335 : SResource(bundle, tag, URES_INT_VECTOR, comment, errorCode), 336 fCount(0), fArray(new uint32_t[RESLIST_MAX_INT_VECTOR]) { 337 if (fArray == NULL) { 338 errorCode = U_MEMORY_ALLOCATION_ERROR; 339 return; 340 } 341 } 342 343 IntVectorResource::~IntVectorResource() { 344 delete[] fArray; 345 } 346 347 void IntVectorResource::add(int32_t value, UErrorCode &errorCode) { 348 if (U_SUCCESS(errorCode)) { 349 fArray[fCount++] = value; 350 } 351 } 352 353 BinaryResource::BinaryResource(SRBRoot *bundle, const char *tag, 354 uint32_t length, uint8_t *data, const char* fileName, 355 const UString* comment, UErrorCode &errorCode) 356 : SResource(bundle, tag, URES_BINARY, comment, errorCode), 357 fLength(length), fData(NULL), fFileName(NULL) { 358 if (U_FAILURE(errorCode)) { 359 return; 360 } 361 if (fileName != NULL && *fileName != 0){ 362 fFileName = new char[uprv_strlen(fileName)+1]; 363 if (fFileName == NULL) { 364 errorCode = U_MEMORY_ALLOCATION_ERROR; 365 return; 366 } 367 uprv_strcpy(fFileName, fileName); 368 } 369 if (length > 0) { 370 fData = new uint8_t[length]; 371 if (fData == NULL) { 372 errorCode = U_MEMORY_ALLOCATION_ERROR; 373 return; 374 } 375 uprv_memcpy(fData, data, length); 376 } else { 377 if (gFormatVersion > 1) { 378 fRes = URES_MAKE_EMPTY_RESOURCE(URES_BINARY); 379 fWritten = TRUE; 380 } 381 } 382 } 383 384 BinaryResource::~BinaryResource() { 385 delete[] fData; 386 delete[] fFileName; 387 } 388 389 /* Writing Functions */ 390 391 void 392 StringResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, 393 UErrorCode &errorCode) { 394 assert(fSame == NULL); 395 fSame = static_cast<StringResource *>(uhash_get(stringSet, this)); 396 if (fSame != NULL) { 397 // This is a duplicate of a pool bundle string or of an earlier-visited string. 398 if (++fSame->fNumCopies == 1) { 399 assert(fSame->fWritten); 400 int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(fSame->fRes); 401 if (poolStringIndex >= bundle->fPoolStringIndexLimit) { 402 bundle->fPoolStringIndexLimit = poolStringIndex + 1; 403 } 404 } 405 return; 406 } 407 /* Put this string into the set for finding duplicates. */ 408 fNumCopies = 1; 409 uhash_put(stringSet, this, this, &errorCode); 410 411 if (bundle->fStringsForm != STRINGS_UTF16_V1) { 412 int32_t len = length(); 413 if (len <= MAX_IMPLICIT_STRING_LENGTH && 414 !U16_IS_TRAIL(fString[0]) && fString.indexOf((UChar)0) < 0) { 415 /* 416 * This string will be stored without an explicit length. 417 * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen(). 418 */ 419 fNumCharsForLength = 0; 420 } else if (len <= 0x3ee) { 421 fNumCharsForLength = 1; 422 } else if (len <= 0xfffff) { 423 fNumCharsForLength = 2; 424 } else { 425 fNumCharsForLength = 3; 426 } 427 bundle->f16BitStringsLength += fNumCharsForLength + len + 1; /* +1 for the NUL */ 428 } 429 } 430 431 void 432 ContainerResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, 433 UErrorCode &errorCode) { 434 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 435 current->preflightStrings(bundle, stringSet, errorCode); 436 } 437 } 438 439 void 440 SResource::preflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) { 441 if (U_FAILURE(errorCode)) { 442 return; 443 } 444 if (fRes != RES_BOGUS) { 445 /* 446 * The resource item word was already precomputed, which means 447 * no further data needs to be written. 448 * This might be an integer, or an empty string/binary/etc. 449 */ 450 return; 451 } 452 handlePreflightStrings(bundle, stringSet, errorCode); 453 } 454 455 void 456 SResource::handlePreflightStrings(SRBRoot * /*bundle*/, UHashtable * /*stringSet*/, 457 UErrorCode & /*errorCode*/) { 458 /* Neither a string nor a container. */ 459 } 460 461 int32_t 462 SRBRoot::makeRes16(uint32_t resWord) const { 463 if (resWord == 0) { 464 return 0; /* empty string */ 465 } 466 uint32_t type = RES_GET_TYPE(resWord); 467 int32_t offset = (int32_t)RES_GET_OFFSET(resWord); 468 if (type == URES_STRING_V2) { 469 assert(offset > 0); 470 if (offset < fPoolStringIndexLimit) { 471 if (offset < fPoolStringIndex16Limit) { 472 return offset; 473 } 474 } else { 475 offset = offset - fPoolStringIndexLimit + fPoolStringIndex16Limit; 476 if (offset <= 0xffff) { 477 return offset; 478 } 479 } 480 } 481 return -1; 482 } 483 484 int32_t 485 SRBRoot::mapKey(int32_t oldpos) const { 486 const KeyMapEntry *map = fKeyMap; 487 if (map == NULL) { 488 return oldpos; 489 } 490 int32_t i, start, limit; 491 492 /* do a binary search for the old, pre-compactKeys() key offset */ 493 start = fUsePoolBundle->fKeysCount; 494 limit = start + fKeysCount; 495 while (start < limit - 1) { 496 i = (start + limit) / 2; 497 if (oldpos < map[i].oldpos) { 498 limit = i; 499 } else { 500 start = i; 501 } 502 } 503 assert(oldpos == map[start].oldpos); 504 return map[start].newpos; 505 } 506 507 /* 508 * Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings. 509 * For unique UTF-16 v2 strings, write16() sees fRes != RES_BOGUS 510 * and exits early. 511 */ 512 void 513 StringResource::handleWrite16(SRBRoot * /*bundle*/) { 514 SResource *same; 515 if ((same = fSame) != NULL) { 516 /* This is a duplicate. */ 517 assert(same->fRes != RES_BOGUS && same->fWritten); 518 fRes = same->fRes; 519 fWritten = same->fWritten; 520 } 521 } 522 523 void 524 ContainerResource::writeAllRes16(SRBRoot *bundle) { 525 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 526 bundle->f16BitUnits.append((UChar)current->fRes16); 527 } 528 fWritten = TRUE; 529 } 530 531 void 532 ArrayResource::handleWrite16(SRBRoot *bundle) { 533 if (fCount == 0 && gFormatVersion > 1) { 534 fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY); 535 fWritten = TRUE; 536 return; 537 } 538 539 int32_t res16 = 0; 540 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 541 current->write16(bundle); 542 res16 |= current->fRes16; 543 } 544 if (fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) { 545 fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnits.length()); 546 bundle->f16BitUnits.append((UChar)fCount); 547 writeAllRes16(bundle); 548 } 549 } 550 551 void 552 TableResource::handleWrite16(SRBRoot *bundle) { 553 if (fCount == 0 && gFormatVersion > 1) { 554 fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE); 555 fWritten = TRUE; 556 return; 557 } 558 /* Find the smallest table type that fits the data. */ 559 int32_t key16 = 0; 560 int32_t res16 = 0; 561 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 562 current->write16(bundle); 563 key16 |= current->fKey16; 564 res16 |= current->fRes16; 565 } 566 if(fCount > (uint32_t)bundle->fMaxTableLength) { 567 bundle->fMaxTableLength = fCount; 568 } 569 if (fCount <= 0xffff && key16 >= 0) { 570 if (res16 >= 0 && gFormatVersion > 1) { 571 /* 16-bit count, key offsets and values */ 572 fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnits.length()); 573 bundle->f16BitUnits.append((UChar)fCount); 574 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 575 bundle->f16BitUnits.append((UChar)current->fKey16); 576 } 577 writeAllRes16(bundle); 578 } else { 579 /* 16-bit count, 16-bit key offsets, 32-bit values */ 580 fTableType = URES_TABLE; 581 } 582 } else { 583 /* 32-bit count, key offsets and values */ 584 fTableType = URES_TABLE32; 585 } 586 } 587 588 void 589 PseudoListResource::handleWrite16(SRBRoot * /*bundle*/) { 590 fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE); 591 fWritten = TRUE; 592 } 593 594 void 595 SResource::write16(SRBRoot *bundle) { 596 if (fKey >= 0) { 597 // A tagged resource has a non-negative key index into the parsed key strings. 598 // compactKeys() built a map from parsed key index to the final key index. 599 // After the mapping, negative key indexes are used for shared pool bundle keys. 600 fKey = bundle->mapKey(fKey); 601 // If the key index fits into a Key16 for a Table or Table16, 602 // then set the fKey16 field accordingly. 603 // Otherwise keep it at -1. 604 if (fKey >= 0) { 605 if (fKey < bundle->fLocalKeyLimit) { 606 fKey16 = fKey; 607 } 608 } else { 609 int32_t poolKeyIndex = fKey & 0x7fffffff; 610 if (poolKeyIndex <= 0xffff) { 611 poolKeyIndex += bundle->fLocalKeyLimit; 612 if (poolKeyIndex <= 0xffff) { 613 fKey16 = poolKeyIndex; 614 } 615 } 616 } 617 } 618 /* 619 * fRes != RES_BOGUS: 620 * The resource item word was already precomputed, which means 621 * no further data needs to be written. 622 * This might be an integer, or an empty or UTF-16 v2 string, 623 * an empty binary, etc. 624 */ 625 if (fRes == RES_BOGUS) { 626 handleWrite16(bundle); 627 } 628 // Compute fRes16 for precomputed as well as just-computed fRes. 629 fRes16 = bundle->makeRes16(fRes); 630 } 631 632 void 633 SResource::handleWrite16(SRBRoot * /*bundle*/) { 634 /* Only a few resource types write 16-bit units. */ 635 } 636 637 /* 638 * Only called for UTF-16 v1 strings, and for aliases. 639 * For UTF-16 v2 strings, preWrite() sees fRes != RES_BOGUS 640 * and exits early. 641 */ 642 void 643 StringBaseResource::handlePreWrite(uint32_t *byteOffset) { 644 /* Write the UTF-16 v1 string. */ 645 fRes = URES_MAKE_RESOURCE(fType, *byteOffset >> 2); 646 *byteOffset += 4 + (length() + 1) * U_SIZEOF_UCHAR; 647 } 648 649 void 650 IntVectorResource::handlePreWrite(uint32_t *byteOffset) { 651 if (fCount == 0 && gFormatVersion > 1) { 652 fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR); 653 fWritten = TRUE; 654 } else { 655 fRes = URES_MAKE_RESOURCE(URES_INT_VECTOR, *byteOffset >> 2); 656 *byteOffset += (1 + fCount) * 4; 657 } 658 } 659 660 void 661 BinaryResource::handlePreWrite(uint32_t *byteOffset) { 662 uint32_t pad = 0; 663 uint32_t dataStart = *byteOffset + sizeof(fLength); 664 665 if (dataStart % BIN_ALIGNMENT) { 666 pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT); 667 *byteOffset += pad; /* pad == 4 or 8 or 12 */ 668 } 669 fRes = URES_MAKE_RESOURCE(URES_BINARY, *byteOffset >> 2); 670 *byteOffset += 4 + fLength; 671 } 672 673 void 674 ContainerResource::preWriteAllRes(uint32_t *byteOffset) { 675 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 676 current->preWrite(byteOffset); 677 } 678 } 679 680 void 681 ArrayResource::handlePreWrite(uint32_t *byteOffset) { 682 preWriteAllRes(byteOffset); 683 fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2); 684 *byteOffset += (1 + fCount) * 4; 685 } 686 687 void 688 TableResource::handlePreWrite(uint32_t *byteOffset) { 689 preWriteAllRes(byteOffset); 690 if (fTableType == URES_TABLE) { 691 /* 16-bit count, 16-bit key offsets, 32-bit values */ 692 fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2); 693 *byteOffset += 2 + fCount * 6; 694 } else { 695 /* 32-bit count, key offsets and values */ 696 fRes = URES_MAKE_RESOURCE(URES_TABLE32, *byteOffset >> 2); 697 *byteOffset += 4 + fCount * 8; 698 } 699 } 700 701 void 702 SResource::preWrite(uint32_t *byteOffset) { 703 if (fRes != RES_BOGUS) { 704 /* 705 * The resource item word was already precomputed, which means 706 * no further data needs to be written. 707 * This might be an integer, or an empty or UTF-16 v2 string, 708 * an empty binary, etc. 709 */ 710 return; 711 } 712 handlePreWrite(byteOffset); 713 *byteOffset += calcPadding(*byteOffset); 714 } 715 716 void 717 SResource::handlePreWrite(uint32_t * /*byteOffset*/) { 718 assert(FALSE); 719 } 720 721 /* 722 * Only called for UTF-16 v1 strings, and for aliases. For UTF-16 v2 strings, 723 * write() sees fWritten and exits early. 724 */ 725 void 726 StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 727 /* Write the UTF-16 v1 string. */ 728 int32_t len = length(); 729 udata_write32(mem, len); 730 udata_writeUString(mem, getBuffer(), len + 1); 731 *byteOffset += 4 + (len + 1) * U_SIZEOF_UCHAR; 732 fWritten = TRUE; 733 } 734 735 void 736 ContainerResource::writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset) { 737 uint32_t i = 0; 738 for (SResource *current = fFirst; current != NULL; ++i, current = current->fNext) { 739 current->write(mem, byteOffset); 740 } 741 assert(i == fCount); 742 } 743 744 void 745 ContainerResource::writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset) { 746 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 747 udata_write32(mem, current->fRes); 748 } 749 *byteOffset += fCount * 4; 750 } 751 752 void 753 ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 754 writeAllRes(mem, byteOffset); 755 udata_write32(mem, fCount); 756 *byteOffset += 4; 757 writeAllRes32(mem, byteOffset); 758 } 759 760 void 761 IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 762 udata_write32(mem, fCount); 763 for(uint32_t i = 0; i < fCount; ++i) { 764 udata_write32(mem, fArray[i]); 765 } 766 *byteOffset += (1 + fCount) * 4; 767 } 768 769 void 770 BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 771 uint32_t pad = 0; 772 uint32_t dataStart = *byteOffset + sizeof(fLength); 773 774 if (dataStart % BIN_ALIGNMENT) { 775 pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT); 776 udata_writePadding(mem, pad); /* pad == 4 or 8 or 12 */ 777 *byteOffset += pad; 778 } 779 780 udata_write32(mem, fLength); 781 if (fLength > 0) { 782 udata_writeBlock(mem, fData, fLength); 783 } 784 *byteOffset += 4 + fLength; 785 } 786 787 void 788 TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 789 writeAllRes(mem, byteOffset); 790 if(fTableType == URES_TABLE) { 791 udata_write16(mem, (uint16_t)fCount); 792 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 793 udata_write16(mem, current->fKey16); 794 } 795 *byteOffset += (1 + fCount)* 2; 796 if ((fCount & 1) == 0) { 797 /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */ 798 udata_writePadding(mem, 2); 799 *byteOffset += 2; 800 } 801 } else /* URES_TABLE32 */ { 802 udata_write32(mem, fCount); 803 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 804 udata_write32(mem, (uint32_t)current->fKey); 805 } 806 *byteOffset += (1 + fCount)* 4; 807 } 808 writeAllRes32(mem, byteOffset); 809 } 810 811 void 812 SResource::write(UNewDataMemory *mem, uint32_t *byteOffset) { 813 if (fWritten) { 814 assert(fRes != RES_BOGUS); 815 return; 816 } 817 handleWrite(mem, byteOffset); 818 uint8_t paddingSize = calcPadding(*byteOffset); 819 if (paddingSize > 0) { 820 udata_writePadding(mem, paddingSize); 821 *byteOffset += paddingSize; 822 } 823 fWritten = TRUE; 824 } 825 826 void 827 SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/) { 828 assert(FALSE); 829 } 830 831 void SRBRoot::write(const char *outputDir, const char *outputPkg, 832 char *writtenFilename, int writtenFilenameLen, 833 UErrorCode &errorCode) { 834 UNewDataMemory *mem = NULL; 835 uint32_t byteOffset = 0; 836 uint32_t top, size; 837 char dataName[1024]; 838 int32_t indexes[URES_INDEX_TOP]; 839 840 compactKeys(errorCode); 841 /* 842 * Add padding bytes to fKeys so that fKeysTop is 4-aligned. 843 * Safe because the capacity is a multiple of 4. 844 */ 845 while (fKeysTop & 3) { 846 fKeys[fKeysTop++] = (char)0xaa; 847 } 848 /* 849 * In URES_TABLE, use all local key offsets that fit into 16 bits, 850 * and use the remaining 16-bit offsets for pool key offsets 851 * if there are any. 852 * If there are no local keys, then use the whole 16-bit space 853 * for pool key offsets. 854 * Note: This cannot be changed without changing the major formatVersion. 855 */ 856 if (fKeysBottom < fKeysTop) { 857 if (fKeysTop <= 0x10000) { 858 fLocalKeyLimit = fKeysTop; 859 } else { 860 fLocalKeyLimit = 0x10000; 861 } 862 } else { 863 fLocalKeyLimit = 0; 864 } 865 866 UHashtable *stringSet; 867 if (gFormatVersion > 1) { 868 stringSet = uhash_open(string_hash, string_comp, string_comp, &errorCode); 869 if (U_SUCCESS(errorCode) && 870 fUsePoolBundle != NULL && fUsePoolBundle->fStrings != NULL) { 871 for (SResource *current = fUsePoolBundle->fStrings->fFirst; 872 current != NULL; 873 current = current->fNext) { 874 StringResource *sr = static_cast<StringResource *>(current); 875 sr->fNumCopies = 0; 876 sr->fNumUnitsSaved = 0; 877 uhash_put(stringSet, sr, sr, &errorCode); 878 } 879 } 880 fRoot->preflightStrings(this, stringSet, errorCode); 881 } else { 882 stringSet = NULL; 883 } 884 if (fStringsForm == STRINGS_UTF16_V2 && f16BitStringsLength > 0) { 885 compactStringsV2(stringSet, errorCode); 886 } 887 uhash_close(stringSet); 888 if (U_FAILURE(errorCode)) { 889 return; 890 } 891 892 int32_t formatVersion = gFormatVersion; 893 if (fPoolStringIndexLimit != 0) { 894 int32_t sum = fPoolStringIndexLimit + fLocalStringIndexLimit; 895 if ((sum - 1) > RES_MAX_OFFSET) { 896 errorCode = U_BUFFER_OVERFLOW_ERROR; 897 return; 898 } 899 if (fPoolStringIndexLimit < 0x10000 && sum <= 0x10000) { 900 // 16-bit indexes work for all pool + local strings. 901 fPoolStringIndex16Limit = fPoolStringIndexLimit; 902 } else { 903 // Set the pool index threshold so that 16-bit indexes work 904 // for some pool strings and some local strings. 905 fPoolStringIndex16Limit = (int32_t)( 906 ((int64_t)fPoolStringIndexLimit * 0xffff) / sum); 907 } 908 } else if (gIsDefaultFormatVersion && formatVersion == 3 && !fIsPoolBundle) { 909 // If we just default to formatVersion 3 910 // but there are no pool bundle strings to share 911 // and we do not write a pool bundle, 912 // then write formatVersion 2 which is just as good. 913 formatVersion = 2; 914 } 915 916 fRoot->write16(this); 917 if (f16BitUnits.isBogus()) { 918 errorCode = U_MEMORY_ALLOCATION_ERROR; 919 return; 920 } 921 if (f16BitUnits.length() & 1) { 922 f16BitUnits.append((UChar)0xaaaa); /* pad to multiple of 4 bytes */ 923 } 924 /* all keys have been mapped */ 925 uprv_free(fKeyMap); 926 fKeyMap = NULL; 927 928 byteOffset = fKeysTop + f16BitUnits.length() * 2; 929 fRoot->preWrite(&byteOffset); 930 931 /* total size including the root item */ 932 top = byteOffset; 933 934 if (writtenFilename && writtenFilenameLen) { 935 *writtenFilename = 0; 936 } 937 938 if (writtenFilename) { 939 int32_t off = 0, len = 0; 940 if (outputDir) { 941 len = (int32_t)uprv_strlen(outputDir); 942 if (len > writtenFilenameLen) { 943 len = writtenFilenameLen; 944 } 945 uprv_strncpy(writtenFilename, outputDir, len); 946 } 947 if (writtenFilenameLen -= len) { 948 off += len; 949 writtenFilename[off] = U_FILE_SEP_CHAR; 950 if (--writtenFilenameLen) { 951 ++off; 952 if(outputPkg != NULL) 953 { 954 uprv_strcpy(writtenFilename+off, outputPkg); 955 off += (int32_t)uprv_strlen(outputPkg); 956 writtenFilename[off] = '_'; 957 ++off; 958 } 959 960 len = (int32_t)uprv_strlen(fLocale); 961 if (len > writtenFilenameLen) { 962 len = writtenFilenameLen; 963 } 964 uprv_strncpy(writtenFilename + off, fLocale, len); 965 if (writtenFilenameLen -= len) { 966 off += len; 967 len = 5; 968 if (len > writtenFilenameLen) { 969 len = writtenFilenameLen; 970 } 971 uprv_strncpy(writtenFilename + off, ".res", len); 972 } 973 } 974 } 975 } 976 977 if(outputPkg) 978 { 979 uprv_strcpy(dataName, outputPkg); 980 uprv_strcat(dataName, "_"); 981 uprv_strcat(dataName, fLocale); 982 } 983 else 984 { 985 uprv_strcpy(dataName, fLocale); 986 } 987 988 uprv_memcpy(dataInfo.formatVersion, gFormatVersions + formatVersion, sizeof(UVersionInfo)); 989 990 mem = udata_create(outputDir, "res", dataName, 991 &dataInfo, (gIncludeCopyright==TRUE)? U_COPYRIGHT_STRING:NULL, &errorCode); 992 if(U_FAILURE(errorCode)){ 993 return; 994 } 995 996 /* write the root item */ 997 udata_write32(mem, fRoot->fRes); 998 999 /* 1000 * formatVersion 1.1 (ICU 2.8): 1001 * write int32_t indexes[] after root and before the key strings 1002 * to make it easier to parse resource bundles in icuswap or from Java etc. 1003 */ 1004 uprv_memset(indexes, 0, sizeof(indexes)); 1005 indexes[URES_INDEX_LENGTH]= fIndexLength; 1006 indexes[URES_INDEX_KEYS_TOP]= fKeysTop>>2; 1007 indexes[URES_INDEX_RESOURCES_TOP]= (int32_t)(top>>2); 1008 indexes[URES_INDEX_BUNDLE_TOP]= indexes[URES_INDEX_RESOURCES_TOP]; 1009 indexes[URES_INDEX_MAX_TABLE_LENGTH]= fMaxTableLength; 1010 1011 /* 1012 * formatVersion 1.2 (ICU 3.6): 1013 * write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set 1014 * the memset() above initialized all indexes[] to 0 1015 */ 1016 if (fNoFallback) { 1017 indexes[URES_INDEX_ATTRIBUTES]=URES_ATT_NO_FALLBACK; 1018 } 1019 /* 1020 * formatVersion 2.0 (ICU 4.4): 1021 * more compact string value storage, optional pool bundle 1022 */ 1023 if (URES_INDEX_16BIT_TOP < fIndexLength) { 1024 indexes[URES_INDEX_16BIT_TOP] = (fKeysTop>>2) + (f16BitUnits.length()>>1); 1025 } 1026 if (URES_INDEX_POOL_CHECKSUM < fIndexLength) { 1027 if (fIsPoolBundle) { 1028 indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK; 1029 uint32_t checksum = computeCRC((const char *)(fKeys + fKeysBottom), 1030 (uint32_t)(fKeysTop - fKeysBottom), 0); 1031 if (f16BitUnits.length() <= 1) { 1032 // no pool strings to checksum 1033 } else if (U_IS_BIG_ENDIAN) { 1034 checksum = computeCRC((const char *)f16BitUnits.getBuffer(), 1035 (uint32_t)f16BitUnits.length() * 2, checksum); 1036 } else { 1037 // Swap to big-endian so we get the same checksum on all platforms 1038 // (except for charset family, due to the key strings). 1039 UnicodeString s(f16BitUnits); 1040 s.append((UChar)1); // Ensure that we own this buffer. 1041 assert(!s.isBogus()); 1042 uint16_t *p = (uint16_t *)s.getBuffer(); 1043 for (int32_t count = f16BitUnits.length(); count > 0; --count) { 1044 uint16_t x = *p; 1045 *p++ = (uint16_t)((x << 8) | (x >> 8)); 1046 } 1047 checksum = computeCRC((const char *)p, 1048 (uint32_t)f16BitUnits.length() * 2, checksum); 1049 } 1050 indexes[URES_INDEX_POOL_CHECKSUM] = (int32_t)checksum; 1051 } else if (gUsePoolBundle) { 1052 indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE; 1053 indexes[URES_INDEX_POOL_CHECKSUM] = fUsePoolBundle->fChecksum; 1054 } 1055 } 1056 // formatVersion 3 (ICU 56): 1057 // share string values via pool bundle strings 1058 indexes[URES_INDEX_LENGTH] |= fPoolStringIndexLimit << 8; // bits 23..0 -> 31..8 1059 indexes[URES_INDEX_ATTRIBUTES] |= (fPoolStringIndexLimit >> 12) & 0xf000; // bits 27..24 -> 15..12 1060 indexes[URES_INDEX_ATTRIBUTES] |= fPoolStringIndex16Limit << 16; 1061 1062 /* write the indexes[] */ 1063 udata_writeBlock(mem, indexes, fIndexLength*4); 1064 1065 /* write the table key strings */ 1066 udata_writeBlock(mem, fKeys+fKeysBottom, 1067 fKeysTop-fKeysBottom); 1068 1069 /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */ 1070 udata_writeBlock(mem, f16BitUnits.getBuffer(), f16BitUnits.length()*2); 1071 1072 /* write all of the bundle contents: the root item and its children */ 1073 byteOffset = fKeysTop + f16BitUnits.length() * 2; 1074 fRoot->write(mem, &byteOffset); 1075 assert(byteOffset == top); 1076 1077 size = udata_finish(mem, &errorCode); 1078 if(top != size) { 1079 fprintf(stderr, "genrb error: wrote %u bytes but counted %u\n", 1080 (int)size, (int)top); 1081 errorCode = U_INTERNAL_PROGRAM_ERROR; 1082 } 1083 } 1084 1085 /* Opening Functions */ 1086 1087 TableResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { 1088 LocalPointer<TableResource> res(new TableResource(bundle, tag, comment, *status), *status); 1089 return U_SUCCESS(*status) ? res.orphan() : NULL; 1090 } 1091 1092 ArrayResource* array_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { 1093 LocalPointer<ArrayResource> res(new ArrayResource(bundle, tag, comment, *status), *status); 1094 return U_SUCCESS(*status) ? res.orphan() : NULL; 1095 } 1096 1097 struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) { 1098 LocalPointer<SResource> res( 1099 new StringResource(bundle, tag, value, len, comment, *status), *status); 1100 return U_SUCCESS(*status) ? res.orphan() : NULL; 1101 } 1102 1103 struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) { 1104 LocalPointer<SResource> res( 1105 new AliasResource(bundle, tag, value, len, comment, *status), *status); 1106 return U_SUCCESS(*status) ? res.orphan() : NULL; 1107 } 1108 1109 IntVectorResource *intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { 1110 LocalPointer<IntVectorResource> res( 1111 new IntVectorResource(bundle, tag, comment, *status), *status); 1112 return U_SUCCESS(*status) ? res.orphan() : NULL; 1113 } 1114 1115 struct SResource *int_open(struct SRBRoot *bundle, const char *tag, int32_t value, const struct UString* comment, UErrorCode *status) { 1116 LocalPointer<SResource> res(new IntResource(bundle, tag, value, comment, *status), *status); 1117 return U_SUCCESS(*status) ? res.orphan() : NULL; 1118 } 1119 1120 struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t length, uint8_t *data, const char* fileName, const struct UString* comment, UErrorCode *status) { 1121 LocalPointer<SResource> res( 1122 new BinaryResource(bundle, tag, length, data, fileName, comment, *status), *status); 1123 return U_SUCCESS(*status) ? res.orphan() : NULL; 1124 } 1125 1126 SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCode) 1127 : fRoot(NULL), fLocale(NULL), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE), 1128 fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle), 1129 fKeys(NULL), fKeyMap(NULL), 1130 fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), fKeysCount(0), fLocalKeyLimit(0), 1131 f16BitUnits(), f16BitStringsLength(0), 1132 fUsePoolBundle(&kNoPoolBundle), 1133 fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0), 1134 fWritePoolBundle(NULL) { 1135 if (U_FAILURE(errorCode)) { 1136 return; 1137 } 1138 1139 if (gFormatVersion > 1) { 1140 // f16BitUnits must start with a zero for empty resources. 1141 // We might be able to omit it if there are no empty 16-bit resources. 1142 f16BitUnits.append((UChar)0); 1143 } 1144 1145 fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE); 1146 if (isPoolBundle) { 1147 fRoot = new PseudoListResource(this, errorCode); 1148 } else { 1149 fRoot = new TableResource(this, NULL, comment, errorCode); 1150 } 1151 if (fKeys == NULL || fRoot == NULL || U_FAILURE(errorCode)) { 1152 if (U_SUCCESS(errorCode)) { 1153 errorCode = U_MEMORY_ALLOCATION_ERROR; 1154 } 1155 return; 1156 } 1157 1158 fKeysCapacity = KEY_SPACE_SIZE; 1159 /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */ 1160 if (gUsePoolBundle || isPoolBundle) { 1161 fIndexLength = URES_INDEX_POOL_CHECKSUM + 1; 1162 } else if (gFormatVersion >= 2) { 1163 fIndexLength = URES_INDEX_16BIT_TOP + 1; 1164 } else /* formatVersion 1 */ { 1165 fIndexLength = URES_INDEX_ATTRIBUTES + 1; 1166 } 1167 fKeysBottom = (1 /* root */ + fIndexLength) * 4; 1168 uprv_memset(fKeys, 0, fKeysBottom); 1169 fKeysTop = fKeysBottom; 1170 1171 if (gFormatVersion == 1) { 1172 fStringsForm = STRINGS_UTF16_V1; 1173 } else { 1174 fStringsForm = STRINGS_UTF16_V2; 1175 } 1176 } 1177 1178 /* Closing Functions */ 1179 1180 void res_close(struct SResource *res) { 1181 delete res; 1182 } 1183 1184 SRBRoot::~SRBRoot() { 1185 delete fRoot; 1186 uprv_free(fLocale); 1187 uprv_free(fKeys); 1188 uprv_free(fKeyMap); 1189 } 1190 1191 /* Misc Functions */ 1192 1193 void SRBRoot::setLocale(UChar *locale, UErrorCode &errorCode) { 1194 if(U_FAILURE(errorCode)) { 1195 return; 1196 } 1197 1198 uprv_free(fLocale); 1199 fLocale = (char*) uprv_malloc(sizeof(char) * (u_strlen(locale)+1)); 1200 if(fLocale == NULL) { 1201 errorCode = U_MEMORY_ALLOCATION_ERROR; 1202 return; 1203 } 1204 1205 u_UCharsToChars(locale, fLocale, u_strlen(locale)+1); 1206 } 1207 1208 const char * 1209 SRBRoot::getKeyString(int32_t key) const { 1210 if (key < 0) { 1211 return fUsePoolBundle->fKeys + (key & 0x7fffffff); 1212 } else { 1213 return fKeys + key; 1214 } 1215 } 1216 1217 const char * 1218 SResource::getKeyString(const SRBRoot *bundle) const { 1219 if (fKey == -1) { 1220 return NULL; 1221 } 1222 return bundle->getKeyString(fKey); 1223 } 1224 1225 const char * 1226 SRBRoot::getKeyBytes(int32_t *pLength) const { 1227 *pLength = fKeysTop - fKeysBottom; 1228 return fKeys + fKeysBottom; 1229 } 1230 1231 int32_t 1232 SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) { 1233 int32_t keypos; 1234 1235 if (U_FAILURE(errorCode)) { 1236 return -1; 1237 } 1238 if (length < 0 || (keyBytes == NULL && length != 0)) { 1239 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 1240 return -1; 1241 } 1242 if (length == 0) { 1243 return fKeysTop; 1244 } 1245 1246 keypos = fKeysTop; 1247 fKeysTop += length; 1248 if (fKeysTop >= fKeysCapacity) { 1249 /* overflow - resize the keys buffer */ 1250 fKeysCapacity += KEY_SPACE_SIZE; 1251 fKeys = static_cast<char *>(uprv_realloc(fKeys, fKeysCapacity)); 1252 if(fKeys == NULL) { 1253 errorCode = U_MEMORY_ALLOCATION_ERROR; 1254 return -1; 1255 } 1256 } 1257 1258 uprv_memcpy(fKeys + keypos, keyBytes, length); 1259 1260 return keypos; 1261 } 1262 1263 int32_t 1264 SRBRoot::addTag(const char *tag, UErrorCode &errorCode) { 1265 int32_t keypos; 1266 1267 if (U_FAILURE(errorCode)) { 1268 return -1; 1269 } 1270 1271 if (tag == NULL) { 1272 /* no error: the root table and array items have no keys */ 1273 return -1; 1274 } 1275 1276 keypos = addKeyBytes(tag, (int32_t)(uprv_strlen(tag) + 1), errorCode); 1277 if (U_SUCCESS(errorCode)) { 1278 ++fKeysCount; 1279 } 1280 return keypos; 1281 } 1282 1283 static int32_t 1284 compareInt32(int32_t lPos, int32_t rPos) { 1285 /* 1286 * Compare possibly-negative key offsets. Don't just return lPos - rPos 1287 * because that is prone to negative-integer underflows. 1288 */ 1289 if (lPos < rPos) { 1290 return -1; 1291 } else if (lPos > rPos) { 1292 return 1; 1293 } else { 1294 return 0; 1295 } 1296 } 1297 1298 static int32_t U_CALLCONV 1299 compareKeySuffixes(const void *context, const void *l, const void *r) { 1300 const struct SRBRoot *bundle=(const struct SRBRoot *)context; 1301 int32_t lPos = ((const KeyMapEntry *)l)->oldpos; 1302 int32_t rPos = ((const KeyMapEntry *)r)->oldpos; 1303 const char *lStart = bundle->getKeyString(lPos); 1304 const char *lLimit = lStart; 1305 const char *rStart = bundle->getKeyString(rPos); 1306 const char *rLimit = rStart; 1307 int32_t diff; 1308 while (*lLimit != 0) { ++lLimit; } 1309 while (*rLimit != 0) { ++rLimit; } 1310 /* compare keys in reverse character order */ 1311 while (lStart < lLimit && rStart < rLimit) { 1312 diff = (int32_t)(uint8_t)*--lLimit - (int32_t)(uint8_t)*--rLimit; 1313 if (diff != 0) { 1314 return diff; 1315 } 1316 } 1317 /* sort equal suffixes by descending key length */ 1318 diff = (int32_t)(rLimit - rStart) - (int32_t)(lLimit - lStart); 1319 if (diff != 0) { 1320 return diff; 1321 } 1322 /* Sort pool bundle keys first (negative oldpos), and otherwise keys in parsing order. */ 1323 return compareInt32(lPos, rPos); 1324 } 1325 1326 static int32_t U_CALLCONV 1327 compareKeyNewpos(const void * /*context*/, const void *l, const void *r) { 1328 return compareInt32(((const KeyMapEntry *)l)->newpos, ((const KeyMapEntry *)r)->newpos); 1329 } 1330 1331 static int32_t U_CALLCONV 1332 compareKeyOldpos(const void * /*context*/, const void *l, const void *r) { 1333 return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos); 1334 } 1335 1336 void 1337 SRBRoot::compactKeys(UErrorCode &errorCode) { 1338 KeyMapEntry *map; 1339 char *keys; 1340 int32_t i; 1341 int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount; 1342 if (U_FAILURE(errorCode) || fKeysCount == 0 || fKeyMap != NULL) { 1343 return; 1344 } 1345 map = (KeyMapEntry *)uprv_malloc(keysCount * sizeof(KeyMapEntry)); 1346 if (map == NULL) { 1347 errorCode = U_MEMORY_ALLOCATION_ERROR; 1348 return; 1349 } 1350 keys = (char *)fUsePoolBundle->fKeys; 1351 for (i = 0; i < fUsePoolBundle->fKeysCount; ++i) { 1352 map[i].oldpos = 1353 (int32_t)(keys - fUsePoolBundle->fKeys) | 0x80000000; /* negative oldpos */ 1354 map[i].newpos = 0; 1355 while (*keys != 0) { ++keys; } /* skip the key */ 1356 ++keys; /* skip the NUL */ 1357 } 1358 keys = fKeys + fKeysBottom; 1359 for (; i < keysCount; ++i) { 1360 map[i].oldpos = (int32_t)(keys - fKeys); 1361 map[i].newpos = 0; 1362 while (*keys != 0) { ++keys; } /* skip the key */ 1363 ++keys; /* skip the NUL */ 1364 } 1365 /* Sort the keys so that each one is immediately followed by all of its suffixes. */ 1366 uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry), 1367 compareKeySuffixes, this, FALSE, &errorCode); 1368 /* 1369 * Make suffixes point into earlier, longer strings that contain them 1370 * and mark the old, now unused suffix bytes as deleted. 1371 */ 1372 if (U_SUCCESS(errorCode)) { 1373 keys = fKeys; 1374 for (i = 0; i < keysCount;) { 1375 /* 1376 * This key is not a suffix of the previous one; 1377 * keep this one and delete the following ones that are 1378 * suffixes of this one. 1379 */ 1380 const char *key; 1381 const char *keyLimit; 1382 int32_t j = i + 1; 1383 map[i].newpos = map[i].oldpos; 1384 if (j < keysCount && map[j].oldpos < 0) { 1385 /* Key string from the pool bundle, do not delete. */ 1386 i = j; 1387 continue; 1388 } 1389 key = getKeyString(map[i].oldpos); 1390 for (keyLimit = key; *keyLimit != 0; ++keyLimit) {} 1391 for (; j < keysCount && map[j].oldpos >= 0; ++j) { 1392 const char *k; 1393 char *suffix; 1394 const char *suffixLimit; 1395 int32_t offset; 1396 suffix = keys + map[j].oldpos; 1397 for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {} 1398 offset = (int32_t)(keyLimit - key) - (suffixLimit - suffix); 1399 if (offset < 0) { 1400 break; /* suffix cannot be longer than the original */ 1401 } 1402 /* Is it a suffix of the earlier, longer key? */ 1403 for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {} 1404 if (suffix == suffixLimit && *k == *suffixLimit) { 1405 map[j].newpos = map[i].oldpos + offset; /* yes, point to the earlier key */ 1406 /* mark the suffix as deleted */ 1407 while (*suffix != 0) { *suffix++ = 1; } 1408 *suffix = 1; 1409 } else { 1410 break; /* not a suffix, restart from here */ 1411 } 1412 } 1413 i = j; 1414 } 1415 /* 1416 * Re-sort by newpos, then modify the key characters array in-place 1417 * to squeeze out unused bytes, and readjust the newpos offsets. 1418 */ 1419 uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry), 1420 compareKeyNewpos, NULL, FALSE, &errorCode); 1421 if (U_SUCCESS(errorCode)) { 1422 int32_t oldpos, newpos, limit; 1423 oldpos = newpos = fKeysBottom; 1424 limit = fKeysTop; 1425 /* skip key offsets that point into the pool bundle rather than this new bundle */ 1426 for (i = 0; i < keysCount && map[i].newpos < 0; ++i) {} 1427 if (i < keysCount) { 1428 while (oldpos < limit) { 1429 if (keys[oldpos] == 1) { 1430 ++oldpos; /* skip unused bytes */ 1431 } else { 1432 /* adjust the new offsets for keys starting here */ 1433 while (i < keysCount && map[i].newpos == oldpos) { 1434 map[i++].newpos = newpos; 1435 } 1436 /* move the key characters to their new position */ 1437 keys[newpos++] = keys[oldpos++]; 1438 } 1439 } 1440 assert(i == keysCount); 1441 } 1442 fKeysTop = newpos; 1443 /* Re-sort once more, by old offsets for binary searching. */ 1444 uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry), 1445 compareKeyOldpos, NULL, FALSE, &errorCode); 1446 if (U_SUCCESS(errorCode)) { 1447 /* key size reduction by limit - newpos */ 1448 fKeyMap = map; 1449 map = NULL; 1450 } 1451 } 1452 } 1453 uprv_free(map); 1454 } 1455 1456 static int32_t U_CALLCONV 1457 compareStringSuffixes(const void * /*context*/, const void *l, const void *r) { 1458 const StringResource *left = *((const StringResource **)l); 1459 const StringResource *right = *((const StringResource **)r); 1460 const UChar *lStart = left->getBuffer(); 1461 const UChar *lLimit = lStart + left->length(); 1462 const UChar *rStart = right->getBuffer(); 1463 const UChar *rLimit = rStart + right->length(); 1464 int32_t diff; 1465 /* compare keys in reverse character order */ 1466 while (lStart < lLimit && rStart < rLimit) { 1467 diff = (int32_t)*--lLimit - (int32_t)*--rLimit; 1468 if (diff != 0) { 1469 return diff; 1470 } 1471 } 1472 /* sort equal suffixes by descending string length */ 1473 return right->length() - left->length(); 1474 } 1475 1476 static int32_t U_CALLCONV 1477 compareStringLengths(const void * /*context*/, const void *l, const void *r) { 1478 const StringResource *left = *((const StringResource **)l); 1479 const StringResource *right = *((const StringResource **)r); 1480 int32_t diff; 1481 /* Make "is suffix of another string" compare greater than a non-suffix. */ 1482 diff = (int)(left->fSame != NULL) - (int)(right->fSame != NULL); 1483 if (diff != 0) { 1484 return diff; 1485 } 1486 /* sort by ascending string length */ 1487 diff = left->length() - right->length(); 1488 if (diff != 0) { 1489 return diff; 1490 } 1491 // sort by descending size reduction 1492 diff = right->fNumUnitsSaved - left->fNumUnitsSaved; 1493 if (diff != 0) { 1494 return diff; 1495 } 1496 // sort lexically 1497 return left->fString.compare(right->fString); 1498 } 1499 1500 void 1501 StringResource::writeUTF16v2(int32_t base, UnicodeString &dest) { 1502 int32_t len = length(); 1503 fRes = URES_MAKE_RESOURCE(URES_STRING_V2, base + dest.length()); 1504 fWritten = TRUE; 1505 switch(fNumCharsForLength) { 1506 case 0: 1507 break; 1508 case 1: 1509 dest.append((UChar)(0xdc00 + len)); 1510 break; 1511 case 2: 1512 dest.append((UChar)(0xdfef + (len >> 16))); 1513 dest.append((UChar)len); 1514 break; 1515 case 3: 1516 dest.append((UChar)0xdfff); 1517 dest.append((UChar)(len >> 16)); 1518 dest.append((UChar)len); 1519 break; 1520 default: 1521 break; /* will not occur */ 1522 } 1523 dest.append(fString); 1524 dest.append((UChar)0); 1525 } 1526 1527 void 1528 SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) { 1529 if (U_FAILURE(errorCode)) { 1530 return; 1531 } 1532 // Store the StringResource pointers in an array for 1533 // easy sorting and processing. 1534 // We enumerate a set of strings, so there are no duplicates. 1535 int32_t count = uhash_count(stringSet); 1536 LocalArray<StringResource *> array(new StringResource *[count], errorCode); 1537 if (U_FAILURE(errorCode)) { 1538 return; 1539 } 1540 for (int32_t pos = UHASH_FIRST, i = 0; i < count; ++i) { 1541 array[i] = (StringResource *)uhash_nextElement(stringSet, &pos)->key.pointer; 1542 } 1543 /* Sort the strings so that each one is immediately followed by all of its suffixes. */ 1544 uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **), 1545 compareStringSuffixes, NULL, FALSE, &errorCode); 1546 if (U_FAILURE(errorCode)) { 1547 return; 1548 } 1549 /* 1550 * Make suffixes point into earlier, longer strings that contain them. 1551 * Temporarily use fSame and fSuffixOffset for suffix strings to 1552 * refer to the remaining ones. 1553 */ 1554 for (int32_t i = 0; i < count;) { 1555 /* 1556 * This string is not a suffix of the previous one; 1557 * write this one and subsume the following ones that are 1558 * suffixes of this one. 1559 */ 1560 StringResource *res = array[i]; 1561 res->fNumUnitsSaved = (res->fNumCopies - 1) * res->get16BitStringsLength(); 1562 // Whole duplicates of pool strings are already account for in fPoolStringIndexLimit, 1563 // see StringResource::handlePreflightStrings(). 1564 int32_t j; 1565 for (j = i + 1; j < count; ++j) { 1566 StringResource *suffixRes = array[j]; 1567 /* Is it a suffix of the earlier, longer string? */ 1568 if (res->fString.endsWith(suffixRes->fString)) { 1569 assert(res->length() != suffixRes->length()); // Set strings are unique. 1570 if (suffixRes->fWritten) { 1571 // Pool string, skip. 1572 } else if (suffixRes->fNumCharsForLength == 0) { 1573 /* yes, point to the earlier string */ 1574 suffixRes->fSame = res; 1575 suffixRes->fSuffixOffset = res->length() - suffixRes->length(); 1576 if (res->fWritten) { 1577 // Suffix-share res which is a pool string. 1578 // Compute the resource word and collect the maximum. 1579 suffixRes->fRes = 1580 res->fRes + res->fNumCharsForLength + suffixRes->fSuffixOffset; 1581 int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(suffixRes->fRes); 1582 if (poolStringIndex >= fPoolStringIndexLimit) { 1583 fPoolStringIndexLimit = poolStringIndex + 1; 1584 } 1585 suffixRes->fWritten = TRUE; 1586 } 1587 res->fNumUnitsSaved += suffixRes->fNumCopies * suffixRes->get16BitStringsLength(); 1588 } else { 1589 /* write the suffix by itself if we need explicit length */ 1590 } 1591 } else { 1592 break; /* not a suffix, restart from here */ 1593 } 1594 } 1595 i = j; 1596 } 1597 /* 1598 * Re-sort the strings by ascending length (except suffixes last) 1599 * to optimize for URES_TABLE16 and URES_ARRAY16: 1600 * Keep as many as possible within reach of 16-bit offsets. 1601 */ 1602 uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **), 1603 compareStringLengths, NULL, FALSE, &errorCode); 1604 if (U_FAILURE(errorCode)) { 1605 return; 1606 } 1607 if (fIsPoolBundle) { 1608 // Write strings that are sufficiently shared. 1609 // Avoid writing other strings. 1610 int32_t numStringsWritten = 0; 1611 int32_t numUnitsSaved = 0; 1612 int32_t numUnitsNotSaved = 0; 1613 for (int32_t i = 0; i < count; ++i) { 1614 StringResource *res = array[i]; 1615 // Maximum pool string index when suffix-sharing the last character. 1616 int32_t maxStringIndex = 1617 f16BitUnits.length() + res->fNumCharsForLength + res->length() - 1; 1618 if (res->fNumUnitsSaved >= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING && 1619 maxStringIndex < RES_MAX_OFFSET) { 1620 res->writeUTF16v2(0, f16BitUnits); 1621 ++numStringsWritten; 1622 numUnitsSaved += res->fNumUnitsSaved; 1623 } else { 1624 numUnitsNotSaved += res->fNumUnitsSaved; 1625 res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_STRING); 1626 res->fWritten = TRUE; 1627 } 1628 } 1629 if (f16BitUnits.isBogus()) { 1630 errorCode = U_MEMORY_ALLOCATION_ERROR; 1631 } 1632 if (getShowWarning()) { // not quiet 1633 printf("number of shared strings: %d\n", (int)numStringsWritten); 1634 printf("16-bit units for strings: %6d = %6d bytes\n", 1635 (int)f16BitUnits.length(), (int)f16BitUnits.length() * 2); 1636 printf("16-bit units saved: %6d = %6d bytes\n", 1637 (int)numUnitsSaved, (int)numUnitsSaved * 2); 1638 printf("16-bit units not saved: %6d = %6d bytes\n", 1639 (int)numUnitsNotSaved, (int)numUnitsNotSaved * 2); 1640 } 1641 } else { 1642 assert(fPoolStringIndexLimit <= fUsePoolBundle->fStringIndexLimit); 1643 /* Write the non-suffix strings. */ 1644 int32_t i; 1645 for (i = 0; i < count && array[i]->fSame == NULL; ++i) { 1646 StringResource *res = array[i]; 1647 if (!res->fWritten) { 1648 int32_t localStringIndex = f16BitUnits.length(); 1649 if (localStringIndex >= fLocalStringIndexLimit) { 1650 fLocalStringIndexLimit = localStringIndex + 1; 1651 } 1652 res->writeUTF16v2(fPoolStringIndexLimit, f16BitUnits); 1653 } 1654 } 1655 if (f16BitUnits.isBogus()) { 1656 errorCode = U_MEMORY_ALLOCATION_ERROR; 1657 return; 1658 } 1659 if (fWritePoolBundle != NULL && gFormatVersion >= 3) { 1660 PseudoListResource *poolStrings = 1661 static_cast<PseudoListResource *>(fWritePoolBundle->fRoot); 1662 for (i = 0; i < count && array[i]->fSame == NULL; ++i) { 1663 assert(!array[i]->fString.isEmpty()); 1664 StringResource *poolString = 1665 new StringResource(fWritePoolBundle, array[i]->fString, errorCode); 1666 if (poolString == NULL) { 1667 errorCode = U_MEMORY_ALLOCATION_ERROR; 1668 break; 1669 } 1670 poolStrings->add(poolString); 1671 } 1672 } 1673 /* Write the suffix strings. Make each point to the real string. */ 1674 for (; i < count; ++i) { 1675 StringResource *res = array[i]; 1676 if (res->fWritten) { 1677 continue; 1678 } 1679 StringResource *same = res->fSame; 1680 assert(res->length() != same->length()); // Set strings are unique. 1681 res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset; 1682 int32_t localStringIndex = (int32_t)RES_GET_OFFSET(res->fRes) - fPoolStringIndexLimit; 1683 // Suffixes of pool strings have been set already. 1684 assert(localStringIndex >= 0); 1685 if (localStringIndex >= fLocalStringIndexLimit) { 1686 fLocalStringIndexLimit = localStringIndex + 1; 1687 } 1688 res->fWritten = TRUE; 1689 } 1690 } 1691 // +1 to account for the initial zero in f16BitUnits 1692 assert(f16BitUnits.length() <= (f16BitStringsLength + 1)); 1693 } 1694