1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2000-2015, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * File reslist.cpp 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 02/21/00 weiv Creation. 15 ******************************************************************************* 16 */ 17 18 // Safer use of UnicodeString. 19 #ifndef UNISTR_FROM_CHAR_EXPLICIT 20 # define UNISTR_FROM_CHAR_EXPLICIT explicit 21 #endif 22 23 // Less important, but still a good idea. 24 #ifndef UNISTR_FROM_STRING_EXPLICIT 25 # define UNISTR_FROM_STRING_EXPLICIT explicit 26 #endif 27 28 #include <assert.h> 29 #include <stdio.h> 30 #include "unicode/localpointer.h" 31 #include "reslist.h" 32 #include "unewdata.h" 33 #include "unicode/ures.h" 34 #include "unicode/putil.h" 35 #include "errmsg.h" 36 37 #include "uarrsort.h" 38 #include "uelement.h" 39 #include "uhash.h" 40 #include "uinvchar.h" 41 #include "ustr_imp.h" 42 #include "unicode/utf16.h" 43 /* 44 * Align binary data at a 16-byte offset from the start of the resource bundle, 45 * to be safe for any data type it may contain. 46 */ 47 #define BIN_ALIGNMENT 16 48 49 // This numeric constant must be at least 1. 50 // If StringResource.fNumUnitsSaved == 0 then the string occurs only once, 51 // and it makes no sense to move it to the pool bundle. 52 // The larger the threshold for fNumUnitsSaved 53 // the smaller the savings, and the smaller the pool bundle. 54 // We trade some total size reduction to reduce the pool bundle a bit, 55 // so that one can reasonably save data size by 56 // removing bundle files without rebuilding the pool bundle. 57 // This can also help to keep the pool and total (pool+local) string indexes 58 // within 16 bits, that is, within range of Table16 and Array16 containers. 
59 #ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 60 # define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10 61 #endif 62 63 U_NAMESPACE_USE 64 65 static UBool gIncludeCopyright = FALSE; 66 static UBool gUsePoolBundle = FALSE; 67 static UBool gIsDefaultFormatVersion = TRUE; 68 static int32_t gFormatVersion = 3; 69 70 /* How do we store string values? */ 71 enum { 72 STRINGS_UTF16_V1, /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */ 73 STRINGS_UTF16_V2 /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */ 74 }; 75 76 static const int32_t MAX_IMPLICIT_STRING_LENGTH = 40; /* do not store the length explicitly for such strings */ 77 78 static const ResFile kNoPoolBundle; 79 80 /* 81 * res_none() returns the address of kNoResource, 82 * for use in non-error cases when no resource is to be added to the bundle. 83 * (NULL is used in error cases.) 84 */ 85 static SResource kNoResource; // TODO: const 86 87 static UDataInfo dataInfo= { 88 sizeof(UDataInfo), 89 0, 90 91 U_IS_BIG_ENDIAN, 92 U_CHARSET_FAMILY, 93 sizeof(UChar), 94 0, 95 96 {0x52, 0x65, 0x73, 0x42}, /* dataFormat="ResB" */ 97 {1, 3, 0, 0}, /* formatVersion */ 98 {1, 4, 0, 0} /* dataVersion take a look at version inside parsed resb*/ 99 }; 100 101 static const UVersionInfo gFormatVersions[4] = { /* indexed by a major-formatVersion integer */ 102 { 0, 0, 0, 0 }, 103 { 1, 3, 0, 0 }, 104 { 2, 0, 0, 0 }, 105 { 3, 0, 0, 0 } 106 }; 107 // Remember to update genrb.h GENRB_VERSION when changing the data format. 108 // (Or maybe we should remove GENRB_VERSION and report the ICU version number?) 109 110 static uint8_t calcPadding(uint32_t size) { 111 /* returns space we need to pad */ 112 return (uint8_t) ((size % sizeof(uint32_t)) ? 
(sizeof(uint32_t) - (size % sizeof(uint32_t))) : 0); 113 114 } 115 116 void setIncludeCopyright(UBool val){ 117 gIncludeCopyright=val; 118 } 119 120 UBool getIncludeCopyright(void){ 121 return gIncludeCopyright; 122 } 123 124 void setFormatVersion(int32_t formatVersion) { 125 gIsDefaultFormatVersion = FALSE; 126 gFormatVersion = formatVersion; 127 } 128 129 int32_t getFormatVersion() { 130 return gFormatVersion; 131 } 132 133 void setUsePoolBundle(UBool use) { 134 gUsePoolBundle = use; 135 } 136 137 // TODO: return const pointer, or find another way to express "none" 138 struct SResource* res_none() { 139 return &kNoResource; 140 } 141 142 SResource::SResource() 143 : fType(URES_NONE), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), fKey(-1), fKey16(-1), 144 line(0), fNext(NULL) { 145 ustr_init(&fComment); 146 } 147 148 SResource::SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment, 149 UErrorCode &errorCode) 150 : fType(type), fWritten(FALSE), fRes(RES_BOGUS), fRes16(-1), 151 fKey(bundle != NULL ? 
bundle->addTag(tag, errorCode) : -1), fKey16(-1), 152 line(0), fNext(NULL) { 153 ustr_init(&fComment); 154 if(comment != NULL) { 155 ustr_cpy(&fComment, comment, &errorCode); 156 } 157 } 158 159 SResource::~SResource() { 160 ustr_deinit(&fComment); 161 } 162 163 ContainerResource::~ContainerResource() { 164 SResource *current = fFirst; 165 while (current != NULL) { 166 SResource *next = current->fNext; 167 delete current; 168 current = next; 169 } 170 } 171 172 TableResource::~TableResource() {} 173 174 // TODO: clarify that containers adopt new items, even in error cases; use LocalPointer 175 void TableResource::add(SResource *res, int linenumber, UErrorCode &errorCode) { 176 if (U_FAILURE(errorCode) || res == NULL || res == &kNoResource) { 177 return; 178 } 179 180 /* remember this linenumber to report to the user if there is a duplicate key */ 181 res->line = linenumber; 182 183 /* here we need to traverse the list */ 184 ++fCount; 185 186 /* is the list still empty? */ 187 if (fFirst == NULL) { 188 fFirst = res; 189 res->fNext = NULL; 190 return; 191 } 192 193 const char *resKeyString = fRoot->fKeys + res->fKey; 194 195 SResource *current = fFirst; 196 197 SResource *prev = NULL; 198 while (current != NULL) { 199 const char *currentKeyString = fRoot->fKeys + current->fKey; 200 int diff; 201 /* 202 * formatVersion 1: compare key strings in native-charset order 203 * formatVersion 2 and up: compare key strings in ASCII order 204 */ 205 if (gFormatVersion == 1 || U_CHARSET_FAMILY == U_ASCII_FAMILY) { 206 diff = uprv_strcmp(currentKeyString, resKeyString); 207 } else { 208 diff = uprv_compareInvCharsAsAscii(currentKeyString, resKeyString); 209 } 210 if (diff < 0) { 211 prev = current; 212 current = current->fNext; 213 } else if (diff > 0) { 214 /* we're either in front of the list, or in the middle */ 215 if (prev == NULL) { 216 /* front of the list */ 217 fFirst = res; 218 } else { 219 /* middle of the list */ 220 prev->fNext = res; 221 } 222 223 res->fNext = 
current; 224 return; 225 } else { 226 /* Key already exists! ERROR! */ 227 error(linenumber, "duplicate key '%s' in table, first appeared at line %d", currentKeyString, current->line); 228 errorCode = U_UNSUPPORTED_ERROR; 229 return; 230 } 231 } 232 233 /* end of list */ 234 prev->fNext = res; 235 res->fNext = NULL; 236 } 237 238 ArrayResource::~ArrayResource() {} 239 240 void ArrayResource::add(SResource *res) { 241 if (res != NULL && res != &kNoResource) { 242 if (fFirst == NULL) { 243 fFirst = res; 244 } else { 245 fLast->fNext = res; 246 } 247 fLast = res; 248 ++fCount; 249 } 250 } 251 252 PseudoListResource::~PseudoListResource() {} 253 254 void PseudoListResource::add(SResource *res) { 255 if (res != NULL && res != &kNoResource) { 256 res->fNext = fFirst; 257 fFirst = res; 258 ++fCount; 259 } 260 } 261 262 StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type, 263 const UChar *value, int32_t len, 264 const UString* comment, UErrorCode &errorCode) 265 : SResource(bundle, tag, type, comment, errorCode) { 266 if (len == 0 && gFormatVersion > 1) { 267 fRes = URES_MAKE_EMPTY_RESOURCE(type); 268 fWritten = TRUE; 269 return; 270 } 271 272 fString.setTo(value, len); 273 fString.getTerminatedBuffer(); // Some code relies on NUL-termination. 274 if (U_SUCCESS(errorCode) && fString.isBogus()) { 275 errorCode = U_MEMORY_ALLOCATION_ERROR; 276 } 277 } 278 279 StringBaseResource::StringBaseResource(SRBRoot *bundle, int8_t type, 280 const icu::UnicodeString &value, UErrorCode &errorCode) 281 : SResource(bundle, NULL, type, NULL, errorCode), fString(value) { 282 if (value.isEmpty() && gFormatVersion > 1) { 283 fRes = URES_MAKE_EMPTY_RESOURCE(type); 284 fWritten = TRUE; 285 return; 286 } 287 288 fString.getTerminatedBuffer(); // Some code relies on NUL-termination. 289 if (U_SUCCESS(errorCode) && fString.isBogus()) { 290 errorCode = U_MEMORY_ALLOCATION_ERROR; 291 } 292 } 293 294 // Pool bundle string, alias the buffer. 
Guaranteed NUL-terminated and not empty. 295 StringBaseResource::StringBaseResource(int8_t type, const UChar *value, int32_t len, 296 UErrorCode &errorCode) 297 : SResource(NULL, NULL, type, NULL, errorCode), fString(TRUE, value, len) { 298 assert(len > 0); 299 assert(!fString.isBogus()); 300 } 301 302 StringBaseResource::~StringBaseResource() {} 303 304 static int32_t U_CALLCONV 305 string_hash(const UElement key) { 306 const StringResource *res = static_cast<const StringResource *>(key.pointer); 307 return res->fString.hashCode(); 308 } 309 310 static UBool U_CALLCONV 311 string_comp(const UElement key1, const UElement key2) { 312 const StringResource *res1 = static_cast<const StringResource *>(key1.pointer); 313 const StringResource *res2 = static_cast<const StringResource *>(key2.pointer); 314 return res1->fString == res2->fString; 315 } 316 317 StringResource::~StringResource() {} 318 319 AliasResource::~AliasResource() {} 320 321 IntResource::IntResource(SRBRoot *bundle, const char *tag, int32_t value, 322 const UString* comment, UErrorCode &errorCode) 323 : SResource(bundle, tag, URES_INT, comment, errorCode) { 324 fValue = value; 325 fRes = URES_MAKE_RESOURCE(URES_INT, value & RES_MAX_OFFSET); 326 fWritten = TRUE; 327 } 328 329 IntResource::~IntResource() {} 330 331 IntVectorResource::IntVectorResource(SRBRoot *bundle, const char *tag, 332 const UString* comment, UErrorCode &errorCode) 333 : SResource(bundle, tag, URES_INT_VECTOR, comment, errorCode), 334 fCount(0), fArray(new uint32_t[RESLIST_MAX_INT_VECTOR]) { 335 if (fArray == NULL) { 336 errorCode = U_MEMORY_ALLOCATION_ERROR; 337 return; 338 } 339 } 340 341 IntVectorResource::~IntVectorResource() { 342 delete[] fArray; 343 } 344 345 void IntVectorResource::add(int32_t value, UErrorCode &errorCode) { 346 if (U_SUCCESS(errorCode)) { 347 fArray[fCount++] = value; 348 } 349 } 350 351 BinaryResource::BinaryResource(SRBRoot *bundle, const char *tag, 352 uint32_t length, uint8_t *data, const char* fileName, 
353 const UString* comment, UErrorCode &errorCode) 354 : SResource(bundle, tag, URES_BINARY, comment, errorCode), 355 fLength(length), fData(NULL), fFileName(NULL) { 356 if (U_FAILURE(errorCode)) { 357 return; 358 } 359 if (fileName != NULL && *fileName != 0){ 360 fFileName = new char[uprv_strlen(fileName)+1]; 361 if (fFileName == NULL) { 362 errorCode = U_MEMORY_ALLOCATION_ERROR; 363 return; 364 } 365 uprv_strcpy(fFileName, fileName); 366 } 367 if (length > 0) { 368 fData = new uint8_t[length]; 369 if (fData == NULL) { 370 errorCode = U_MEMORY_ALLOCATION_ERROR; 371 return; 372 } 373 uprv_memcpy(fData, data, length); 374 } else { 375 if (gFormatVersion > 1) { 376 fRes = URES_MAKE_EMPTY_RESOURCE(URES_BINARY); 377 fWritten = TRUE; 378 } 379 } 380 } 381 382 BinaryResource::~BinaryResource() { 383 delete[] fData; 384 delete[] fFileName; 385 } 386 387 /* Writing Functions */ 388 389 void 390 StringResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, 391 UErrorCode &errorCode) { 392 assert(fSame == NULL); 393 fSame = static_cast<StringResource *>(uhash_get(stringSet, this)); 394 if (fSame != NULL) { 395 // This is a duplicate of a pool bundle string or of an earlier-visited string. 396 if (++fSame->fNumCopies == 1) { 397 assert(fSame->fWritten); 398 int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(fSame->fRes); 399 if (poolStringIndex >= bundle->fPoolStringIndexLimit) { 400 bundle->fPoolStringIndexLimit = poolStringIndex + 1; 401 } 402 } 403 return; 404 } 405 /* Put this string into the set for finding duplicates. */ 406 fNumCopies = 1; 407 uhash_put(stringSet, this, this, &errorCode); 408 409 if (bundle->fStringsForm != STRINGS_UTF16_V1) { 410 int32_t len = length(); 411 if (len <= MAX_IMPLICIT_STRING_LENGTH && 412 !U16_IS_TRAIL(fString[0]) && fString.indexOf((UChar)0) < 0) { 413 /* 414 * This string will be stored without an explicit length. 415 * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen(). 
416 */ 417 fNumCharsForLength = 0; 418 } else if (len <= 0x3ee) { 419 fNumCharsForLength = 1; 420 } else if (len <= 0xfffff) { 421 fNumCharsForLength = 2; 422 } else { 423 fNumCharsForLength = 3; 424 } 425 bundle->f16BitStringsLength += fNumCharsForLength + len + 1; /* +1 for the NUL */ 426 } 427 } 428 429 void 430 ContainerResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, 431 UErrorCode &errorCode) { 432 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 433 current->preflightStrings(bundle, stringSet, errorCode); 434 } 435 } 436 437 void 438 SResource::preflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) { 439 if (U_FAILURE(errorCode)) { 440 return; 441 } 442 if (fRes != RES_BOGUS) { 443 /* 444 * The resource item word was already precomputed, which means 445 * no further data needs to be written. 446 * This might be an integer, or an empty string/binary/etc. 447 */ 448 return; 449 } 450 handlePreflightStrings(bundle, stringSet, errorCode); 451 } 452 453 void 454 SResource::handlePreflightStrings(SRBRoot * /*bundle*/, UHashtable * /*stringSet*/, 455 UErrorCode & /*errorCode*/) { 456 /* Neither a string nor a container. 
*/ 457 } 458 459 int32_t 460 SRBRoot::makeRes16(uint32_t resWord) const { 461 if (resWord == 0) { 462 return 0; /* empty string */ 463 } 464 uint32_t type = RES_GET_TYPE(resWord); 465 int32_t offset = (int32_t)RES_GET_OFFSET(resWord); 466 if (type == URES_STRING_V2) { 467 assert(offset > 0); 468 if (offset < fPoolStringIndexLimit) { 469 if (offset < fPoolStringIndex16Limit) { 470 return offset; 471 } 472 } else { 473 offset = offset - fPoolStringIndexLimit + fPoolStringIndex16Limit; 474 if (offset <= 0xffff) { 475 return offset; 476 } 477 } 478 } 479 return -1; 480 } 481 482 int32_t 483 SRBRoot::mapKey(int32_t oldpos) const { 484 const KeyMapEntry *map = fKeyMap; 485 if (map == NULL) { 486 return oldpos; 487 } 488 int32_t i, start, limit; 489 490 /* do a binary search for the old, pre-compactKeys() key offset */ 491 start = fUsePoolBundle->fKeysCount; 492 limit = start + fKeysCount; 493 while (start < limit - 1) { 494 i = (start + limit) / 2; 495 if (oldpos < map[i].oldpos) { 496 limit = i; 497 } else { 498 start = i; 499 } 500 } 501 assert(oldpos == map[start].oldpos); 502 return map[start].newpos; 503 } 504 505 /* 506 * Only called for UTF-16 v1 strings and duplicate UTF-16 v2 strings. 507 * For unique UTF-16 v2 strings, write16() sees fRes != RES_BOGUS 508 * and exits early. 509 */ 510 void 511 StringResource::handleWrite16(SRBRoot * /*bundle*/) { 512 SResource *same; 513 if ((same = fSame) != NULL) { 514 /* This is a duplicate. 
*/ 515 assert(same->fRes != RES_BOGUS && same->fWritten); 516 fRes = same->fRes; 517 fWritten = same->fWritten; 518 } 519 } 520 521 void 522 ContainerResource::writeAllRes16(SRBRoot *bundle) { 523 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 524 bundle->f16BitUnits.append((UChar)current->fRes16); 525 } 526 fWritten = TRUE; 527 } 528 529 void 530 ArrayResource::handleWrite16(SRBRoot *bundle) { 531 if (fCount == 0 && gFormatVersion > 1) { 532 fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY); 533 fWritten = TRUE; 534 return; 535 } 536 537 int32_t res16 = 0; 538 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 539 current->write16(bundle); 540 res16 |= current->fRes16; 541 } 542 if (fCount <= 0xffff && res16 >= 0 && gFormatVersion > 1) { 543 fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnits.length()); 544 bundle->f16BitUnits.append((UChar)fCount); 545 writeAllRes16(bundle); 546 } 547 } 548 549 void 550 TableResource::handleWrite16(SRBRoot *bundle) { 551 if (fCount == 0 && gFormatVersion > 1) { 552 fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE); 553 fWritten = TRUE; 554 return; 555 } 556 /* Find the smallest table type that fits the data. 
*/ 557 int32_t key16 = 0; 558 int32_t res16 = 0; 559 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 560 current->write16(bundle); 561 key16 |= current->fKey16; 562 res16 |= current->fRes16; 563 } 564 if(fCount > (uint32_t)bundle->fMaxTableLength) { 565 bundle->fMaxTableLength = fCount; 566 } 567 if (fCount <= 0xffff && key16 >= 0) { 568 if (res16 >= 0 && gFormatVersion > 1) { 569 /* 16-bit count, key offsets and values */ 570 fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnits.length()); 571 bundle->f16BitUnits.append((UChar)fCount); 572 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 573 bundle->f16BitUnits.append((UChar)current->fKey16); 574 } 575 writeAllRes16(bundle); 576 } else { 577 /* 16-bit count, 16-bit key offsets, 32-bit values */ 578 fTableType = URES_TABLE; 579 } 580 } else { 581 /* 32-bit count, key offsets and values */ 582 fTableType = URES_TABLE32; 583 } 584 } 585 586 void 587 PseudoListResource::handleWrite16(SRBRoot * /*bundle*/) { 588 fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE); 589 fWritten = TRUE; 590 } 591 592 void 593 SResource::write16(SRBRoot *bundle) { 594 if (fKey >= 0) { 595 // A tagged resource has a non-negative key index into the parsed key strings. 596 // compactKeys() built a map from parsed key index to the final key index. 597 // After the mapping, negative key indexes are used for shared pool bundle keys. 598 fKey = bundle->mapKey(fKey); 599 // If the key index fits into a Key16 for a Table or Table16, 600 // then set the fKey16 field accordingly. 601 // Otherwise keep it at -1. 
602 if (fKey >= 0) { 603 if (fKey < bundle->fLocalKeyLimit) { 604 fKey16 = fKey; 605 } 606 } else { 607 int32_t poolKeyIndex = fKey & 0x7fffffff; 608 if (poolKeyIndex <= 0xffff) { 609 poolKeyIndex += bundle->fLocalKeyLimit; 610 if (poolKeyIndex <= 0xffff) { 611 fKey16 = poolKeyIndex; 612 } 613 } 614 } 615 } 616 /* 617 * fRes != RES_BOGUS: 618 * The resource item word was already precomputed, which means 619 * no further data needs to be written. 620 * This might be an integer, or an empty or UTF-16 v2 string, 621 * an empty binary, etc. 622 */ 623 if (fRes == RES_BOGUS) { 624 handleWrite16(bundle); 625 } 626 // Compute fRes16 for precomputed as well as just-computed fRes. 627 fRes16 = bundle->makeRes16(fRes); 628 } 629 630 void 631 SResource::handleWrite16(SRBRoot * /*bundle*/) { 632 /* Only a few resource types write 16-bit units. */ 633 } 634 635 /* 636 * Only called for UTF-16 v1 strings, and for aliases. 637 * For UTF-16 v2 strings, preWrite() sees fRes != RES_BOGUS 638 * and exits early. 639 */ 640 void 641 StringBaseResource::handlePreWrite(uint32_t *byteOffset) { 642 /* Write the UTF-16 v1 string. 
*/ 643 fRes = URES_MAKE_RESOURCE(fType, *byteOffset >> 2); 644 *byteOffset += 4 + (length() + 1) * U_SIZEOF_UCHAR; 645 } 646 647 void 648 IntVectorResource::handlePreWrite(uint32_t *byteOffset) { 649 if (fCount == 0 && gFormatVersion > 1) { 650 fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR); 651 fWritten = TRUE; 652 } else { 653 fRes = URES_MAKE_RESOURCE(URES_INT_VECTOR, *byteOffset >> 2); 654 *byteOffset += (1 + fCount) * 4; 655 } 656 } 657 658 void 659 BinaryResource::handlePreWrite(uint32_t *byteOffset) { 660 uint32_t pad = 0; 661 uint32_t dataStart = *byteOffset + sizeof(fLength); 662 663 if (dataStart % BIN_ALIGNMENT) { 664 pad = (BIN_ALIGNMENT - dataStart % BIN_ALIGNMENT); 665 *byteOffset += pad; /* pad == 4 or 8 or 12 */ 666 } 667 fRes = URES_MAKE_RESOURCE(URES_BINARY, *byteOffset >> 2); 668 *byteOffset += 4 + fLength; 669 } 670 671 void 672 ContainerResource::preWriteAllRes(uint32_t *byteOffset) { 673 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 674 current->preWrite(byteOffset); 675 } 676 } 677 678 void 679 ArrayResource::handlePreWrite(uint32_t *byteOffset) { 680 preWriteAllRes(byteOffset); 681 fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2); 682 *byteOffset += (1 + fCount) * 4; 683 } 684 685 void 686 TableResource::handlePreWrite(uint32_t *byteOffset) { 687 preWriteAllRes(byteOffset); 688 if (fTableType == URES_TABLE) { 689 /* 16-bit count, 16-bit key offsets, 32-bit values */ 690 fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2); 691 *byteOffset += 2 + fCount * 6; 692 } else { 693 /* 32-bit count, key offsets and values */ 694 fRes = URES_MAKE_RESOURCE(URES_TABLE32, *byteOffset >> 2); 695 *byteOffset += 4 + fCount * 8; 696 } 697 } 698 699 void 700 SResource::preWrite(uint32_t *byteOffset) { 701 if (fRes != RES_BOGUS) { 702 /* 703 * The resource item word was already precomputed, which means 704 * no further data needs to be written. 
705 * This might be an integer, or an empty or UTF-16 v2 string, 706 * an empty binary, etc. 707 */ 708 return; 709 } 710 handlePreWrite(byteOffset); 711 *byteOffset += calcPadding(*byteOffset); 712 } 713 714 void 715 SResource::handlePreWrite(uint32_t * /*byteOffset*/) { 716 assert(FALSE); 717 } 718 719 /* 720 * Only called for UTF-16 v1 strings, and for aliases. For UTF-16 v2 strings, 721 * write() sees fWritten and exits early. 722 */ 723 void 724 StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 725 /* Write the UTF-16 v1 string. */ 726 int32_t len = length(); 727 udata_write32(mem, len); 728 udata_writeUString(mem, getBuffer(), len + 1); 729 *byteOffset += 4 + (len + 1) * U_SIZEOF_UCHAR; 730 fWritten = TRUE; 731 } 732 733 void 734 ContainerResource::writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset) { 735 uint32_t i = 0; 736 for (SResource *current = fFirst; current != NULL; ++i, current = current->fNext) { 737 current->write(mem, byteOffset); 738 } 739 assert(i == fCount); 740 } 741 742 void 743 ContainerResource::writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset) { 744 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 745 udata_write32(mem, current->fRes); 746 } 747 *byteOffset += fCount * 4; 748 } 749 750 void 751 ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 752 writeAllRes(mem, byteOffset); 753 udata_write32(mem, fCount); 754 *byteOffset += 4; 755 writeAllRes32(mem, byteOffset); 756 } 757 758 void 759 IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 760 udata_write32(mem, fCount); 761 for(uint32_t i = 0; i < fCount; ++i) { 762 udata_write32(mem, fArray[i]); 763 } 764 *byteOffset += (1 + fCount) * 4; 765 } 766 767 void 768 BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 769 uint32_t pad = 0; 770 uint32_t dataStart = *byteOffset + sizeof(fLength); 771 772 if (dataStart % BIN_ALIGNMENT) { 773 pad = (BIN_ALIGNMENT - 
dataStart % BIN_ALIGNMENT); 774 udata_writePadding(mem, pad); /* pad == 4 or 8 or 12 */ 775 *byteOffset += pad; 776 } 777 778 udata_write32(mem, fLength); 779 if (fLength > 0) { 780 udata_writeBlock(mem, fData, fLength); 781 } 782 *byteOffset += 4 + fLength; 783 } 784 785 void 786 TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) { 787 writeAllRes(mem, byteOffset); 788 if(fTableType == URES_TABLE) { 789 udata_write16(mem, (uint16_t)fCount); 790 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 791 udata_write16(mem, current->fKey16); 792 } 793 *byteOffset += (1 + fCount)* 2; 794 if ((fCount & 1) == 0) { 795 /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */ 796 udata_writePadding(mem, 2); 797 *byteOffset += 2; 798 } 799 } else /* URES_TABLE32 */ { 800 udata_write32(mem, fCount); 801 for (SResource *current = fFirst; current != NULL; current = current->fNext) { 802 udata_write32(mem, (uint32_t)current->fKey); 803 } 804 *byteOffset += (1 + fCount)* 4; 805 } 806 writeAllRes32(mem, byteOffset); 807 } 808 809 void 810 SResource::write(UNewDataMemory *mem, uint32_t *byteOffset) { 811 if (fWritten) { 812 assert(fRes != RES_BOGUS); 813 return; 814 } 815 handleWrite(mem, byteOffset); 816 uint8_t paddingSize = calcPadding(*byteOffset); 817 if (paddingSize > 0) { 818 udata_writePadding(mem, paddingSize); 819 *byteOffset += paddingSize; 820 } 821 fWritten = TRUE; 822 } 823 824 void 825 SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/) { 826 assert(FALSE); 827 } 828 829 void SRBRoot::write(const char *outputDir, const char *outputPkg, 830 char *writtenFilename, int writtenFilenameLen, 831 UErrorCode &errorCode) { 832 UNewDataMemory *mem = NULL; 833 uint32_t byteOffset = 0; 834 uint32_t top, size; 835 char dataName[1024]; 836 int32_t indexes[URES_INDEX_TOP]; 837 838 compactKeys(errorCode); 839 /* 840 * Add padding bytes to fKeys so that fKeysTop is 
4-aligned. 841 * Safe because the capacity is a multiple of 4. 842 */ 843 while (fKeysTop & 3) { 844 fKeys[fKeysTop++] = (char)0xaa; 845 } 846 /* 847 * In URES_TABLE, use all local key offsets that fit into 16 bits, 848 * and use the remaining 16-bit offsets for pool key offsets 849 * if there are any. 850 * If there are no local keys, then use the whole 16-bit space 851 * for pool key offsets. 852 * Note: This cannot be changed without changing the major formatVersion. 853 */ 854 if (fKeysBottom < fKeysTop) { 855 if (fKeysTop <= 0x10000) { 856 fLocalKeyLimit = fKeysTop; 857 } else { 858 fLocalKeyLimit = 0x10000; 859 } 860 } else { 861 fLocalKeyLimit = 0; 862 } 863 864 UHashtable *stringSet; 865 if (gFormatVersion > 1) { 866 stringSet = uhash_open(string_hash, string_comp, string_comp, &errorCode); 867 if (U_SUCCESS(errorCode) && 868 fUsePoolBundle != NULL && fUsePoolBundle->fStrings != NULL) { 869 for (SResource *current = fUsePoolBundle->fStrings->fFirst; 870 current != NULL; 871 current = current->fNext) { 872 StringResource *sr = static_cast<StringResource *>(current); 873 sr->fNumCopies = 0; 874 sr->fNumUnitsSaved = 0; 875 uhash_put(stringSet, sr, sr, &errorCode); 876 } 877 } 878 fRoot->preflightStrings(this, stringSet, errorCode); 879 } else { 880 stringSet = NULL; 881 } 882 if (fStringsForm == STRINGS_UTF16_V2 && f16BitStringsLength > 0) { 883 compactStringsV2(stringSet, errorCode); 884 } 885 uhash_close(stringSet); 886 if (U_FAILURE(errorCode)) { 887 return; 888 } 889 890 int32_t formatVersion = gFormatVersion; 891 if (fPoolStringIndexLimit != 0) { 892 int32_t sum = fPoolStringIndexLimit + fLocalStringIndexLimit; 893 if ((sum - 1) > RES_MAX_OFFSET) { 894 errorCode = U_BUFFER_OVERFLOW_ERROR; 895 return; 896 } 897 if (fPoolStringIndexLimit < 0x10000 && sum <= 0x10000) { 898 // 16-bit indexes work for all pool + local strings. 
899 fPoolStringIndex16Limit = fPoolStringIndexLimit; 900 } else { 901 // Set the pool index threshold so that 16-bit indexes work 902 // for some pool strings and some local strings. 903 fPoolStringIndex16Limit = (int32_t)( 904 ((int64_t)fPoolStringIndexLimit * 0xffff) / sum); 905 } 906 } else if (gIsDefaultFormatVersion && formatVersion == 3 && !fIsPoolBundle) { 907 // If we just default to formatVersion 3 908 // but there are no pool bundle strings to share 909 // and we do not write a pool bundle, 910 // then write formatVersion 2 which is just as good. 911 formatVersion = 2; 912 } 913 914 fRoot->write16(this); 915 if (f16BitUnits.isBogus()) { 916 errorCode = U_MEMORY_ALLOCATION_ERROR; 917 return; 918 } 919 if (f16BitUnits.length() & 1) { 920 f16BitUnits.append((UChar)0xaaaa); /* pad to multiple of 4 bytes */ 921 } 922 /* all keys have been mapped */ 923 uprv_free(fKeyMap); 924 fKeyMap = NULL; 925 926 byteOffset = fKeysTop + f16BitUnits.length() * 2; 927 fRoot->preWrite(&byteOffset); 928 929 /* total size including the root item */ 930 top = byteOffset; 931 932 if (writtenFilename && writtenFilenameLen) { 933 *writtenFilename = 0; 934 } 935 936 if (writtenFilename) { 937 int32_t off = 0, len = 0; 938 if (outputDir) { 939 len = (int32_t)uprv_strlen(outputDir); 940 if (len > writtenFilenameLen) { 941 len = writtenFilenameLen; 942 } 943 uprv_strncpy(writtenFilename, outputDir, len); 944 } 945 if (writtenFilenameLen -= len) { 946 off += len; 947 writtenFilename[off] = U_FILE_SEP_CHAR; 948 if (--writtenFilenameLen) { 949 ++off; 950 if(outputPkg != NULL) 951 { 952 uprv_strcpy(writtenFilename+off, outputPkg); 953 off += (int32_t)uprv_strlen(outputPkg); 954 writtenFilename[off] = '_'; 955 ++off; 956 } 957 958 len = (int32_t)uprv_strlen(fLocale); 959 if (len > writtenFilenameLen) { 960 len = writtenFilenameLen; 961 } 962 uprv_strncpy(writtenFilename + off, fLocale, len); 963 if (writtenFilenameLen -= len) { 964 off += len; 965 len = 5; 966 if (len > writtenFilenameLen) { 
967 len = writtenFilenameLen; 968 } 969 uprv_strncpy(writtenFilename + off, ".res", len); 970 } 971 } 972 } 973 } 974 975 if(outputPkg) 976 { 977 uprv_strcpy(dataName, outputPkg); 978 uprv_strcat(dataName, "_"); 979 uprv_strcat(dataName, fLocale); 980 } 981 else 982 { 983 uprv_strcpy(dataName, fLocale); 984 } 985 986 uprv_memcpy(dataInfo.formatVersion, gFormatVersions + formatVersion, sizeof(UVersionInfo)); 987 988 mem = udata_create(outputDir, "res", dataName, 989 &dataInfo, (gIncludeCopyright==TRUE)? U_COPYRIGHT_STRING:NULL, &errorCode); 990 if(U_FAILURE(errorCode)){ 991 return; 992 } 993 994 /* write the root item */ 995 udata_write32(mem, fRoot->fRes); 996 997 /* 998 * formatVersion 1.1 (ICU 2.8): 999 * write int32_t indexes[] after root and before the key strings 1000 * to make it easier to parse resource bundles in icuswap or from Java etc. 1001 */ 1002 uprv_memset(indexes, 0, sizeof(indexes)); 1003 indexes[URES_INDEX_LENGTH]= fIndexLength; 1004 indexes[URES_INDEX_KEYS_TOP]= fKeysTop>>2; 1005 indexes[URES_INDEX_RESOURCES_TOP]= (int32_t)(top>>2); 1006 indexes[URES_INDEX_BUNDLE_TOP]= indexes[URES_INDEX_RESOURCES_TOP]; 1007 indexes[URES_INDEX_MAX_TABLE_LENGTH]= fMaxTableLength; 1008 1009 /* 1010 * formatVersion 1.2 (ICU 3.6): 1011 * write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set 1012 * the memset() above initialized all indexes[] to 0 1013 */ 1014 if (fNoFallback) { 1015 indexes[URES_INDEX_ATTRIBUTES]=URES_ATT_NO_FALLBACK; 1016 } 1017 /* 1018 * formatVersion 2.0 (ICU 4.4): 1019 * more compact string value storage, optional pool bundle 1020 */ 1021 if (URES_INDEX_16BIT_TOP < fIndexLength) { 1022 indexes[URES_INDEX_16BIT_TOP] = (fKeysTop>>2) + (f16BitUnits.length()>>1); 1023 } 1024 if (URES_INDEX_POOL_CHECKSUM < fIndexLength) { 1025 if (fIsPoolBundle) { 1026 indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK; 1027 uint32_t checksum = computeCRC((const char *)(fKeys + fKeysBottom), 1028 
(uint32_t)(fKeysTop - fKeysBottom), 0); 1029 if (f16BitUnits.length() <= 1) { 1030 // no pool strings to checksum 1031 } else if (U_IS_BIG_ENDIAN) { 1032 checksum = computeCRC((const char *)f16BitUnits.getBuffer(), 1033 (uint32_t)f16BitUnits.length() * 2, checksum); 1034 } else { 1035 // Swap to big-endian so we get the same checksum on all platforms 1036 // (except for charset family, due to the key strings). 1037 UnicodeString s(f16BitUnits); 1038 s.append((UChar)1); // Ensure that we own this buffer. 1039 assert(!s.isBogus()); 1040 uint16_t *p = (uint16_t *)s.getBuffer(); 1041 for (int32_t count = f16BitUnits.length(); count > 0; --count) { 1042 uint16_t x = *p; 1043 *p++ = (uint16_t)((x << 8) | (x >> 8)); 1044 } 1045 checksum = computeCRC((const char *)p, 1046 (uint32_t)f16BitUnits.length() * 2, checksum); 1047 } 1048 indexes[URES_INDEX_POOL_CHECKSUM] = (int32_t)checksum; 1049 } else if (gUsePoolBundle) { 1050 indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE; 1051 indexes[URES_INDEX_POOL_CHECKSUM] = fUsePoolBundle->fChecksum; 1052 } 1053 } 1054 // formatVersion 3 (ICU 56): 1055 // share string values via pool bundle strings 1056 indexes[URES_INDEX_LENGTH] |= fPoolStringIndexLimit << 8; // bits 23..0 -> 31..8 1057 indexes[URES_INDEX_ATTRIBUTES] |= (fPoolStringIndexLimit >> 12) & 0xf000; // bits 27..24 -> 15..12 1058 indexes[URES_INDEX_ATTRIBUTES] |= fPoolStringIndex16Limit << 16; 1059 1060 /* write the indexes[] */ 1061 udata_writeBlock(mem, indexes, fIndexLength*4); 1062 1063 /* write the table key strings */ 1064 udata_writeBlock(mem, fKeys+fKeysBottom, 1065 fKeysTop-fKeysBottom); 1066 1067 /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */ 1068 udata_writeBlock(mem, f16BitUnits.getBuffer(), f16BitUnits.length()*2); 1069 1070 /* write all of the bundle contents: the root item and its children */ 1071 byteOffset = fKeysTop + f16BitUnits.length() * 2; 1072 fRoot->write(mem, &byteOffset); 1073 assert(byteOffset == top); 1074 1075 size = 
udata_finish(mem, &errorCode); 1076 if(top != size) { 1077 fprintf(stderr, "genrb error: wrote %u bytes but counted %u\n", 1078 (int)size, (int)top); 1079 errorCode = U_INTERNAL_PROGRAM_ERROR; 1080 } 1081 } 1082 1083 /* Opening Functions */ 1084 1085 TableResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { 1086 LocalPointer<TableResource> res(new TableResource(bundle, tag, comment, *status), *status); 1087 return U_SUCCESS(*status) ? res.orphan() : NULL; 1088 } 1089 1090 ArrayResource* array_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { 1091 LocalPointer<ArrayResource> res(new ArrayResource(bundle, tag, comment, *status), *status); 1092 return U_SUCCESS(*status) ? res.orphan() : NULL; 1093 } 1094 1095 struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) { 1096 LocalPointer<SResource> res( 1097 new StringResource(bundle, tag, value, len, comment, *status), *status); 1098 return U_SUCCESS(*status) ? res.orphan() : NULL; 1099 } 1100 1101 struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) { 1102 LocalPointer<SResource> res( 1103 new AliasResource(bundle, tag, value, len, comment, *status), *status); 1104 return U_SUCCESS(*status) ? res.orphan() : NULL; 1105 } 1106 1107 IntVectorResource *intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) { 1108 LocalPointer<IntVectorResource> res( 1109 new IntVectorResource(bundle, tag, comment, *status), *status); 1110 return U_SUCCESS(*status) ? 
res.orphan() : NULL; 1111 } 1112 1113 struct SResource *int_open(struct SRBRoot *bundle, const char *tag, int32_t value, const struct UString* comment, UErrorCode *status) { 1114 LocalPointer<SResource> res(new IntResource(bundle, tag, value, comment, *status), *status); 1115 return U_SUCCESS(*status) ? res.orphan() : NULL; 1116 } 1117 1118 struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t length, uint8_t *data, const char* fileName, const struct UString* comment, UErrorCode *status) { 1119 LocalPointer<SResource> res( 1120 new BinaryResource(bundle, tag, length, data, fileName, comment, *status), *status); 1121 return U_SUCCESS(*status) ? res.orphan() : NULL; 1122 } 1123 1124 SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCode) 1125 : fRoot(NULL), fLocale(NULL), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE), 1126 fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle), 1127 fKeys(NULL), fKeyMap(NULL), 1128 fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), fKeysCount(0), fLocalKeyLimit(0), 1129 f16BitUnits(), f16BitStringsLength(0), 1130 fUsePoolBundle(&kNoPoolBundle), 1131 fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0), 1132 fWritePoolBundle(NULL) { 1133 if (U_FAILURE(errorCode)) { 1134 return; 1135 } 1136 1137 if (gFormatVersion > 1) { 1138 // f16BitUnits must start with a zero for empty resources. 1139 // We might be able to omit it if there are no empty 16-bit resources. 
1140 f16BitUnits.append((UChar)0); 1141 } 1142 1143 fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE); 1144 if (isPoolBundle) { 1145 fRoot = new PseudoListResource(this, errorCode); 1146 } else { 1147 fRoot = new TableResource(this, NULL, comment, errorCode); 1148 } 1149 if (fKeys == NULL || fRoot == NULL || U_FAILURE(errorCode)) { 1150 if (U_SUCCESS(errorCode)) { 1151 errorCode = U_MEMORY_ALLOCATION_ERROR; 1152 } 1153 return; 1154 } 1155 1156 fKeysCapacity = KEY_SPACE_SIZE; 1157 /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */ 1158 if (gUsePoolBundle || isPoolBundle) { 1159 fIndexLength = URES_INDEX_POOL_CHECKSUM + 1; 1160 } else if (gFormatVersion >= 2) { 1161 fIndexLength = URES_INDEX_16BIT_TOP + 1; 1162 } else /* formatVersion 1 */ { 1163 fIndexLength = URES_INDEX_ATTRIBUTES + 1; 1164 } 1165 fKeysBottom = (1 /* root */ + fIndexLength) * 4; 1166 uprv_memset(fKeys, 0, fKeysBottom); 1167 fKeysTop = fKeysBottom; 1168 1169 if (gFormatVersion == 1) { 1170 fStringsForm = STRINGS_UTF16_V1; 1171 } else { 1172 fStringsForm = STRINGS_UTF16_V2; 1173 } 1174 } 1175 1176 /* Closing Functions */ 1177 1178 void res_close(struct SResource *res) { 1179 delete res; 1180 } 1181 1182 SRBRoot::~SRBRoot() { 1183 delete fRoot; 1184 uprv_free(fLocale); 1185 uprv_free(fKeys); 1186 uprv_free(fKeyMap); 1187 } 1188 1189 /* Misc Functions */ 1190 1191 void SRBRoot::setLocale(UChar *locale, UErrorCode &errorCode) { 1192 if(U_FAILURE(errorCode)) { 1193 return; 1194 } 1195 1196 uprv_free(fLocale); 1197 fLocale = (char*) uprv_malloc(sizeof(char) * (u_strlen(locale)+1)); 1198 if(fLocale == NULL) { 1199 errorCode = U_MEMORY_ALLOCATION_ERROR; 1200 return; 1201 } 1202 1203 u_UCharsToChars(locale, fLocale, u_strlen(locale)+1); 1204 } 1205 1206 const char * 1207 SRBRoot::getKeyString(int32_t key) const { 1208 if (key < 0) { 1209 return fUsePoolBundle->fKeys + (key & 0x7fffffff); 1210 } else { 1211 return fKeys + key; 1212 } 1213 } 1214 1215 const char * 1216 
SResource::getKeyString(const SRBRoot *bundle) const { 1217 if (fKey == -1) { 1218 return NULL; 1219 } 1220 return bundle->getKeyString(fKey); 1221 } 1222 1223 const char * 1224 SRBRoot::getKeyBytes(int32_t *pLength) const { 1225 *pLength = fKeysTop - fKeysBottom; 1226 return fKeys + fKeysBottom; 1227 } 1228 1229 int32_t 1230 SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) { 1231 int32_t keypos; 1232 1233 if (U_FAILURE(errorCode)) { 1234 return -1; 1235 } 1236 if (length < 0 || (keyBytes == NULL && length != 0)) { 1237 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 1238 return -1; 1239 } 1240 if (length == 0) { 1241 return fKeysTop; 1242 } 1243 1244 keypos = fKeysTop; 1245 fKeysTop += length; 1246 if (fKeysTop >= fKeysCapacity) { 1247 /* overflow - resize the keys buffer */ 1248 fKeysCapacity += KEY_SPACE_SIZE; 1249 fKeys = static_cast<char *>(uprv_realloc(fKeys, fKeysCapacity)); 1250 if(fKeys == NULL) { 1251 errorCode = U_MEMORY_ALLOCATION_ERROR; 1252 return -1; 1253 } 1254 } 1255 1256 uprv_memcpy(fKeys + keypos, keyBytes, length); 1257 1258 return keypos; 1259 } 1260 1261 int32_t 1262 SRBRoot::addTag(const char *tag, UErrorCode &errorCode) { 1263 int32_t keypos; 1264 1265 if (U_FAILURE(errorCode)) { 1266 return -1; 1267 } 1268 1269 if (tag == NULL) { 1270 /* no error: the root table and array items have no keys */ 1271 return -1; 1272 } 1273 1274 keypos = addKeyBytes(tag, (int32_t)(uprv_strlen(tag) + 1), errorCode); 1275 if (U_SUCCESS(errorCode)) { 1276 ++fKeysCount; 1277 } 1278 return keypos; 1279 } 1280 1281 static int32_t 1282 compareInt32(int32_t lPos, int32_t rPos) { 1283 /* 1284 * Compare possibly-negative key offsets. Don't just return lPos - rPos 1285 * because that is prone to negative-integer underflows. 
1286 */ 1287 if (lPos < rPos) { 1288 return -1; 1289 } else if (lPos > rPos) { 1290 return 1; 1291 } else { 1292 return 0; 1293 } 1294 } 1295 1296 static int32_t U_CALLCONV 1297 compareKeySuffixes(const void *context, const void *l, const void *r) { 1298 const struct SRBRoot *bundle=(const struct SRBRoot *)context; 1299 int32_t lPos = ((const KeyMapEntry *)l)->oldpos; 1300 int32_t rPos = ((const KeyMapEntry *)r)->oldpos; 1301 const char *lStart = bundle->getKeyString(lPos); 1302 const char *lLimit = lStart; 1303 const char *rStart = bundle->getKeyString(rPos); 1304 const char *rLimit = rStart; 1305 int32_t diff; 1306 while (*lLimit != 0) { ++lLimit; } 1307 while (*rLimit != 0) { ++rLimit; } 1308 /* compare keys in reverse character order */ 1309 while (lStart < lLimit && rStart < rLimit) { 1310 diff = (int32_t)(uint8_t)*--lLimit - (int32_t)(uint8_t)*--rLimit; 1311 if (diff != 0) { 1312 return diff; 1313 } 1314 } 1315 /* sort equal suffixes by descending key length */ 1316 diff = (int32_t)(rLimit - rStart) - (int32_t)(lLimit - lStart); 1317 if (diff != 0) { 1318 return diff; 1319 } 1320 /* Sort pool bundle keys first (negative oldpos), and otherwise keys in parsing order. 
*/ 1321 return compareInt32(lPos, rPos); 1322 } 1323 1324 static int32_t U_CALLCONV 1325 compareKeyNewpos(const void * /*context*/, const void *l, const void *r) { 1326 return compareInt32(((const KeyMapEntry *)l)->newpos, ((const KeyMapEntry *)r)->newpos); 1327 } 1328 1329 static int32_t U_CALLCONV 1330 compareKeyOldpos(const void * /*context*/, const void *l, const void *r) { 1331 return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos); 1332 } 1333 1334 void 1335 SRBRoot::compactKeys(UErrorCode &errorCode) { 1336 KeyMapEntry *map; 1337 char *keys; 1338 int32_t i; 1339 int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount; 1340 if (U_FAILURE(errorCode) || fKeysCount == 0 || fKeyMap != NULL) { 1341 return; 1342 } 1343 map = (KeyMapEntry *)uprv_malloc(keysCount * sizeof(KeyMapEntry)); 1344 if (map == NULL) { 1345 errorCode = U_MEMORY_ALLOCATION_ERROR; 1346 return; 1347 } 1348 keys = (char *)fUsePoolBundle->fKeys; 1349 for (i = 0; i < fUsePoolBundle->fKeysCount; ++i) { 1350 map[i].oldpos = 1351 (int32_t)(keys - fUsePoolBundle->fKeys) | 0x80000000; /* negative oldpos */ 1352 map[i].newpos = 0; 1353 while (*keys != 0) { ++keys; } /* skip the key */ 1354 ++keys; /* skip the NUL */ 1355 } 1356 keys = fKeys + fKeysBottom; 1357 for (; i < keysCount; ++i) { 1358 map[i].oldpos = (int32_t)(keys - fKeys); 1359 map[i].newpos = 0; 1360 while (*keys != 0) { ++keys; } /* skip the key */ 1361 ++keys; /* skip the NUL */ 1362 } 1363 /* Sort the keys so that each one is immediately followed by all of its suffixes. */ 1364 uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry), 1365 compareKeySuffixes, this, FALSE, &errorCode); 1366 /* 1367 * Make suffixes point into earlier, longer strings that contain them 1368 * and mark the old, now unused suffix bytes as deleted. 
1369 */ 1370 if (U_SUCCESS(errorCode)) { 1371 keys = fKeys; 1372 for (i = 0; i < keysCount;) { 1373 /* 1374 * This key is not a suffix of the previous one; 1375 * keep this one and delete the following ones that are 1376 * suffixes of this one. 1377 */ 1378 const char *key; 1379 const char *keyLimit; 1380 int32_t j = i + 1; 1381 map[i].newpos = map[i].oldpos; 1382 if (j < keysCount && map[j].oldpos < 0) { 1383 /* Key string from the pool bundle, do not delete. */ 1384 i = j; 1385 continue; 1386 } 1387 key = getKeyString(map[i].oldpos); 1388 for (keyLimit = key; *keyLimit != 0; ++keyLimit) {} 1389 for (; j < keysCount && map[j].oldpos >= 0; ++j) { 1390 const char *k; 1391 char *suffix; 1392 const char *suffixLimit; 1393 int32_t offset; 1394 suffix = keys + map[j].oldpos; 1395 for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {} 1396 offset = (int32_t)(keyLimit - key) - (suffixLimit - suffix); 1397 if (offset < 0) { 1398 break; /* suffix cannot be longer than the original */ 1399 } 1400 /* Is it a suffix of the earlier, longer key? */ 1401 for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {} 1402 if (suffix == suffixLimit && *k == *suffixLimit) { 1403 map[j].newpos = map[i].oldpos + offset; /* yes, point to the earlier key */ 1404 /* mark the suffix as deleted */ 1405 while (*suffix != 0) { *suffix++ = 1; } 1406 *suffix = 1; 1407 } else { 1408 break; /* not a suffix, restart from here */ 1409 } 1410 } 1411 i = j; 1412 } 1413 /* 1414 * Re-sort by newpos, then modify the key characters array in-place 1415 * to squeeze out unused bytes, and readjust the newpos offsets. 
1416 */ 1417 uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry), 1418 compareKeyNewpos, NULL, FALSE, &errorCode); 1419 if (U_SUCCESS(errorCode)) { 1420 int32_t oldpos, newpos, limit; 1421 oldpos = newpos = fKeysBottom; 1422 limit = fKeysTop; 1423 /* skip key offsets that point into the pool bundle rather than this new bundle */ 1424 for (i = 0; i < keysCount && map[i].newpos < 0; ++i) {} 1425 if (i < keysCount) { 1426 while (oldpos < limit) { 1427 if (keys[oldpos] == 1) { 1428 ++oldpos; /* skip unused bytes */ 1429 } else { 1430 /* adjust the new offsets for keys starting here */ 1431 while (i < keysCount && map[i].newpos == oldpos) { 1432 map[i++].newpos = newpos; 1433 } 1434 /* move the key characters to their new position */ 1435 keys[newpos++] = keys[oldpos++]; 1436 } 1437 } 1438 assert(i == keysCount); 1439 } 1440 fKeysTop = newpos; 1441 /* Re-sort once more, by old offsets for binary searching. */ 1442 uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry), 1443 compareKeyOldpos, NULL, FALSE, &errorCode); 1444 if (U_SUCCESS(errorCode)) { 1445 /* key size reduction by limit - newpos */ 1446 fKeyMap = map; 1447 map = NULL; 1448 } 1449 } 1450 } 1451 uprv_free(map); 1452 } 1453 1454 static int32_t U_CALLCONV 1455 compareStringSuffixes(const void * /*context*/, const void *l, const void *r) { 1456 const StringResource *left = *((const StringResource **)l); 1457 const StringResource *right = *((const StringResource **)r); 1458 const UChar *lStart = left->getBuffer(); 1459 const UChar *lLimit = lStart + left->length(); 1460 const UChar *rStart = right->getBuffer(); 1461 const UChar *rLimit = rStart + right->length(); 1462 int32_t diff; 1463 /* compare keys in reverse character order */ 1464 while (lStart < lLimit && rStart < rLimit) { 1465 diff = (int32_t)*--lLimit - (int32_t)*--rLimit; 1466 if (diff != 0) { 1467 return diff; 1468 } 1469 } 1470 /* sort equal suffixes by descending string length */ 1471 return right->length() - left->length(); 1472 } 
/*
 * uprv_sortArray() comparator for an array of StringResource pointers.
 * Orders strings that have fSame set after those that do not, then by
 * ascending length, then by descending fNumUnitsSaved, then by
 * UnicodeString::compare() of the string contents.
 */
static int32_t U_CALLCONV
compareStringLengths(const void * /*context*/, const void *l, const void *r) {
    const StringResource *left = *((const StringResource **)l);
    const StringResource *right = *((const StringResource **)r);
    int32_t diff;
    /* Make "is suffix of another string" compare greater than a non-suffix. */
    diff = (int)(left->fSame != NULL) - (int)(right->fSame != NULL);
    if (diff != 0) {
        return diff;
    }
    /* sort by ascending string length */
    diff = left->length() - right->length();
    if (diff != 0) {
        return diff;
    }
    // sort by descending size reduction
    diff = right->fNumUnitsSaved - left->fNumUnitsSaved;
    if (diff != 0) {
        return diff;
    }
    // sort lexically
    return left->fString.compare(right->fString);
}

/*
 * Appends this string to dest in the formatVersion 2 form
 * (optional length in 1..3 UChars + UChars + NUL) and sets fRes to a
 * URES_STRING_V2 resource word whose offset is base plus the position in dest
 * at which the string was appended. Marks the resource as written.
 * fNumCharsForLength selects how many explicit length units precede the text.
 */
void
StringResource::writeUTF16v2(int32_t base, UnicodeString &dest) {
    int32_t len = length();
    fRes = URES_MAKE_RESOURCE(URES_STRING_V2, base + dest.length());
    fWritten = TRUE;
    switch(fNumCharsForLength) {
    case 0:
        /* implicit length: nothing to write before the text */
        break;
    case 1:
        dest.append((UChar)(0xdc00 + len));
        break;
    case 2:
        dest.append((UChar)(0xdfef + (len >> 16)));
        dest.append((UChar)len);
        break;
    case 3:
        dest.append((UChar)0xdfff);
        dest.append((UChar)(len >> 16));
        dest.append((UChar)len);
        break;
    default:
        break;  /* will not occur */
    }
    dest.append(fString);
    dest.append((UChar)0);
}

/*
 * Writes the strings of stringSet into f16BitUnits in the formatVersion 2 form,
 * letting strings that are suffixes of other strings share the longer string's units.
 *
 * stringSet: hash table whose keys are StringResource pointers.
 * errorCode: in/out ICU error code; nothing is done if it already indicates a failure.
 *
 * For a pool bundle only sufficiently shared strings are written; the others
 * get an empty string resource word. For a regular bundle all strings not yet
 * written are appended, and fLocalStringIndexLimit is updated.
 */
void
SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return;
    }
    // Store the StringResource pointers in an array for
    // easy sorting and processing.
    // We enumerate a set of strings, so there are no duplicates.
    int32_t count = uhash_count(stringSet);
    LocalArray<StringResource *> array(new StringResource *[count], errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }
    for (int32_t pos = UHASH_FIRST, i = 0; i < count; ++i) {
        array[i] = (StringResource *)uhash_nextElement(stringSet, &pos)->key.pointer;
    }
    /* Sort the strings so that each one is immediately followed by all of its suffixes. */
    // NOTE(review): the element size is spelled sizeof(struct SResource **) although the
    // elements are StringResource *; presumably all object pointers have the same size here.
    uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
                   compareStringSuffixes, NULL, FALSE, &errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }
    /*
     * Make suffixes point into earlier, longer strings that contain them.
     * Temporarily use fSame and fSuffixOffset for suffix strings to
     * refer to the remaining ones.
     */
    for (int32_t i = 0; i < count;) {
        /*
         * This string is not a suffix of the previous one;
         * write this one and subsume the following ones that are
         * suffixes of this one.
         */
        StringResource *res = array[i];
        res->fNumUnitsSaved = (res->fNumCopies - 1) * res->get16BitStringsLength();
        // Whole duplicates of pool strings are already accounted for in fPoolStringIndexLimit,
        // see StringResource::handlePreflightStrings().
        int32_t j;
        for (j = i + 1; j < count; ++j) {
            StringResource *suffixRes = array[j];
            /* Is it a suffix of the earlier, longer string? */
            if (res->fString.endsWith(suffixRes->fString)) {
                assert(res->length() != suffixRes->length());  // Set strings are unique.
                if (suffixRes->fWritten) {
                    // Pool string, skip.
                } else if (suffixRes->fNumCharsForLength == 0) {
                    /* yes, point to the earlier string */
                    suffixRes->fSame = res;
                    suffixRes->fSuffixOffset = res->length() - suffixRes->length();
                    if (res->fWritten) {
                        // Suffix-share res which is a pool string.
                        // Compute the resource word and collect the maximum.
                        suffixRes->fRes =
                                res->fRes + res->fNumCharsForLength + suffixRes->fSuffixOffset;
                        int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(suffixRes->fRes);
                        if (poolStringIndex >= fPoolStringIndexLimit) {
                            fPoolStringIndexLimit = poolStringIndex + 1;
                        }
                        suffixRes->fWritten = TRUE;
                    }
                    res->fNumUnitsSaved += suffixRes->fNumCopies * suffixRes->get16BitStringsLength();
                } else {
                    /* write the suffix by itself if we need explicit length */
                }
            } else {
                break;  /* not a suffix, restart from here */
            }
        }
        /* Continue with the first string that is not a suffix of res. */
        i = j;
    }
    /*
     * Re-sort the strings by ascending length (except suffixes last)
     * to optimize for URES_TABLE16 and URES_ARRAY16:
     * Keep as many as possible within reach of 16-bit offsets.
     */
    uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
                   compareStringLengths, NULL, FALSE, &errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }
    if (fIsPoolBundle) {
        // Write strings that are sufficiently shared.
        // Avoid writing other strings.
        int32_t numStringsWritten = 0;
        int32_t numUnitsSaved = 0;
        int32_t numUnitsNotSaved = 0;
        for (int32_t i = 0; i < count; ++i) {
            StringResource *res = array[i];
            // Maximum pool string index when suffix-sharing the last character.
            int32_t maxStringIndex =
                    f16BitUnits.length() + res->fNumCharsForLength + res->length() - 1;
            if (res->fNumUnitsSaved >= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING &&
                    maxStringIndex < RES_MAX_OFFSET) {
                res->writeUTF16v2(0, f16BitUnits);
                ++numStringsWritten;
                numUnitsSaved += res->fNumUnitsSaved;
            } else {
                // Not worth pooling: give it an empty string resource word and mark it as done.
                numUnitsNotSaved += res->fNumUnitsSaved;
                res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_STRING);
                res->fWritten = TRUE;
            }
        }
        if (f16BitUnits.isBogus()) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
        }
        if (getShowWarning()) {  // not quiet
            printf("number of shared strings: %d\n", (int)numStringsWritten);
            printf("16-bit units for strings: %6d = %6d bytes\n",
                   (int)f16BitUnits.length(), (int)f16BitUnits.length() * 2);
            printf("16-bit units saved:       %6d = %6d bytes\n",
                   (int)numUnitsSaved, (int)numUnitsSaved * 2);
            printf("16-bit units not saved:   %6d = %6d bytes\n",
                   (int)numUnitsNotSaved, (int)numUnitsNotSaved * 2);
        }
    } else {
        assert(fPoolStringIndexLimit <= fUsePoolBundle->fStringIndexLimit);
        /* Write the non-suffix strings. */
        int32_t i;
        for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
            StringResource *res = array[i];
            if (!res->fWritten) {
                int32_t localStringIndex = f16BitUnits.length();
                if (localStringIndex >= fLocalStringIndexLimit) {
                    fLocalStringIndexLimit = localStringIndex + 1;
                }
                // Local string offsets are written with fPoolStringIndexLimit as their base.
                res->writeUTF16v2(fPoolStringIndexLimit, f16BitUnits);
            }
        }
        if (f16BitUnits.isBogus()) {
            errorCode = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (fWritePoolBundle != NULL && gFormatVersion >= 3) {
            // Add a copy of each non-suffix string to the pool bundle that is being written.
            PseudoListResource *poolStrings =
                    static_cast<PseudoListResource *>(fWritePoolBundle->fRoot);
            for (i = 0; i < count && array[i]->fSame == NULL; ++i) {
                assert(!array[i]->fString.isEmpty());
                StringResource *poolString =
                        new StringResource(fWritePoolBundle, array[i]->fString, errorCode);
                if (poolString == NULL) {
                    errorCode = U_MEMORY_ALLOCATION_ERROR;
                    break;
                }
                poolStrings->add(poolString);
            }
        }
        /* Write the suffix strings. Make each point to the real string. */
        for (; i < count; ++i) {
            StringResource *res = array[i];
            if (res->fWritten) {
                continue;
            }
            StringResource *same = res->fSame;
            assert(res->length() != same->length());  // Set strings are unique.
            // Point past the containing string's explicit length units and the non-shared prefix.
            res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset;
            int32_t localStringIndex = (int32_t)RES_GET_OFFSET(res->fRes) - fPoolStringIndexLimit;
            // Suffixes of pool strings have been set already.
            assert(localStringIndex >= 0);
            if (localStringIndex >= fLocalStringIndexLimit) {
                fLocalStringIndexLimit = localStringIndex + 1;
            }
            res->fWritten = TRUE;
        }
    }
    // +1 to account for the initial zero in f16BitUnits
    assert(f16BitUnits.length() <= (f16BitStringsLength + 1));
}