1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: pkgitems.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2005sep18 16 * created by: Markus W. Scherer 17 * 18 * Companion file to package.cpp. Deals with details of ICU data item formats. 19 * Used for item dependencies. 20 * Contains adapted code from ucnv_bld.c (swapper code from 2003). 21 */ 22 23 #include "unicode/utypes.h" 24 #include "unicode/ures.h" 25 #include "unicode/putil.h" 26 #include "unicode/udata.h" 27 #include "cstring.h" 28 #include "uinvchar.h" 29 #include "ucmndata.h" 30 #include "udataswp.h" 31 #include "swapimpl.h" 32 #include "toolutil.h" 33 #include "package.h" 34 #include "pkg_imp.h" 35 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 40 /* item formats in common */ 41 42 #include "uresdata.h" 43 #include "ucnv_bld.h" 44 #include "ucnv_io.h" 45 46 // general definitions ----------------------------------------------------- *** 47 48 U_CDECL_BEGIN 49 50 static void U_CALLCONV 51 printError(void *context, const char *fmt, va_list args) { 52 vfprintf((FILE *)context, fmt, args); 53 } 54 55 U_CDECL_END 56 57 // a data item in native-platform form ------------------------------------- *** 58 59 U_NAMESPACE_BEGIN 60 61 class NativeItem { 62 public: 63 NativeItem() : pItem(NULL), pInfo(NULL), bytes(NULL), swapped(NULL), length(0) {} 64 NativeItem(const Item *item, UDataSwapFn *swap) : swapped(NULL) { 65 setItem(item, swap); 66 } 67 ~NativeItem() { 68 delete [] swapped; 69 } 70 const UDataInfo *getDataInfo() const { 71 return pInfo; 72 } 73 const uint8_t *getBytes() const { 74 return bytes; 75 } 76 int32_t getLength() const { 77 return length; 78 } 79 80 void setItem(const Item *item, UDataSwapFn *swap) { 81 pItem=item; 82 int32_t infoLength, itemHeaderLength; 83 UErrorCode errorCode=U_ZERO_ERROR; 84 pInfo=::getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode); 85 if(U_FAILURE(errorCode)) { 86 exit(errorCode); // should succeed because readFile() checks headers 87 } 88 length=pItem->length-itemHeaderLength; 89 90 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN && pInfo->charsetFamily==U_CHARSET_FAMILY) { 91 bytes=pItem->data+itemHeaderLength; 92 } else { 93 UDataSwapper *ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 94 if(U_FAILURE(errorCode)) { 95 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 96 pItem->name, u_errorName(errorCode)); 97 exit(errorCode); 98 } 99 100 ds->printError=printError; 101 ds->printErrorContext=stderr; 102 103 swapped=new uint8_t[pItem->length]; 104 if(swapped==NULL) { 105 fprintf(stderr, "icupkg: unable to allocate memory for swapping \"%s\"\n", pItem->name); 106 exit(U_MEMORY_ALLOCATION_ERROR); 107 } 108 swap(ds, pItem->data, pItem->length, swapped, &errorCode); 109 pInfo=::getDataInfo(swapped, pItem->length, infoLength, itemHeaderLength, &errorCode); 110 bytes=swapped+itemHeaderLength; 111 udata_closeSwapper(ds); 112 } 113 } 114 115 private: 116 const Item *pItem; 117 const UDataInfo *pInfo; 118 const uint8_t *bytes; 119 uint8_t *swapped; 120 int32_t length; 121 }; 122 123 // check a dependency ------------------------------------------------------ *** 124 125 /* 126 * assemble the target item name from the source item name, an ID 127 * and a suffix 128 */ 129 static void 130 makeTargetName(const char *itemName, const char *id, int32_t idLength, const char *suffix, 131 char *target, int32_t capacity, 132 UErrorCode *pErrorCode) { 133 const char *itemID; 134 int32_t treeLength, suffixLength, targetLength; 135 136 // get the item basename 137 itemID=strrchr(itemName, '/'); 138 if(itemID!=NULL) { 139 ++itemID; 140 } else { 141 itemID=itemName; 142 } 143 144 // build the target string 145 treeLength=(int32_t)(itemID-itemName); 146 if(idLength<0) { 147 idLength=(int32_t)strlen(id); 148 } 149 suffixLength=(int32_t)strlen(suffix); 150 targetLength=treeLength+idLength+suffixLength; 151 if(targetLength>=capacity) { 152 fprintf(stderr, "icupkg/makeTargetName(%s) target item name length %ld too long\n", 153 itemName, (long)targetLength); 154 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 155 return; 156 } 157 158 memcpy(target, itemName, treeLength); 159 memcpy(target+treeLength, id, idLength); 160 memcpy(target+treeLength+idLength, suffix, suffixLength+1); // +1 includes the terminating NUL 161 } 162 163 static void 164 checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix, 165 CheckDependency check, void *context, 166 UErrorCode *pErrorCode) { 167 char target[200]; 168 makeTargetName(itemName, id, idLength, suffix, target, (int32_t)sizeof(target), pErrorCode); 169 if(U_SUCCESS(*pErrorCode)) { 170 check(context, itemName, target); 171 } 172 } 173 174 /* assemble the target item name from the item's parent item name */ 175 static void 176 checkParent(const char *itemName, CheckDependency check, void *context, 177 UErrorCode *pErrorCode) { 178 const char *itemID, *parent, *parentLimit, *suffix; 179 int32_t parentLength; 180 181 // get the item basename 182 itemID=strrchr(itemName, '/'); 183 if(itemID!=NULL) { 184 ++itemID; 185 } else { 186 itemID=itemName; 187 } 188 189 // get the item suffix 190 suffix=strrchr(itemID, '.'); 191 if(suffix==NULL) { 192 // empty suffix, point to the end of the string 193 suffix=strrchr(itemID, 0); 194 } 195 196 // get the position of the last '_' 197 for(parentLimit=suffix; parentLimit>itemID && *--parentLimit!='_';) {} 198 199 if(parentLimit!=itemID) { 200 // get the parent item name by truncating the last part of this item's name */ 201 parent=itemID; 202 parentLength=(int32_t)(parentLimit-itemID); 203 } else { 204 // no '_' in the item name: the parent is the root bundle 205 parent="root"; 206 parentLength=4; 207 if((suffix-itemID)==parentLength && 0==memcmp(itemID, parent, parentLength)) { 208 // the item itself is "root", which does not depend on a parent 209 return; 210 } 211 } 212 checkIDSuffix(itemName, parent, parentLength, suffix, check, context, pErrorCode); 213 } 214 215 // get dependencies from resource bundles ---------------------------------- *** 216 217 static const UChar SLASH=0x2f; 218 219 /* 220 * Check for the alias from the string or alias resource res. 221 */ 222 static void 223 checkAlias(const char *itemName, 224 Resource res, const UChar *alias, int32_t length, UBool useResSuffix, 225 CheckDependency check, void *context, UErrorCode *pErrorCode) { 226 int32_t i; 227 228 if(!uprv_isInvariantUString(alias, length)) { 229 fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-invariant characters\n", 230 itemName, res); 231 *pErrorCode=U_INVALID_CHAR_FOUND; 232 return; 233 } 234 235 // extract the locale ID from alias strings like 236 // locale_ID/key1/key2/key3 237 // locale_ID 238 239 // search for the first slash 240 for(i=0; i<length && alias[i]!=SLASH; ++i) {} 241 242 if(res_getPublicType(res)==URES_ALIAS) { 243 // ignore aliases with an initial slash: 244 // /ICUDATA/... and /pkgname/... go to a different package 245 // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle 246 if(i==0) { 247 return; // initial slash ('/') 248 } 249 250 // ignore the intra-bundle path starting from the first slash ('/') 251 length=i; 252 } else /* URES_STRING */ { 253 // the whole string should only consist of a locale ID 254 if(i!=length) { 255 fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n", 256 itemName, res); 257 *pErrorCode=U_UNSUPPORTED_ERROR; 258 return; 259 } 260 } 261 262 // convert the Unicode string to char * 263 char localeID[32]; 264 if(length>=(int32_t)sizeof(localeID)) { 265 fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n", 266 itemName, res, (long)length); 267 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 268 return; 269 } 270 u_UCharsToChars(alias, localeID, length); 271 localeID[length]=0; 272 273 checkIDSuffix(itemName, localeID, -1, (useResSuffix ? ".res" : ""), check, context, pErrorCode); 274 } 275 276 /* 277 * Enumerate one resource item and its children and extract dependencies from 278 * aliases. 279 */ 280 static void 281 ures_enumDependencies(const char *itemName, 282 const ResourceData *pResData, 283 Resource res, const char *inKey, const char *parentKey, int32_t depth, 284 CheckDependency check, void *context, 285 Package *pkg, 286 UErrorCode *pErrorCode) { 287 switch(res_getPublicType(res)) { 288 case URES_STRING: 289 { 290 UBool useResSuffix = TRUE; 291 // Check for %%ALIAS 292 if(depth==1 && inKey!=NULL) { 293 if(0!=strcmp(inKey, "%%ALIAS")) { 294 break; 295 } 296 } 297 // Check for %%DEPENDENCY 298 else if(depth==2 && parentKey!=NULL) { 299 if(0!=strcmp(parentKey, "%%DEPENDENCY")) { 300 break; 301 } 302 useResSuffix = FALSE; 303 } else { 304 // we ignore all other strings 305 break; 306 } 307 int32_t length; 308 const UChar *alias=res_getString(pResData, res, &length); 309 checkAlias(itemName, res, alias, length, useResSuffix, check, context, pErrorCode); 310 } 311 break; 312 case URES_ALIAS: 313 { 314 int32_t length; 315 const UChar *alias=res_getAlias(pResData, res, &length); 316 checkAlias(itemName, res, alias, length, TRUE, check, context, pErrorCode); 317 } 318 break; 319 case URES_TABLE: 320 { 321 /* recurse */ 322 int32_t count=res_countArrayItems(pResData, res); 323 for(int32_t i=0; i<count; ++i) { 324 const char *itemKey; 325 Resource item=res_getTableItemByIndex(pResData, res, i, &itemKey); 326 ures_enumDependencies( 327 itemName, pResData, 328 item, itemKey, 329 inKey, depth+1, 330 check, context, 331 pkg, 332 pErrorCode); 333 if(U_FAILURE(*pErrorCode)) { 334 fprintf(stderr, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%s: %08x) failed\n", 335 itemName, res, i, itemKey, item); 336 break; 337 } 338 } 339 } 340 break; 341 case URES_ARRAY: 342 { 343 /* recurse */ 344 int32_t count=res_countArrayItems(pResData, res); 345 for(int32_t i=0; i<count; ++i) { 346 Resource item=res_getArrayItem(pResData, res, i); 347 ures_enumDependencies( 348 itemName, pResData, 349 item, NULL, 350 inKey, depth+1, 351 check, context, 352 pkg, 353 pErrorCode); 354 if(U_FAILURE(*pErrorCode)) { 355 fprintf(stderr, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n", 356 itemName, res, i, item); 357 break; 358 } 359 } 360 } 361 break; 362 default: 363 break; 364 } 365 } 366 367 static void 368 ures_enumDependencies(const char *itemName, const UDataInfo *pInfo, 369 const uint8_t *inBytes, int32_t length, 370 CheckDependency check, void *context, 371 Package *pkg, 372 UErrorCode *pErrorCode) { 373 ResourceData resData; 374 375 res_read(&resData, pInfo, inBytes, length, pErrorCode); 376 if(U_FAILURE(*pErrorCode)) { 377 fprintf(stderr, "icupkg: .res format version %02x.%02x not supported, or bundle malformed\n", 378 pInfo->formatVersion[0], pInfo->formatVersion[1]); 379 exit(U_UNSUPPORTED_ERROR); 380 } 381 382 /* 383 * if the bundle attributes are present and the nofallback flag is not set, 384 * then add the parent bundle as a dependency 385 */ 386 if(pInfo->formatVersion[0]>1 || (pInfo->formatVersion[0]==1 && pInfo->formatVersion[1]>=1)) { 387 if(!resData.noFallback) { 388 /* this bundle participates in locale fallback */ 389 checkParent(itemName, check, context, pErrorCode); 390 } 391 } 392 393 icu::NativeItem nativePool; 394 395 if(resData.usesPoolBundle) { 396 char poolName[200]; 397 makeTargetName(itemName, "pool", 4, ".res", poolName, (int32_t)sizeof(poolName), pErrorCode); 398 if(U_FAILURE(*pErrorCode)) { 399 return; 400 } 401 check(context, itemName, poolName); 402 int32_t index=pkg->findItem(poolName); 403 if(index<0) { 404 // We cannot work with a bundle if its pool resource is missing. 405 // check() already printed a complaint. 406 return; 407 } 408 // TODO: Cache the native version in the Item itself. 409 nativePool.setItem(pkg->getItem(index), ures_swap); 410 const UDataInfo *poolInfo=nativePool.getDataInfo(); 411 if(poolInfo->formatVersion[0]<=1) { 412 fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName); 413 return; 414 } 415 const int32_t *poolRoot=(const int32_t *)nativePool.getBytes(); 416 const int32_t *poolIndexes=poolRoot+1; 417 int32_t poolIndexLength=poolIndexes[URES_INDEX_LENGTH]&0xff; 418 if(!(poolIndexLength>URES_INDEX_POOL_CHECKSUM && 419 (poolIndexes[URES_INDEX_ATTRIBUTES]&URES_ATT_IS_POOL_BUNDLE)) 420 ) { 421 fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName); 422 return; 423 } 424 if(resData.pRoot[1+URES_INDEX_POOL_CHECKSUM]==poolIndexes[URES_INDEX_POOL_CHECKSUM]) { 425 resData.poolBundleKeys=(const char *)(poolIndexes+poolIndexLength); 426 resData.poolBundleStrings=(const uint16_t *)(poolRoot+poolIndexes[URES_INDEX_KEYS_TOP]); 427 } else { 428 fprintf(stderr, "icupkg: %s has mismatched checksum for %s\n", poolName, itemName); 429 return; 430 } 431 } 432 433 ures_enumDependencies( 434 itemName, &resData, 435 resData.rootRes, NULL, NULL, 0, 436 check, context, 437 pkg, 438 pErrorCode); 439 } 440 441 // get dependencies from conversion tables --------------------------------- *** 442 443 /* code adapted from ucnv_swap() */ 444 static void 445 ucnv_enumDependencies(const UDataSwapper *ds, 446 const char *itemName, const UDataInfo *pInfo, 447 const uint8_t *inBytes, int32_t length, 448 CheckDependency check, void *context, 449 UErrorCode *pErrorCode) { 450 uint32_t staticDataSize; 451 452 const UConverterStaticData *inStaticData; 453 454 const _MBCSHeader *inMBCSHeader; 455 uint8_t outputType; 456 457 /* check format version */ 458 if(!( 459 pInfo->formatVersion[0]==6 && 460 pInfo->formatVersion[1]>=2 461 )) { 462 fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n", 463 pInfo->formatVersion[0], pInfo->formatVersion[1]); 464 exit(U_UNSUPPORTED_ERROR); 465 } 466 467 /* read the initial UConverterStaticData structure after the UDataInfo header */ 468 inStaticData=(const UConverterStaticData *)inBytes; 469 470 if( length<(int32_t)sizeof(UConverterStaticData) || 471 (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) 472 ) { 473 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n", 474 length); 475 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 476 return; 477 } 478 479 inBytes+=staticDataSize; 480 length-=(int32_t)staticDataSize; 481 482 /* check for supported conversionType values */ 483 if(inStaticData->conversionType==UCNV_MBCS) { 484 /* MBCS data */ 485 uint32_t mbcsHeaderLength, mbcsHeaderFlags, mbcsHeaderOptions; 486 int32_t extOffset; 487 488 inMBCSHeader=(const _MBCSHeader *)inBytes; 489 490 if(length<(int32_t)sizeof(_MBCSHeader)) { 491 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", 492 length); 493 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 494 return; 495 } 496 if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { 497 mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; 498 } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && 499 ((mbcsHeaderOptions=ds->readUInt32(inMBCSHeader->options))& 500 MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 501 ) { 502 mbcsHeaderLength=mbcsHeaderOptions&MBCS_OPT_LENGTH_MASK; 503 } else { 504 udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n", 505 inMBCSHeader->version[0], inMBCSHeader->version[1]); 506 *pErrorCode=U_UNSUPPORTED_ERROR; 507 return; 508 } 509 510 mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags); 511 extOffset=(int32_t)(mbcsHeaderFlags>>8); 512 outputType=(uint8_t)mbcsHeaderFlags; 513 514 if(outputType==MBCS_OUTPUT_EXT_ONLY) { 515 /* 516 * extension-only file, 517 * contains a base name instead of normal base table data 518 */ 519 char baseName[32]; 520 int32_t baseNameLength; 521 522 /* there is extension data after the base data, see ucnv_ext.h */ 523 if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { 524 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", 525 length); 526 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 527 return; 528 } 529 530 /* swap the base name, between the header and the extension data */ 531 const char *inBaseName=(const char *)inBytes+mbcsHeaderLength*4; 532 baseNameLength=(int32_t)strlen(inBaseName); 533 if(baseNameLength>=(int32_t)sizeof(baseName)) { 534 udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n", 535 itemName, baseNameLength); 536 *pErrorCode=U_UNSUPPORTED_ERROR; 537 return; 538 } 539 ds->swapInvChars(ds, inBaseName, baseNameLength+1, baseName, pErrorCode); 540 541 checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode); 542 } 543 } 544 } 545 546 // ICU data formats -------------------------------------------------------- *** 547 548 static const struct { 549 uint8_t dataFormat[4]; 550 } dataFormats[]={ 551 { { 0x52, 0x65, 0x73, 0x42 } }, /* dataFormat="ResB" */ 552 { { 0x63, 0x6e, 0x76, 0x74 } }, /* dataFormat="cnvt" */ 553 { { 0x43, 0x76, 0x41, 0x6c } } /* dataFormat="CvAl" */ 554 }; 555 556 enum { 557 FMT_RES, 558 FMT_CNV, 559 FMT_ALIAS, 560 FMT_COUNT 561 }; 562 563 static int32_t 564 getDataFormat(const uint8_t dataFormat[4]) { 565 int32_t i; 566 567 for(i=0; i<FMT_COUNT; ++i) { 568 if(0==memcmp(dataFormats[i].dataFormat, dataFormat, 4)) { 569 return i; 570 } 571 } 572 return -1; 573 } 574 575 // enumerate dependencies of a package item -------------------------------- *** 576 577 void 578 Package::enumDependencies(Item *pItem, void *context, CheckDependency check) { 579 int32_t infoLength, itemHeaderLength; 580 UErrorCode errorCode=U_ZERO_ERROR; 581 const UDataInfo *pInfo=getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode); 582 if(U_FAILURE(errorCode)) { 583 return; // should not occur because readFile() checks headers 584 } 585 586 // find the data format and call the corresponding function, if any 587 int32_t format=getDataFormat(pInfo->dataFormat); 588 if(format>=0) { 589 switch(format) { 590 case FMT_RES: 591 { 592 /* 593 * Swap the resource bundle (if necessary) so that we can use 594 * the normal runtime uresdata.c code to read it. 595 * We do not want to duplicate that code, especially not together with on-the-fly swapping. 596 */ 597 NativeItem nrb(pItem, ures_swap); 598 ures_enumDependencies(pItem->name, nrb.getDataInfo(), nrb.getBytes(), nrb.getLength(), check, context, this, &errorCode); 599 break; 600 } 601 case FMT_CNV: 602 { 603 // TODO: share/cache swappers 604 UDataSwapper *ds=udata_openSwapper( 605 (UBool)pInfo->isBigEndian, pInfo->charsetFamily, 606 U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, 607 &errorCode); 608 if(U_FAILURE(errorCode)) { 609 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 610 pItem->name, u_errorName(errorCode)); 611 exit(errorCode); 612 } 613 614 ds->printError=printError; 615 ds->printErrorContext=stderr; 616 617 const uint8_t *inBytes=pItem->data+itemHeaderLength; 618 int32_t length=pItem->length-itemHeaderLength; 619 620 ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode); 621 udata_closeSwapper(ds); 622 break; 623 } 624 default: 625 break; 626 } 627 628 if(U_FAILURE(errorCode)) { 629 exit(errorCode); 630 } 631 } 632 } 633 634 U_NAMESPACE_END 635