1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2008, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: pkgitems.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2005sep18 14 * created by: Markus W. Scherer 15 * 16 * Companion file to package.cpp. Deals with details of ICU data item formats. 17 * Used for item dependencies. 18 * Contains adapted code from uresdata.c and ucnv_bld.c (swapper code from 2003). 19 */ 20 21 #include "unicode/utypes.h" 22 #include "unicode/ures.h" 23 #include "unicode/putil.h" 24 #include "unicode/udata.h" 25 #include "cstring.h" 26 #include "ucmndata.h" 27 #include "udataswp.h" 28 #include "swapimpl.h" 29 #include "toolutil.h" 30 #include "package.h" 31 #include "pkg_imp.h" 32 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 37 /* item formats in common */ 38 39 #include "uresdata.h" 40 #include "ucnv_bld.h" 41 #include "ucnv_io.h" 42 43 // general definitions ----------------------------------------------------- *** 44 45 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 46 47 U_CDECL_BEGIN 48 49 static void U_CALLCONV 50 printError(void *context, const char *fmt, va_list args) { 51 vfprintf((FILE *)context, fmt, args); 52 } 53 54 U_CDECL_END 55 56 // check a dependency ------------------------------------------------------ *** 57 58 /* 59 * assemble the target item name from the source item name, an ID 60 * and a suffix 61 */ 62 static void 63 checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix, 64 CheckDependency check, void *context, 65 UErrorCode *pErrorCode) { 66 char target[200]; 67 const char *itemID; 68 int32_t treeLength, suffixLength, targetLength; 69 70 // get the item basename 71 itemID=strrchr(itemName, '/'); 72 if(itemID!=NULL) { 73 ++itemID; 74 } else { 75 itemID=itemName; 76 } 77 78 // build the target string 79 treeLength=(int32_t)(itemID-itemName); 80 if(idLength<0) { 81 idLength=(int32_t)strlen(id); 82 } 83 suffixLength=(int32_t)strlen(suffix); 84 targetLength=treeLength+idLength+suffixLength; 85 if(targetLength>=(int32_t)sizeof(target)) { 86 fprintf(stderr, "icupkg/checkIDSuffix(%s) alias target item name length %ld too long\n", 87 itemName, (long)targetLength); 88 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 89 return; 90 } 91 92 memcpy(target, itemName, treeLength); 93 memcpy(target+treeLength, id, idLength); 94 memcpy(target+treeLength+idLength, suffix, suffixLength+1); // +1 includes the terminating NUL 95 96 check(context, itemName, target); 97 } 98 99 /* assemble the target item name from the item's parent item name */ 100 static void 101 checkParent(const char *itemName, CheckDependency check, void *context, 102 UErrorCode *pErrorCode) { 103 const char *itemID, *parent, *parentLimit, *suffix; 104 int32_t parentLength; 105 106 // get the item basename 107 itemID=strrchr(itemName, '/'); 108 if(itemID!=NULL) { 109 ++itemID; 110 } else { 111 itemID=itemName; 112 } 113 114 // get the item suffix 115 suffix=strrchr(itemID, '.'); 116 if(suffix==NULL) { 117 // empty suffix, point to the end of the string 118 suffix=strrchr(itemID, 0); 119 } 120 121 // get the position of the last '_' 122 for(parentLimit=suffix; parentLimit>itemID && *--parentLimit!='_';) {} 123 124 if(parentLimit!=itemID) { 125 // get the parent item name by truncating the last part of this item's name */ 126 parent=itemID; 127 parentLength=(int32_t)(parentLimit-itemID); 128 } else { 129 // no '_' in the item name: the parent is the root bundle 130 parent="root"; 131 parentLength=4; 132 if((suffix-itemID)==parentLength && 0==memcmp(itemID, parent, parentLength)) { 133 // the item itself is "root", which does not depend on a parent 134 return; 135 } 136 } 137 checkIDSuffix(itemName, parent, parentLength, suffix, check, context, pErrorCode); 138 } 139 140 // get dependencies from resource bundles ---------------------------------- *** 141 142 static const char gAliasKey[]="%%ALIAS"; 143 static const char gDependencyKey[]="%%DEPENDENCY"; 144 enum { gAliasKeyLength=7, gDependencyKeyLength=12 }; 145 146 /* 147 * Enumerate one resource item and its children and extract dependencies from 148 * aliases. 149 * Code adapted from ures_preflightResource() and ures_swapResource(). 150 */ 151 static void 152 ures_enumDependencies(const UDataSwapper *ds, 153 const char *itemName, 154 const Resource *inBundle, int32_t length, 155 Resource res, const char *inKey, const char *parentKey, int32_t depth, 156 CheckDependency check, void *context, 157 UErrorCode *pErrorCode) { 158 const Resource *p; 159 int32_t offset; 160 UBool useResSuffix = TRUE; 161 162 if(res==0 || RES_GET_TYPE(res)==URES_INT) { 163 /* empty string or integer, nothing to do */ 164 return; 165 } 166 167 /* all other types use an offset to point to their data */ 168 offset=(int32_t)RES_GET_OFFSET(res); 169 if(0<=length && length<=offset) { 170 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource offset exceeds bundle length %d\n", 171 itemName, res, length); 172 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 173 return; 174 } 175 p=inBundle+offset; 176 177 switch(RES_GET_TYPE(res)) { 178 /* strings and aliases have physically the same value layout */ 179 case URES_STRING: 180 // Check for %%ALIAS 181 if(depth==1 && inKey!=NULL) { 182 char key[gAliasKeyLength+1]; 183 int32_t keyLength; 184 185 keyLength=(int32_t)strlen(inKey); 186 if(keyLength!=gAliasKeyLength) { 187 break; 188 } 189 ds->swapInvChars(ds, inKey, gAliasKeyLength+1, key, pErrorCode); 190 if(U_FAILURE(*pErrorCode)) { 191 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) string key contains variant characters\n", 192 itemName, res); 193 return; 194 } 195 if(0!=strcmp(key, gAliasKey)) { 196 break; 197 } 198 } 199 // Check for %%DEPENDENCY 200 else if(depth==2 && parentKey!=NULL) { 201 char key[gDependencyKeyLength+1]; 202 int32_t keyLength; 203 204 keyLength=(int32_t)strlen(parentKey); 205 if(keyLength!=gDependencyKeyLength) { 206 break; 207 } 208 ds->swapInvChars(ds, parentKey, gDependencyKeyLength+1, key, pErrorCode); 209 if(U_FAILURE(*pErrorCode)) { 210 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) string key contains variant characters\n", 211 itemName, res); 212 return; 213 } 214 if(0!=strcmp(key, gDependencyKey)) { 215 break; 216 } 217 useResSuffix = FALSE; 218 } else { 219 // we ignore all other strings 220 break; 221 } 222 // for the top-level %%ALIAS or %%DEPENDENCY string fall through to URES_ALIAS 223 case URES_ALIAS: 224 { 225 char localeID[32]; 226 const uint16_t *p16; 227 int32_t i, stringLength; 228 uint16_t u16, ored16; 229 230 stringLength=udata_readInt32(ds, (int32_t)*p); 231 232 /* top=offset+1+(string length +1)/2 rounded up */ 233 offset+=1+((stringLength+1)+1)/2; 234 if(offset>length) { 235 break; // the resource does not fit into the bundle, print error below 236 } 237 238 // extract the locale ID from alias strings like 239 // locale_ID/key1/key2/key3 240 // locale_ID 241 if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) { 242 u16=0x2f; // slash in local endianness 243 } else { 244 u16=0x2f00; // slash in opposite endianness 245 } 246 p16=(const uint16_t *)(p+1); // Unicode string contents 247 248 // search for the first slash 249 for(i=0; i<stringLength && p16[i]!=u16; ++i) {} 250 251 if(RES_GET_TYPE(res)==URES_ALIAS) { 252 // ignore aliases with an initial slash: 253 // /ICUDATA/... and /pkgname/... go to a different package 254 // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle 255 if(i==0) { 256 break; // initial slash ('/') 257 } 258 259 // ignore the intra-bundle path starting from the first slash ('/') 260 stringLength=i; 261 } else /* URES_STRING */ { 262 // the whole string should only consist of a locale ID 263 if(i!=stringLength) { 264 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n", 265 itemName, res); 266 *pErrorCode=U_UNSUPPORTED_ERROR; 267 return; 268 } 269 } 270 271 // convert the Unicode string to char * and 272 // check that it has a bundle path but no package 273 if(stringLength>=(int32_t)sizeof(localeID)) { 274 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n", 275 itemName, res, stringLength); 276 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 277 return; 278 } 279 280 // convert the alias Unicode string to US-ASCII 281 ored16=0; 282 if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) { 283 for(i=0; i<stringLength; ++i) { 284 u16=p16[i]; 285 ored16|=u16; 286 localeID[i]=(char)u16; 287 } 288 } else { 289 for(i=0; i<stringLength; ++i) { 290 u16=p16[i]; 291 ored16|=u16; 292 localeID[i]=(char)(u16>>8); 293 } 294 ored16=(uint16_t)((ored16<<8)|(ored16>>8)); 295 } 296 localeID[stringLength]=0; 297 if(ored16>0x7f) { 298 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-ASCII characters\n", 299 itemName, res); 300 *pErrorCode=U_INVALID_CHAR_FOUND; 301 return; 302 } 303 304 #if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY) 305 // swap to EBCDIC 306 // our swapper is probably not the right one, but 307 // the function uses it only for printing errors 308 uprv_ebcdicFromAscii(ds, localeID, stringLength, localeID, pErrorCode); 309 if(U_FAILURE(*pErrorCode)) { 310 return; 311 } 312 #endif 313 #if U_CHARSET_FAMILY!=U_ASCII_FAMILY && U_CHARSET_FAMILY!=U_EBCDIC_FAMILY 314 # error Unknown U_CHARSET_FAMILY value! 315 #endif 316 317 checkIDSuffix(itemName, localeID, -1, (useResSuffix ? ".res" : ""), check, context, pErrorCode); 318 } 319 break; 320 case URES_TABLE: 321 case URES_TABLE32: 322 { 323 const uint16_t *pKey16; 324 const int32_t *pKey32; 325 326 Resource item; 327 int32_t i, count; 328 329 if(RES_GET_TYPE(res)==URES_TABLE) { 330 /* get table item count */ 331 pKey16=(const uint16_t *)p; 332 count=ds->readUInt16(*pKey16++); 333 334 pKey32=NULL; 335 336 /* top=((1+ table item count)/2 rounded up)+(table item count) */ 337 offset+=((1+count)+1)/2; 338 } else { 339 /* get table item count */ 340 pKey32=(const int32_t *)p; 341 count=udata_readInt32(ds, *pKey32++); 342 343 pKey16=NULL; 344 345 /* top=(1+ table item count)+(table item count) */ 346 offset+=1+count; 347 } 348 349 p=inBundle+offset; /* pointer to table resources */ 350 offset+=count; 351 352 if(offset>length) { 353 break; // the resource does not fit into the bundle, print error below 354 } 355 356 /* recurse */ 357 for(i=0; i<count; ++i) { 358 item=ds->readUInt32(*p++); 359 ures_enumDependencies( 360 ds, itemName, inBundle, length, item, 361 ((const char *)inBundle)+ 362 (pKey16!=NULL ? 363 ds->readUInt16(pKey16[i]) : 364 udata_readInt32(ds, pKey32[i])), 365 inKey, depth+1, 366 check, context, 367 pErrorCode); 368 if(U_FAILURE(*pErrorCode)) { 369 udata_printError(ds, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%08x) failed\n", 370 itemName, res, i, item); 371 break; 372 } 373 } 374 } 375 break; 376 case URES_ARRAY: 377 { 378 Resource item; 379 int32_t i, count; 380 381 /* top=offset+1+(array length) */ 382 count=udata_readInt32(ds, (int32_t)*p++); 383 offset+=1+count; 384 385 if(offset>length) { 386 break; // the resource does not fit into the bundle, print error below 387 } 388 389 /* recurse */ 390 for(i=0; i<count; ++i) { 391 item=ds->readUInt32(*p++); 392 ures_enumDependencies( 393 ds, itemName, inBundle, length, 394 item, NULL, inKey, depth+1, 395 check, context, 396 pErrorCode); 397 if(U_FAILURE(*pErrorCode)) { 398 udata_printError(ds, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n", 399 itemName, res, i, item); 400 break; 401 } 402 } 403 } 404 break; 405 default: 406 break; 407 } 408 409 if(U_FAILURE(*pErrorCode)) { 410 /* nothing to do */ 411 } else if(0<=length && length<offset) { 412 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource limit exceeds bundle length %d\n", 413 itemName, res, length); 414 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 415 } 416 } 417 418 /* code adapted from ures_swap() */ 419 static void 420 ures_enumDependencies(const UDataSwapper *ds, 421 const char *itemName, const UDataInfo *pInfo, 422 const uint8_t *inBytes, int32_t length, 423 CheckDependency check, void *context, 424 UErrorCode *pErrorCode) { 425 const Resource *inBundle; 426 Resource rootRes; 427 428 /* the following integers count Resource item offsets (4 bytes each), not bytes */ 429 int32_t bundleLength; 430 431 /* check format version */ 432 if(pInfo->formatVersion[0]!=1) { 433 fprintf(stderr, "icupkg: .res format version %02x not supported\n", 434 pInfo->formatVersion[0]); 435 exit(U_UNSUPPORTED_ERROR); 436 } 437 438 /* a resource bundle must contain at least one resource item */ 439 bundleLength=length/4; 440 441 /* formatVersion 1.1 must have a root item and at least 5 indexes */ 442 if( bundleLength< 443 (pInfo->formatVersion[1]==0 ? 1 : 1+5) 444 ) { 445 fprintf(stderr, "icupkg: too few bytes (%d after header) for a resource bundle\n", 446 length); 447 exit(U_INDEX_OUTOFBOUNDS_ERROR); 448 } 449 450 inBundle=(const Resource *)inBytes; 451 rootRes=ds->readUInt32(*inBundle); 452 453 ures_enumDependencies( 454 ds, itemName, inBundle, bundleLength, 455 rootRes, NULL, NULL, 0, 456 check, context, 457 pErrorCode); 458 459 /* 460 * if the bundle attributes are present and the nofallback flag is not set, 461 * then add the parent bundle as a dependency 462 */ 463 if(pInfo->formatVersion[1]>=1) { 464 int32_t indexes[URES_INDEX_TOP]; 465 const int32_t *inIndexes; 466 467 inIndexes=(const int32_t *)inBundle+1; 468 indexes[URES_INDEX_LENGTH]=udata_readInt32(ds, inIndexes[URES_INDEX_LENGTH]); 469 if(indexes[URES_INDEX_LENGTH]>URES_INDEX_ATTRIBUTES) { 470 indexes[URES_INDEX_ATTRIBUTES]=udata_readInt32(ds, inIndexes[URES_INDEX_ATTRIBUTES]); 471 if(0==(indexes[URES_INDEX_ATTRIBUTES]&URES_ATT_NO_FALLBACK)) { 472 /* this bundle participates in locale fallback */ 473 checkParent(itemName, check, context, pErrorCode); 474 } 475 } 476 } 477 } 478 479 // get dependencies from conversion tables --------------------------------- *** 480 481 /* code adapted from ucnv_swap() */ 482 static void 483 ucnv_enumDependencies(const UDataSwapper *ds, 484 const char *itemName, const UDataInfo *pInfo, 485 const uint8_t *inBytes, int32_t length, 486 CheckDependency check, void *context, 487 UErrorCode *pErrorCode) { 488 uint32_t staticDataSize; 489 490 const UConverterStaticData *inStaticData; 491 492 const _MBCSHeader *inMBCSHeader; 493 uint8_t outputType; 494 495 /* check format version */ 496 if(!( 497 pInfo->formatVersion[0]==6 && 498 pInfo->formatVersion[1]>=2 499 )) { 500 fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n", 501 pInfo->formatVersion[0], pInfo->formatVersion[1]); 502 exit(U_UNSUPPORTED_ERROR); 503 } 504 505 /* read the initial UConverterStaticData structure after the UDataInfo header */ 506 inStaticData=(const UConverterStaticData *)inBytes; 507 508 if( length<(int32_t)sizeof(UConverterStaticData) || 509 (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) 510 ) { 511 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n", 512 length); 513 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 514 return; 515 } 516 517 inBytes+=staticDataSize; 518 length-=(int32_t)staticDataSize; 519 520 /* check for supported conversionType values */ 521 if(inStaticData->conversionType==UCNV_MBCS) { 522 /* MBCS data */ 523 uint32_t mbcsHeaderLength, mbcsHeaderFlags, mbcsHeaderOptions; 524 int32_t extOffset; 525 526 inMBCSHeader=(const _MBCSHeader *)inBytes; 527 528 if(length<(int32_t)sizeof(_MBCSHeader)) { 529 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", 530 length); 531 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 532 return; 533 } 534 if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { 535 mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; 536 } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && 537 ((mbcsHeaderOptions=ds->readUInt32(inMBCSHeader->options))& 538 MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 539 ) { 540 mbcsHeaderLength=mbcsHeaderOptions&MBCS_OPT_LENGTH_MASK; 541 } else { 542 udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n", 543 inMBCSHeader->version[0], inMBCSHeader->version[1]); 544 *pErrorCode=U_UNSUPPORTED_ERROR; 545 return; 546 } 547 548 mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags); 549 extOffset=(int32_t)(mbcsHeaderFlags>>8); 550 outputType=(uint8_t)mbcsHeaderFlags; 551 552 if(outputType==MBCS_OUTPUT_EXT_ONLY) { 553 /* 554 * extension-only file, 555 * contains a base name instead of normal base table data 556 */ 557 char baseName[32]; 558 int32_t baseNameLength; 559 560 /* there is extension data after the base data, see ucnv_ext.h */ 561 if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { 562 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", 563 length); 564 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 565 return; 566 } 567 568 /* swap the base name, between the header and the extension data */ 569 const char *inBaseName=(const char *)inBytes+mbcsHeaderLength*4; 570 baseNameLength=(int32_t)strlen(inBaseName); 571 if(baseNameLength>=(int32_t)sizeof(baseName)) { 572 udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n", 573 itemName, baseNameLength); 574 *pErrorCode=U_UNSUPPORTED_ERROR; 575 return; 576 } 577 ds->swapInvChars(ds, inBaseName, baseNameLength+1, baseName, pErrorCode); 578 579 checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode); 580 } 581 } 582 } 583 584 // ICU data formats -------------------------------------------------------- *** 585 586 static const struct { 587 uint8_t dataFormat[4]; 588 } dataFormats[]={ 589 { { 0x52, 0x65, 0x73, 0x42 } }, /* dataFormat="ResB" */ 590 { { 0x63, 0x6e, 0x76, 0x74 } }, /* dataFormat="cnvt" */ 591 { { 0x43, 0x76, 0x41, 0x6c } } /* dataFormat="CvAl" */ 592 }; 593 594 enum { 595 FMT_RES, 596 FMT_CNV, 597 FMT_ALIAS, 598 FMT_COUNT 599 }; 600 601 static int32_t 602 getDataFormat(const uint8_t dataFormat[4]) { 603 int32_t i; 604 605 for(i=0; i<FMT_COUNT; ++i) { 606 if(0==memcmp(dataFormats[i].dataFormat, dataFormat, 4)) { 607 return i; 608 } 609 } 610 return -1; 611 } 612 613 // enumerate dependencies of a package item -------------------------------- *** 614 615 U_NAMESPACE_BEGIN 616 617 void 618 Package::enumDependencies(Item *pItem, void *context, CheckDependency check) { 619 const UDataInfo *pInfo; 620 const uint8_t *inBytes; 621 int32_t format, length, infoLength, itemHeaderLength; 622 UErrorCode errorCode; 623 624 errorCode=U_ZERO_ERROR; 625 pInfo=getDataInfo(pItem->data,pItem->length, infoLength, itemHeaderLength, &errorCode); 626 if(U_FAILURE(errorCode)) { 627 return; // should not occur because readFile() checks headers 628 } 629 630 // find the data format and call the corresponding function, if any 631 format=getDataFormat(pInfo->dataFormat); 632 if(format>=0) { 633 UDataSwapper *ds; 634 635 // TODO: share/cache swappers 636 ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 637 if(U_FAILURE(errorCode)) { 638 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 639 pItem->name, u_errorName(errorCode)); 640 exit(errorCode); 641 } 642 643 ds->printError=printError; 644 ds->printErrorContext=stderr; 645 646 inBytes=pItem->data+itemHeaderLength; 647 length=pItem->length-itemHeaderLength; 648 649 switch(format) { 650 case FMT_RES: 651 ures_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode); 652 break; 653 case FMT_CNV: 654 ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode); 655 break; 656 default: 657 break; 658 } 659 660 udata_closeSwapper(ds); 661 662 if(U_FAILURE(errorCode)) { 663 exit(errorCode); 664 } 665 } 666 } 667 U_NAMESPACE_END 668