1 /* 2 ******************************************************************************** 3 * 4 * Copyright (C) 1998-2015, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************** 8 * 9 * 10 * makeconv.cpp: 11 * tool creating a binary (compressed) representation of the conversion mapping 12 * table (IBM NLTC ucmap format). 13 * 14 * 05/04/2000 helena Added fallback mapping into the picture... 15 * 06/29/2000 helena Major rewrite of the callback APIs. 16 */ 17 18 #include <stdio.h> 19 #include "unicode/putil.h" 20 #include "unicode/ucnv_err.h" 21 #include "charstr.h" 22 #include "ucnv_bld.h" 23 #include "ucnv_imp.h" 24 #include "ucnv_cnv.h" 25 #include "cstring.h" 26 #include "cmemory.h" 27 #include "uinvchar.h" 28 #include "filestrm.h" 29 #include "toolutil.h" 30 #include "uoptions.h" 31 #include "unicode/udata.h" 32 #include "unewdata.h" 33 #include "uparse.h" 34 #include "ucm.h" 35 #include "makeconv.h" 36 #include "genmbcs.h" 37 38 #define DEBUG 0 39 40 typedef struct ConvData { 41 UCMFile *ucm; 42 NewConverter *cnvData, *extData; 43 UConverterSharedData sharedData; 44 UConverterStaticData staticData; 45 } ConvData; 46 47 static void 48 initConvData(ConvData *data) { 49 uprv_memset(data, 0, sizeof(ConvData)); 50 data->sharedData.structSize=sizeof(UConverterSharedData); 51 data->staticData.structSize=sizeof(UConverterStaticData); 52 data->sharedData.staticData=&data->staticData; 53 } 54 55 static void 56 cleanupConvData(ConvData *data) { 57 if(data!=NULL) { 58 if(data->cnvData!=NULL) { 59 data->cnvData->close(data->cnvData); 60 data->cnvData=NULL; 61 } 62 if(data->extData!=NULL) { 63 data->extData->close(data->extData); 64 data->extData=NULL; 65 } 66 ucm_close(data->ucm); 67 data->ucm=NULL; 68 } 69 } 70 71 /* 72 * from ucnvstat.c - static prototypes of data-based converters 73 */ 74 U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]; 75 76 /* 77 * Global - verbosity 78 */ 79 UBool VERBOSE = FALSE; 80 UBool QUIET = FALSE; 81 UBool SMALL = FALSE; 82 UBool IGNORE_SISO_CHECK = FALSE; 83 84 static void 85 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode); 86 87 /* 88 * Set up the UNewData and write the converter.. 89 */ 90 static void 91 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status); 92 93 UBool haveCopyright=TRUE; 94 95 static UDataInfo dataInfo={ 96 sizeof(UDataInfo), 97 0, 98 99 U_IS_BIG_ENDIAN, 100 U_CHARSET_FAMILY, 101 sizeof(UChar), 102 0, 103 104 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */ 105 {6, 2, 0, 0}, /* formatVersion */ 106 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */ 107 }; 108 109 static void 110 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status) 111 { 112 UNewDataMemory *mem = NULL; 113 uint32_t sz2; 114 uint32_t size = 0; 115 int32_t tableType; 116 117 if(U_FAILURE(*status)) 118 { 119 return; 120 } 121 122 tableType=TABLE_NONE; 123 if(data->cnvData!=NULL) { 124 tableType|=TABLE_BASE; 125 } 126 if(data->extData!=NULL) { 127 tableType|=TABLE_EXT; 128 } 129 130 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status); 131 132 if(U_FAILURE(*status)) 133 { 134 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", 135 cnvName, 136 "cnv", 137 u_errorName(*status)); 138 return; 139 } 140 141 if(VERBOSE) 142 { 143 printf("- Opened udata %s.%s\n", cnvName, "cnv"); 144 } 145 146 147 /* all read only, clean, platform independent data. Mmmm. :) */ 148 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData)); 149 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ 150 /* Now, write the table */ 151 if(tableType&TABLE_BASE) { 152 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType); 153 } 154 if(tableType&TABLE_EXT) { 155 size += data->extData->write(data->extData, &data->staticData, mem, tableType); 156 } 157 158 sz2 = udata_finish(mem, status); 159 if(size != sz2) 160 { 161 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size); 162 *status=U_INTERNAL_PROGRAM_ERROR; 163 } 164 if(VERBOSE) 165 { 166 printf("- Wrote %u bytes to the udata.\n", (int)sz2); 167 } 168 } 169 170 enum { 171 OPT_HELP_H, 172 OPT_HELP_QUESTION_MARK, 173 OPT_COPYRIGHT, 174 OPT_VERSION, 175 OPT_DESTDIR, 176 OPT_VERBOSE, 177 OPT_SMALL, 178 OPT_IGNORE_SISO_CHECK, 179 OPT_QUIET, 180 181 OPT_COUNT 182 }; 183 184 static UOption options[]={ 185 UOPTION_HELP_H, 186 UOPTION_HELP_QUESTION_MARK, 187 UOPTION_COPYRIGHT, 188 UOPTION_VERSION, 189 UOPTION_DESTDIR, 190 UOPTION_VERBOSE, 191 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, 192 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, 193 UOPTION_QUIET, 194 }; 195 196 int main(int argc, char* argv[]) 197 { 198 ConvData data; 199 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; 200 201 U_MAIN_INIT_ARGS(argc, argv); 202 203 /* Set up the ICU version number */ 204 UVersionInfo icuVersion; 205 u_getVersion(icuVersion); 206 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); 207 208 /* preset then read command line options */ 209 options[OPT_DESTDIR].value=u_getDataDirectory(); 210 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); 211 212 /* error handling, printing usage message */ 213 if(argc<0) { 214 fprintf(stderr, 215 "error in command line argument \"%s\"\n", 216 argv[-argc]); 217 } else if(argc<2) { 218 argc=-1; 219 } 220 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) { 221 FILE *stdfile=argc<0 ? stderr : stdout; 222 fprintf(stdfile, 223 "usage: %s [-options] files...\n" 224 "\tread .ucm codepage mapping files and write .cnv files\n" 225 "options:\n" 226 "\t-h or -? or --help this usage text\n" 227 "\t-V or --version show a version message\n" 228 "\t-c or --copyright include a copyright notice\n" 229 "\t-d or --destdir destination directory, followed by the path\n" 230 "\t-v or --verbose Turn on verbose output\n" 231 "\t-q or --quiet do not display warnings and progress\n", 232 argv[0]); 233 fprintf(stdfile, 234 "\t --small Generate smaller .cnv files. They will be\n" 235 "\t significantly smaller but may not be compatible with\n" 236 "\t older versions of ICU and will require heap memory\n" 237 "\t allocation when loaded.\n" 238 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n"); 239 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; 240 } 241 242 if(options[OPT_VERSION].doesOccur) { 243 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n", 244 dataInfo.formatVersion[0], dataInfo.formatVersion[1]); 245 printf("%s\n", U_COPYRIGHT_STRING); 246 exit(0); 247 } 248 249 /* get the options values */ 250 haveCopyright = options[OPT_COPYRIGHT].doesOccur; 251 const char *destdir = options[OPT_DESTDIR].value; 252 VERBOSE = options[OPT_VERBOSE].doesOccur; 253 QUIET = options[OPT_QUIET].doesOccur; 254 SMALL = options[OPT_SMALL].doesOccur; 255 256 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { 257 IGNORE_SISO_CHECK = TRUE; 258 } 259 260 icu::CharString outFileName; 261 UErrorCode err = U_ZERO_ERROR; 262 if (destdir != NULL && *destdir != 0) { 263 outFileName.append(destdir, err).ensureEndsWithFileSeparator(err); 264 if (U_FAILURE(err)) { 265 return err; 266 } 267 } 268 int32_t outBasenameStart = outFileName.length(); 269 270 #if DEBUG 271 { 272 int i; 273 printf("makeconv: processing %d files...\n", argc - 1); 274 for(i=1; i<argc; ++i) { 275 printf("%s ", argv[i]); 276 } 277 printf("\n"); 278 fflush(stdout); 279 } 280 #endif 281 282 UBool printFilename = (UBool) (argc > 2 || VERBOSE); 283 for (++argv; --argc; ++argv) 284 { 285 UErrorCode localError = U_ZERO_ERROR; 286 const char *arg = getLongPathname(*argv); 287 288 /*produces the right destination path for display*/ 289 outFileName.truncate(outBasenameStart); 290 if (outBasenameStart != 0) 291 { 292 /* find the last file sepator */ 293 const char *basename = findBasename(arg); 294 outFileName.append(basename, localError); 295 } 296 else 297 { 298 outFileName.append(arg, localError); 299 } 300 if (U_FAILURE(localError)) { 301 return localError; 302 } 303 304 /*removes the extension if any is found*/ 305 int32_t lastDotIndex = outFileName.lastIndexOf('.'); 306 if (lastDotIndex >= outBasenameStart) { 307 outFileName.truncate(lastDotIndex); 308 } 309 310 /* the basename without extension is the converter name */ 311 if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) { 312 fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart); 313 return U_BUFFER_OVERFLOW_ERROR; 314 } 315 uprv_strcpy(cnvName, outFileName.data() + outBasenameStart); 316 317 /*Adds the target extension*/ 318 outFileName.append(CONVERTER_FILE_EXTENSION, localError); 319 if (U_FAILURE(localError)) { 320 return localError; 321 } 322 323 #if DEBUG 324 printf("makeconv: processing %s ...\n", arg); 325 fflush(stdout); 326 #endif 327 initConvData(&data); 328 createConverter(&data, arg, &localError); 329 330 if (U_FAILURE(localError)) 331 { 332 /* if an error is found, print out an error msg and keep going */ 333 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", 334 outFileName.data(), arg, u_errorName(localError)); 335 if(U_SUCCESS(err)) { 336 err = localError; 337 } 338 } 339 else 340 { 341 /* Insure the static data name matches the file name */ 342 /* Changed to ignore directory and only compare base name 343 LDH 1/2/08*/ 344 char *p; 345 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */ 346 347 if(p == NULL) /* OK, try alternate */ 348 { 349 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); 350 if(p == NULL) 351 { 352 p=cnvName; /* If no separators, no problem */ 353 } 354 } 355 else 356 { 357 p++; /* If found separator, don't include it in compare */ 358 } 359 if(uprv_stricmp(p,data.staticData.name) && !QUIET) 360 { 361 fprintf(stderr, "Warning: %s%s claims to be '%s'\n", 362 cnvName, CONVERTER_FILE_EXTENSION, 363 data.staticData.name); 364 } 365 366 uprv_strcpy((char*)data.staticData.name, cnvName); 367 368 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { 369 fprintf(stderr, 370 "Error: A converter name must contain only invariant characters.\n" 371 "%s is not a valid converter name.\n", 372 data.staticData.name); 373 if(U_SUCCESS(err)) { 374 err = U_INVALID_TABLE_FORMAT; 375 } 376 } 377 378 localError = U_ZERO_ERROR; 379 writeConverterData(&data, cnvName, destdir, &localError); 380 381 if(U_FAILURE(localError)) 382 { 383 /* if an error is found, print out an error msg and keep going*/ 384 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg, 385 u_errorName(localError)); 386 if(U_SUCCESS(err)) { 387 err = localError; 388 } 389 } 390 else if (printFilename) 391 { 392 puts(outFileName.data() + outBasenameStart); 393 } 394 } 395 fflush(stdout); 396 fflush(stderr); 397 398 cleanupConvData(&data); 399 } 400 401 return err; 402 } 403 404 static void 405 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) { 406 if( (name[0]=='i' || name[0]=='I') && 407 (name[1]=='b' || name[1]=='B') && 408 (name[2]=='m' || name[2]=='M') 409 ) { 410 name+=3; 411 if(*name=='-') { 412 ++name; 413 } 414 *pPlatform=UCNV_IBM; 415 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10); 416 } else { 417 *pPlatform=UCNV_UNKNOWN; 418 *pCCSID=0; 419 } 420 } 421 422 static void 423 readHeader(ConvData *data, 424 FileStream* convFile, 425 UErrorCode *pErrorCode) { 426 char line[1024]; 427 char *s, *key, *value; 428 const UConverterStaticData *prototype; 429 UConverterStaticData *staticData; 430 431 if(U_FAILURE(*pErrorCode)) { 432 return; 433 } 434 435 staticData=&data->staticData; 436 staticData->platform=UCNV_IBM; 437 staticData->subCharLen=0; 438 439 while(T_FileStream_readLine(convFile, line, sizeof(line))) { 440 /* basic parsing and handling of state-related items */ 441 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) { 442 continue; 443 } 444 445 /* stop at the beginning of the mapping section */ 446 if(uprv_strcmp(line, "CHARMAP")==0) { 447 break; 448 } 449 450 /* collect the information from the header field, ignore unknown keys */ 451 if(uprv_strcmp(key, "code_set_name")==0) { 452 if(*value!=0) { 453 uprv_strcpy((char *)staticData->name, value); 454 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage); 455 } 456 } else if(uprv_strcmp(key, "subchar")==0) { 457 uint8_t bytes[UCNV_EXT_MAX_BYTES]; 458 int8_t length; 459 460 s=value; 461 length=ucm_parseBytes(bytes, line, (const char **)&s); 462 if(1<=length && length<=4 && *s==0) { 463 staticData->subCharLen=length; 464 uprv_memcpy(staticData->subChar, bytes, length); 465 } else { 466 fprintf(stderr, "error: illegal <subchar> %s\n", value); 467 *pErrorCode=U_INVALID_TABLE_FORMAT; 468 return; 469 } 470 } else if(uprv_strcmp(key, "subchar1")==0) { 471 uint8_t bytes[UCNV_EXT_MAX_BYTES]; 472 473 s=value; 474 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) { 475 staticData->subChar1=bytes[0]; 476 } else { 477 fprintf(stderr, "error: illegal <subchar1> %s\n", value); 478 *pErrorCode=U_INVALID_TABLE_FORMAT; 479 return; 480 } 481 } 482 } 483 484 /* copy values from the UCMFile to the static data */ 485 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength; 486 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength; 487 staticData->conversionType=data->ucm->states.conversionType; 488 489 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { 490 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n"); 491 *pErrorCode=U_INVALID_TABLE_FORMAT; 492 return; 493 } 494 495 /* 496 * Now that we know the type, copy any 'default' values from the table. 497 * We need not check the type any further because the parser only 498 * recognizes what we have prototypes for. 499 * 500 * For delta (extension-only) tables, copy values from the base file 501 * instead, see createConverter(). 502 */ 503 if(data->ucm->baseName[0]==0) { 504 prototype=ucnv_converterStaticData[staticData->conversionType]; 505 if(prototype!=NULL) { 506 if(staticData->name[0]==0) { 507 uprv_strcpy((char *)staticData->name, prototype->name); 508 } 509 510 if(staticData->codepage==0) { 511 staticData->codepage=prototype->codepage; 512 } 513 514 if(staticData->platform==0) { 515 staticData->platform=prototype->platform; 516 } 517 518 if(staticData->minBytesPerChar==0) { 519 staticData->minBytesPerChar=prototype->minBytesPerChar; 520 } 521 522 if(staticData->maxBytesPerChar==0) { 523 staticData->maxBytesPerChar=prototype->maxBytesPerChar; 524 } 525 526 if(staticData->subCharLen==0) { 527 staticData->subCharLen=prototype->subCharLen; 528 if(prototype->subCharLen>0) { 529 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen); 530 } 531 } 532 } 533 } 534 535 if(data->ucm->states.outputType<0) { 536 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1; 537 } 538 539 if( staticData->subChar1!=0 && 540 (staticData->minBytesPerChar>1 || 541 (staticData->conversionType!=UCNV_MBCS && 542 staticData->conversionType!=UCNV_EBCDIC_STATEFUL)) 543 ) { 544 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n"); 545 *pErrorCode=U_INVALID_TABLE_FORMAT; 546 } 547 } 548 549 /* return TRUE if a base table was read, FALSE for an extension table */ 550 static UBool 551 readFile(ConvData *data, const char* converterName, 552 UErrorCode *pErrorCode) { 553 char line[1024]; 554 char *end; 555 FileStream *convFile; 556 557 UCMStates *baseStates; 558 UBool dataIsBase; 559 560 if(U_FAILURE(*pErrorCode)) { 561 return FALSE; 562 } 563 564 data->ucm=ucm_open(); 565 566 convFile=T_FileStream_open(converterName, "r"); 567 if(convFile==NULL) { 568 *pErrorCode=U_FILE_ACCESS_ERROR; 569 return FALSE; 570 } 571 572 readHeader(data, convFile, pErrorCode); 573 if(U_FAILURE(*pErrorCode)) { 574 return FALSE; 575 } 576 577 if(data->ucm->baseName[0]==0) { 578 dataIsBase=TRUE; 579 baseStates=&data->ucm->states; 580 ucm_processStates(baseStates, IGNORE_SISO_CHECK); 581 } else { 582 dataIsBase=FALSE; 583 baseStates=NULL; 584 } 585 586 /* read the base table */ 587 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode); 588 if(U_FAILURE(*pErrorCode)) { 589 return FALSE; 590 } 591 592 /* read an extension table if there is one */ 593 while(T_FileStream_readLine(convFile, line, sizeof(line))) { 594 end=uprv_strchr(line, 0); 595 while(line<end && 596 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) { 597 --end; 598 } 599 *end=0; 600 601 if(line[0]=='#' || u_skipWhitespace(line)==end) { 602 continue; /* ignore empty and comment lines */ 603 } 604 605 if(0==uprv_strcmp(line, "CHARMAP")) { 606 /* read the extension table */ 607 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode); 608 } else { 609 fprintf(stderr, "unexpected text after the base mapping table\n"); 610 } 611 break; 612 } 613 614 T_FileStream_close(convFile); 615 616 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) { 617 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n"); 618 *pErrorCode=U_INVALID_TABLE_FORMAT; 619 } 620 621 return dataIsBase; 622 } 623 624 static void 625 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) { 626 ConvData baseData; 627 UBool dataIsBase; 628 629 UConverterStaticData *staticData; 630 UCMStates *states, *baseStates; 631 632 if(U_FAILURE(*pErrorCode)) { 633 return; 634 } 635 636 initConvData(data); 637 638 dataIsBase=readFile(data, converterName, pErrorCode); 639 if(U_FAILURE(*pErrorCode)) { 640 return; 641 } 642 643 staticData=&data->staticData; 644 states=&data->ucm->states; 645 646 if(dataIsBase) { 647 /* 648 * Build a normal .cnv file with a base table 649 * and an optional extension table. 650 */ 651 data->cnvData=MBCSOpen(data->ucm); 652 if(data->cnvData==NULL) { 653 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 654 655 } else if(!data->cnvData->isValid(data->cnvData, 656 staticData->subChar, staticData->subCharLen) 657 ) { 658 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); 659 *pErrorCode=U_INVALID_TABLE_FORMAT; 660 661 } else if(staticData->subChar1!=0 && 662 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1) 663 ) { 664 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); 665 *pErrorCode=U_INVALID_TABLE_FORMAT; 666 667 } else if( 668 data->ucm->ext->mappingsLength>0 && 669 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) 670 ) { 671 *pErrorCode=U_INVALID_TABLE_FORMAT; 672 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { 673 /* sort the table so that it can be turned into UTF-8-friendly data */ 674 ucm_sortTable(data->ucm->base); 675 } 676 677 if(U_SUCCESS(*pErrorCode)) { 678 if( 679 /* add the base table after ucm_checkBaseExt()! */ 680 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData) 681 ) { 682 *pErrorCode=U_INVALID_TABLE_FORMAT; 683 } else { 684 /* 685 * addTable() may have requested moving more mappings to the extension table 686 * if they fit into the base toUnicode table but not into the 687 * base fromUnicode table. 688 * (Especially for UTF-8-friendly fromUnicode tables.) 689 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them 690 * to be excluded from the extension toUnicode data. 691 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into 692 * the base fromUnicode table. 693 */ 694 ucm_moveMappings(data->ucm->base, data->ucm->ext); 695 ucm_sortTable(data->ucm->ext); 696 if(data->ucm->ext->mappingsLength>0) { 697 /* prepare the extension table, if there is one */ 698 data->extData=CnvExtOpen(data->ucm); 699 if(data->extData==NULL) { 700 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 701 } else if( 702 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) 703 ) { 704 *pErrorCode=U_INVALID_TABLE_FORMAT; 705 } 706 } 707 } 708 } 709 } else { 710 /* Build an extension-only .cnv file. */ 711 char baseFilename[500]; 712 char *basename; 713 714 initConvData(&baseData); 715 716 /* assemble a path/filename for data->ucm->baseName */ 717 uprv_strcpy(baseFilename, converterName); 718 basename=(char *)findBasename(baseFilename); 719 uprv_strcpy(basename, data->ucm->baseName); 720 uprv_strcat(basename, ".ucm"); 721 722 /* read the base table */ 723 dataIsBase=readFile(&baseData, baseFilename, pErrorCode); 724 if(U_FAILURE(*pErrorCode)) { 725 return; 726 } else if(!dataIsBase) { 727 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename); 728 *pErrorCode=U_INVALID_TABLE_FORMAT; 729 } else { 730 /* prepare the extension table */ 731 data->extData=CnvExtOpen(data->ucm); 732 if(data->extData==NULL) { 733 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 734 } else { 735 /* fill in gaps in extension file header fields */ 736 UCMapping *m, *mLimit; 737 uint8_t fallbackFlags; 738 739 baseStates=&baseData.ucm->states; 740 if(states->conversionType==UCNV_DBCS) { 741 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2); 742 } else if(states->minCharLength==0) { 743 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength); 744 } 745 if(states->maxCharLength<states->minCharLength) { 746 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength); 747 } 748 749 if(staticData->subCharLen==0) { 750 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4); 751 staticData->subCharLen=baseData.staticData.subCharLen; 752 } 753 /* 754 * do not copy subChar1 - 755 * only use what is explicitly specified 756 * because it cannot be unset in the extension file header 757 */ 758 759 /* get the fallback flags */ 760 fallbackFlags=0; 761 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; 762 m<mLimit && fallbackFlags!=3; 763 ++m 764 ) { 765 if(m->f==1) { 766 fallbackFlags|=1; 767 } else if(m->f==3) { 768 fallbackFlags|=2; 769 } 770 } 771 772 if(fallbackFlags&1) { 773 staticData->hasFromUnicodeFallback=TRUE; 774 } 775 if(fallbackFlags&2) { 776 staticData->hasToUnicodeFallback=TRUE; 777 } 778 779 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) { 780 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); 781 *pErrorCode=U_INVALID_TABLE_FORMAT; 782 783 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) { 784 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); 785 *pErrorCode=U_INVALID_TABLE_FORMAT; 786 787 } else if( 788 !ucm_checkValidity(data->ucm->ext, baseStates) || 789 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) 790 ) { 791 *pErrorCode=U_INVALID_TABLE_FORMAT; 792 } else { 793 if(states->maxCharLength>1) { 794 /* 795 * When building a normal .cnv file with a base table 796 * for an MBCS (not SBCS) table with explicit precision flags, 797 * the MBCSAddTable() function marks some mappings for moving 798 * to the extension table. 799 * They fit into the base toUnicode table but not into the 800 * base fromUnicode table. 801 * (Note: We do have explicit precision flags because they are 802 * required for extension table generation, and 803 * ucm_checkBaseExt() verified it.) 804 * 805 * We do not call MBCSAddTable() here (we probably could) 806 * so we need to do the analysis before building the extension table. 807 * We assume that MBCSAddTable() will build a UTF-8-friendly table. 808 * Redundant mappings in the extension table are ok except they cost some size. 809 * 810 * Do this after ucm_checkBaseExt(). 811 */ 812 const MBCSData *mbcsData=MBCSGetDummy(); 813 int32_t needsMove=0; 814 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; 815 m<mLimit; 816 ++m 817 ) { 818 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) { 819 m->f|=MBCS_FROM_U_EXT_FLAG; 820 m->moveFlag=UCM_MOVE_TO_EXT; 821 ++needsMove; 822 } 823 } 824 825 if(needsMove!=0) { 826 ucm_moveMappings(baseData.ucm->base, data->ucm->ext); 827 ucm_sortTable(data->ucm->ext); 828 } 829 } 830 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) { 831 *pErrorCode=U_INVALID_TABLE_FORMAT; 832 } 833 } 834 } 835 } 836 837 cleanupConvData(&baseData); 838 } 839 } 840 841 /* 842 * Hey, Emacs, please set the following: 843 * 844 * Local Variables: 845 * indent-tabs-mode: nil 846 * End: 847 * 848 */ 849