1 /* 2 ******************************************************************************** 3 * 4 * Copyright (C) 1998-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************** 8 * 9 * 10 * makeconv.c: 11 * tool creating a binary (compressed) representation of the conversion mapping 12 * table (IBM NLTC ucmap format). 13 * 14 * 05/04/2000 helena Added fallback mapping into the picture... 15 * 06/29/2000 helena Major rewrite of the callback APIs. 16 */ 17 18 #include <stdio.h> 19 #include "unicode/putil.h" 20 #include "unicode/ucnv_err.h" 21 #include "ucnv_bld.h" 22 #include "ucnv_imp.h" 23 #include "ucnv_cnv.h" 24 #include "cstring.h" 25 #include "cmemory.h" 26 #include "uinvchar.h" 27 #include "filestrm.h" 28 #include "toolutil.h" 29 #include "uoptions.h" 30 #include "unicode/udata.h" 31 #include "unewdata.h" 32 #include "uparse.h" 33 #include "ucm.h" 34 #include "makeconv.h" 35 #include "genmbcs.h" 36 37 #define DEBUG 0 38 39 typedef struct ConvData { 40 UCMFile *ucm; 41 NewConverter *cnvData, *extData; 42 UConverterSharedData sharedData; 43 UConverterStaticData staticData; 44 } ConvData; 45 46 static void 47 initConvData(ConvData *data) { 48 uprv_memset(data, 0, sizeof(ConvData)); 49 data->sharedData.structSize=sizeof(UConverterSharedData); 50 data->staticData.structSize=sizeof(UConverterStaticData); 51 data->sharedData.staticData=&data->staticData; 52 } 53 54 static void 55 cleanupConvData(ConvData *data) { 56 if(data!=NULL) { 57 if(data->cnvData!=NULL) { 58 data->cnvData->close(data->cnvData); 59 data->cnvData=NULL; 60 } 61 if(data->extData!=NULL) { 62 data->extData->close(data->extData); 63 data->extData=NULL; 64 } 65 ucm_close(data->ucm); 66 data->ucm=NULL; 67 } 68 } 69 70 /* 71 * from ucnvstat.c - static prototypes of data-based converters 72 */ 73 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]; 74 75 /* 76 * Global - verbosity 77 */ 78 UBool VERBOSE = FALSE; 79 UBool SMALL = FALSE; 80 UBool IGNORE_SISO_CHECK = FALSE; 81 82 static void 83 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode); 84 85 /* 86 * Set up the UNewData and write the converter.. 87 */ 88 static void 89 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status); 90 91 UBool haveCopyright=TRUE; 92 93 static UDataInfo dataInfo={ 94 sizeof(UDataInfo), 95 0, 96 97 U_IS_BIG_ENDIAN, 98 U_CHARSET_FAMILY, 99 sizeof(UChar), 100 0, 101 102 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */ 103 {6, 2, 0, 0}, /* formatVersion */ 104 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */ 105 }; 106 107 static void 108 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status) 109 { 110 UNewDataMemory *mem = NULL; 111 uint32_t sz2; 112 uint32_t size = 0; 113 int32_t tableType; 114 115 if(U_FAILURE(*status)) 116 { 117 return; 118 } 119 120 tableType=TABLE_NONE; 121 if(data->cnvData!=NULL) { 122 tableType|=TABLE_BASE; 123 } 124 if(data->extData!=NULL) { 125 tableType|=TABLE_EXT; 126 } 127 128 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status); 129 130 if(U_FAILURE(*status)) 131 { 132 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", 133 cnvName, 134 "cnv", 135 u_errorName(*status)); 136 return; 137 } 138 139 if(VERBOSE) 140 { 141 printf("- Opened udata %s.%s\n", cnvName, "cnv"); 142 } 143 144 145 /* all read only, clean, platform independent data. Mmmm. :) */ 146 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData)); 147 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ 148 /* Now, write the table */ 149 if(tableType&TABLE_BASE) { 150 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType); 151 } 152 if(tableType&TABLE_EXT) { 153 size += data->extData->write(data->extData, &data->staticData, mem, tableType); 154 } 155 156 sz2 = udata_finish(mem, status); 157 if(size != sz2) 158 { 159 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size); 160 *status=U_INTERNAL_PROGRAM_ERROR; 161 } 162 if(VERBOSE) 163 { 164 printf("- Wrote %u bytes to the udata.\n", (int)sz2); 165 } 166 } 167 168 enum { 169 OPT_HELP_H, 170 OPT_HELP_QUESTION_MARK, 171 OPT_COPYRIGHT, 172 OPT_VERSION, 173 OPT_DESTDIR, 174 OPT_VERBOSE, 175 OPT_SMALL, 176 OPT_IGNORE_SISO_CHECK, 177 OPT_COUNT 178 }; 179 180 static UOption options[]={ 181 UOPTION_HELP_H, 182 UOPTION_HELP_QUESTION_MARK, 183 UOPTION_COPYRIGHT, 184 UOPTION_VERSION, 185 UOPTION_DESTDIR, 186 UOPTION_VERBOSE, 187 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, 188 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 } 189 }; 190 191 int main(int argc, char* argv[]) 192 { 193 ConvData data; 194 UErrorCode err = U_ZERO_ERROR, localError; 195 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; 196 const char* destdir, *arg; 197 size_t destdirlen; 198 char* dot = NULL, *outBasename; 199 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; 200 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH]; 201 UVersionInfo icuVersion; 202 UBool printFilename; 203 204 err = U_ZERO_ERROR; 205 206 U_MAIN_INIT_ARGS(argc, argv); 207 208 /* Set up the ICU version number */ 209 u_getVersion(icuVersion); 210 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); 211 212 /* preset then read command line options */ 213 options[OPT_DESTDIR].value=u_getDataDirectory(); 214 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); 215 216 /* error handling, printing usage message */ 217 if(argc<0) { 218 fprintf(stderr, 219 "error in command line argument \"%s\"\n", 220 argv[-argc]); 221 } else if(argc<2) { 222 argc=-1; 223 } 224 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) { 225 FILE *stdfile=argc<0 ? stderr : stdout; 226 fprintf(stdfile, 227 "usage: %s [-options] files...\n" 228 "\tread .ucm codepage mapping files and write .cnv files\n" 229 "options:\n" 230 "\t-h or -? or --help this usage text\n" 231 "\t-V or --version show a version message\n" 232 "\t-c or --copyright include a copyright notice\n" 233 "\t-d or --destdir destination directory, followed by the path\n" 234 "\t-v or --verbose Turn on verbose output\n", 235 argv[0]); 236 fprintf(stdfile, 237 "\t --small Generate smaller .cnv files. They will be\n" 238 "\t significantly smaller but may not be compatible with\n" 239 "\t older versions of ICU and will require heap memory\n" 240 "\t allocation when loaded.\n" 241 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n"); 242 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; 243 } 244 245 if(options[OPT_VERSION].doesOccur) { 246 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n", 247 dataInfo.formatVersion[0], dataInfo.formatVersion[1]); 248 printf("%s\n", U_COPYRIGHT_STRING); 249 exit(0); 250 } 251 252 /* get the options values */ 253 haveCopyright = options[OPT_COPYRIGHT].doesOccur; 254 destdir = options[OPT_DESTDIR].value; 255 VERBOSE = options[OPT_VERBOSE].doesOccur; 256 SMALL = options[OPT_SMALL].doesOccur; 257 258 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { 259 IGNORE_SISO_CHECK = TRUE; 260 } 261 262 if (destdir != NULL && *destdir != 0) { 263 uprv_strcpy(outFileName, destdir); 264 destdirlen = uprv_strlen(destdir); 265 outBasename = outFileName + destdirlen; 266 if (*(outBasename - 1) != U_FILE_SEP_CHAR) { 267 *outBasename++ = U_FILE_SEP_CHAR; 268 ++destdirlen; 269 } 270 } else { 271 destdirlen = 0; 272 outBasename = outFileName; 273 } 274 275 #if DEBUG 276 { 277 int i; 278 printf("makeconv: processing %d files...\n", argc - 1); 279 for(i=1; i<argc; ++i) { 280 printf("%s ", argv[i]); 281 } 282 printf("\n"); 283 fflush(stdout); 284 } 285 #endif 286 287 err = U_ZERO_ERROR; 288 printFilename = (UBool) (argc > 2 || VERBOSE); 289 for (++argv; --argc; ++argv) 290 { 291 arg = getLongPathname(*argv); 292 293 /* Check for potential buffer overflow */ 294 if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH) 295 { 296 fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); 297 return U_BUFFER_OVERFLOW_ERROR; 298 } 299 300 /*produces the right destination path for display*/ 301 if (destdirlen != 0) 302 { 303 const char *basename; 304 305 /* find the last file sepator */ 306 basename = findBasename(arg); 307 uprv_strcpy(outBasename, basename); 308 } 309 else 310 { 311 uprv_strcpy(outFileName, arg); 312 } 313 314 /*removes the extension if any is found*/ 315 dot = uprv_strrchr(outBasename, '.'); 316 if (dot) 317 { 318 *dot = '\0'; 319 } 320 321 /* the basename without extension is the converter name */ 322 uprv_strcpy(cnvName, outBasename); 323 324 /*Adds the target extension*/ 325 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); 326 327 #if DEBUG 328 printf("makeconv: processing %s ...\n", arg); 329 fflush(stdout); 330 #endif 331 localError = U_ZERO_ERROR; 332 initConvData(&data); 333 createConverter(&data, arg, &localError); 334 335 if (U_FAILURE(localError)) 336 { 337 /* if an error is found, print out an error msg and keep going */ 338 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg, 339 u_errorName(localError)); 340 if(U_SUCCESS(err)) { 341 err = localError; 342 } 343 } 344 else 345 { 346 /* Insure the static data name matches the file name */ 347 /* Changed to ignore directory and only compare base name 348 LDH 1/2/08*/ 349 char *p; 350 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */ 351 352 if(p == NULL) /* OK, try alternate */ 353 { 354 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); 355 if(p == NULL) 356 { 357 p=cnvName; /* If no separators, no problem */ 358 } 359 } 360 else 361 { 362 p++; /* If found separtor, don't include it in compare */ 363 } 364 if(uprv_stricmp(p,data.staticData.name)) 365 { 366 fprintf(stderr, "Warning: %s%s claims to be '%s'\n", 367 cnvName, CONVERTER_FILE_EXTENSION, 368 data.staticData.name); 369 } 370 371 uprv_strcpy((char*)data.staticData.name, cnvName); 372 373 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { 374 fprintf(stderr, 375 "Error: A converter name must contain only invariant characters.\n" 376 "%s is not a valid converter name.\n", 377 data.staticData.name); 378 if(U_SUCCESS(err)) { 379 err = U_INVALID_TABLE_FORMAT; 380 } 381 } 382 383 uprv_strcpy(cnvNameWithPkg, cnvName); 384 385 localError = U_ZERO_ERROR; 386 writeConverterData(&data, cnvNameWithPkg, destdir, &localError); 387 388 if(U_FAILURE(localError)) 389 { 390 /* if an error is found, print out an error msg and keep going*/ 391 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg, 392 u_errorName(localError)); 393 if(U_SUCCESS(err)) { 394 err = localError; 395 } 396 } 397 else if (printFilename) 398 { 399 puts(outBasename); 400 } 401 } 402 fflush(stdout); 403 fflush(stderr); 404 405 cleanupConvData(&data); 406 } 407 408 return err; 409 } 410 411 static void 412 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) { 413 if( (name[0]=='i' || name[0]=='I') && 414 (name[1]=='b' || name[1]=='B') && 415 (name[2]=='m' || name[2]=='M') 416 ) { 417 name+=3; 418 if(*name=='-') { 419 ++name; 420 } 421 *pPlatform=UCNV_IBM; 422 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10); 423 } else { 424 *pPlatform=UCNV_UNKNOWN; 425 *pCCSID=0; 426 } 427 } 428 429 static void 430 readHeader(ConvData *data, 431 FileStream* convFile, 432 const char* converterName, 433 UErrorCode *pErrorCode) { 434 char line[1024]; 435 char *s, *key, *value; 436 const UConverterStaticData *prototype; 437 UConverterStaticData *staticData; 438 439 if(U_FAILURE(*pErrorCode)) { 440 return; 441 } 442 443 staticData=&data->staticData; 444 staticData->platform=UCNV_IBM; 445 staticData->subCharLen=0; 446 447 while(T_FileStream_readLine(convFile, line, sizeof(line))) { 448 /* basic parsing and handling of state-related items */ 449 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) { 450 continue; 451 } 452 453 /* stop at the beginning of the mapping section */ 454 if(uprv_strcmp(line, "CHARMAP")==0) { 455 break; 456 } 457 458 /* collect the information from the header field, ignore unknown keys */ 459 if(uprv_strcmp(key, "code_set_name")==0) { 460 if(*value!=0) { 461 uprv_strcpy((char *)staticData->name, value); 462 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage); 463 } 464 } else if(uprv_strcmp(key, "subchar")==0) { 465 uint8_t bytes[UCNV_EXT_MAX_BYTES]; 466 int8_t length; 467 468 s=value; 469 length=ucm_parseBytes(bytes, line, (const char **)&s); 470 if(1<=length && length<=4 && *s==0) { 471 staticData->subCharLen=length; 472 uprv_memcpy(staticData->subChar, bytes, length); 473 } else { 474 fprintf(stderr, "error: illegal <subchar> %s\n", value); 475 *pErrorCode=U_INVALID_TABLE_FORMAT; 476 return; 477 } 478 } else if(uprv_strcmp(key, "subchar1")==0) { 479 uint8_t bytes[UCNV_EXT_MAX_BYTES]; 480 481 s=value; 482 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) { 483 staticData->subChar1=bytes[0]; 484 } else { 485 fprintf(stderr, "error: illegal <subchar1> %s\n", value); 486 *pErrorCode=U_INVALID_TABLE_FORMAT; 487 return; 488 } 489 } 490 } 491 492 /* copy values from the UCMFile to the static data */ 493 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength; 494 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength; 495 staticData->conversionType=data->ucm->states.conversionType; 496 497 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { 498 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n"); 499 *pErrorCode=U_INVALID_TABLE_FORMAT; 500 return; 501 } 502 503 /* 504 * Now that we know the type, copy any 'default' values from the table. 505 * We need not check the type any further because the parser only 506 * recognizes what we have prototypes for. 507 * 508 * For delta (extension-only) tables, copy values from the base file 509 * instead, see createConverter(). 510 */ 511 if(data->ucm->baseName[0]==0) { 512 prototype=ucnv_converterStaticData[staticData->conversionType]; 513 if(prototype!=NULL) { 514 if(staticData->name[0]==0) { 515 uprv_strcpy((char *)staticData->name, prototype->name); 516 } 517 518 if(staticData->codepage==0) { 519 staticData->codepage=prototype->codepage; 520 } 521 522 if(staticData->platform==0) { 523 staticData->platform=prototype->platform; 524 } 525 526 if(staticData->minBytesPerChar==0) { 527 staticData->minBytesPerChar=prototype->minBytesPerChar; 528 } 529 530 if(staticData->maxBytesPerChar==0) { 531 staticData->maxBytesPerChar=prototype->maxBytesPerChar; 532 } 533 534 if(staticData->subCharLen==0) { 535 staticData->subCharLen=prototype->subCharLen; 536 if(prototype->subCharLen>0) { 537 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen); 538 } 539 } 540 } 541 } 542 543 if(data->ucm->states.outputType<0) { 544 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1; 545 } 546 547 if( staticData->subChar1!=0 && 548 (staticData->minBytesPerChar>1 || 549 (staticData->conversionType!=UCNV_MBCS && 550 staticData->conversionType!=UCNV_EBCDIC_STATEFUL)) 551 ) { 552 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n"); 553 *pErrorCode=U_INVALID_TABLE_FORMAT; 554 } 555 } 556 557 /* return TRUE if a base table was read, FALSE for an extension table */ 558 static UBool 559 readFile(ConvData *data, const char* converterName, 560 UErrorCode *pErrorCode) { 561 char line[1024]; 562 char *end; 563 FileStream *convFile; 564 565 UCMStates *baseStates; 566 UBool dataIsBase; 567 568 if(U_FAILURE(*pErrorCode)) { 569 return FALSE; 570 } 571 572 data->ucm=ucm_open(); 573 574 convFile=T_FileStream_open(converterName, "r"); 575 if(convFile==NULL) { 576 *pErrorCode=U_FILE_ACCESS_ERROR; 577 return FALSE; 578 } 579 580 readHeader(data, convFile, converterName, pErrorCode); 581 if(U_FAILURE(*pErrorCode)) { 582 return FALSE; 583 } 584 585 if(data->ucm->baseName[0]==0) { 586 dataIsBase=TRUE; 587 baseStates=&data->ucm->states; 588 ucm_processStates(baseStates, IGNORE_SISO_CHECK); 589 } else { 590 dataIsBase=FALSE; 591 baseStates=NULL; 592 } 593 594 /* read the base table */ 595 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode); 596 if(U_FAILURE(*pErrorCode)) { 597 return FALSE; 598 } 599 600 /* read an extension table if there is one */ 601 while(T_FileStream_readLine(convFile, line, sizeof(line))) { 602 end=uprv_strchr(line, 0); 603 while(line<end && 604 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) { 605 --end; 606 } 607 *end=0; 608 609 if(line[0]=='#' || u_skipWhitespace(line)==end) { 610 continue; /* ignore empty and comment lines */ 611 } 612 613 if(0==uprv_strcmp(line, "CHARMAP")) { 614 /* read the extension table */ 615 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode); 616 } else { 617 fprintf(stderr, "unexpected text after the base mapping table\n"); 618 } 619 break; 620 } 621 622 T_FileStream_close(convFile); 623 624 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) { 625 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n"); 626 *pErrorCode=U_INVALID_TABLE_FORMAT; 627 } 628 629 return dataIsBase; 630 } 631 632 static void 633 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) { 634 ConvData baseData; 635 UBool dataIsBase; 636 637 UConverterStaticData *staticData; 638 UCMStates *states, *baseStates; 639 640 if(U_FAILURE(*pErrorCode)) { 641 return; 642 } 643 644 initConvData(data); 645 646 dataIsBase=readFile(data, converterName, pErrorCode); 647 if(U_FAILURE(*pErrorCode)) { 648 return; 649 } 650 651 staticData=&data->staticData; 652 states=&data->ucm->states; 653 654 if(dataIsBase) { 655 /* 656 * Build a normal .cnv file with a base table 657 * and an optional extension table. 658 */ 659 data->cnvData=MBCSOpen(data->ucm); 660 if(data->cnvData==NULL) { 661 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 662 663 } else if(!data->cnvData->isValid(data->cnvData, 664 staticData->subChar, staticData->subCharLen) 665 ) { 666 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); 667 *pErrorCode=U_INVALID_TABLE_FORMAT; 668 669 } else if(staticData->subChar1!=0 && 670 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1) 671 ) { 672 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); 673 *pErrorCode=U_INVALID_TABLE_FORMAT; 674 675 } else if( 676 data->ucm->ext->mappingsLength>0 && 677 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) 678 ) { 679 *pErrorCode=U_INVALID_TABLE_FORMAT; 680 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { 681 /* sort the table so that it can be turned into UTF-8-friendly data */ 682 ucm_sortTable(data->ucm->base); 683 } 684 685 if(U_SUCCESS(*pErrorCode)) { 686 if( 687 /* add the base table after ucm_checkBaseExt()! */ 688 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData) 689 ) { 690 *pErrorCode=U_INVALID_TABLE_FORMAT; 691 } else { 692 /* 693 * addTable() may have requested moving more mappings to the extension table 694 * if they fit into the base toUnicode table but not into the 695 * base fromUnicode table. 696 * (Especially for UTF-8-friendly fromUnicode tables.) 697 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them 698 * to be excluded from the extension toUnicode data. 699 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into 700 * the base fromUnicode table. 701 */ 702 ucm_moveMappings(data->ucm->base, data->ucm->ext); 703 ucm_sortTable(data->ucm->ext); 704 if(data->ucm->ext->mappingsLength>0) { 705 /* prepare the extension table, if there is one */ 706 data->extData=CnvExtOpen(data->ucm); 707 if(data->extData==NULL) { 708 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 709 } else if( 710 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) 711 ) { 712 *pErrorCode=U_INVALID_TABLE_FORMAT; 713 } 714 } 715 } 716 } 717 } else { 718 /* Build an extension-only .cnv file. */ 719 char baseFilename[500]; 720 char *basename; 721 722 initConvData(&baseData); 723 724 /* assemble a path/filename for data->ucm->baseName */ 725 uprv_strcpy(baseFilename, converterName); 726 basename=(char *)findBasename(baseFilename); 727 uprv_strcpy(basename, data->ucm->baseName); 728 uprv_strcat(basename, ".ucm"); 729 730 /* read the base table */ 731 dataIsBase=readFile(&baseData, baseFilename, pErrorCode); 732 if(U_FAILURE(*pErrorCode)) { 733 return; 734 } else if(!dataIsBase) { 735 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename); 736 *pErrorCode=U_INVALID_TABLE_FORMAT; 737 } else { 738 /* prepare the extension table */ 739 data->extData=CnvExtOpen(data->ucm); 740 if(data->extData==NULL) { 741 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 742 } else { 743 /* fill in gaps in extension file header fields */ 744 UCMapping *m, *mLimit; 745 uint8_t fallbackFlags; 746 747 baseStates=&baseData.ucm->states; 748 if(states->conversionType==UCNV_DBCS) { 749 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2); 750 } else if(states->minCharLength==0) { 751 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength); 752 } 753 if(states->maxCharLength<states->minCharLength) { 754 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength); 755 } 756 757 if(staticData->subCharLen==0) { 758 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4); 759 staticData->subCharLen=baseData.staticData.subCharLen; 760 } 761 /* 762 * do not copy subChar1 - 763 * only use what is explicitly specified 764 * because it cannot be unset in the extension file header 765 */ 766 767 /* get the fallback flags */ 768 fallbackFlags=0; 769 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; 770 m<mLimit && fallbackFlags!=3; 771 ++m 772 ) { 773 if(m->f==1) { 774 fallbackFlags|=1; 775 } else if(m->f==3) { 776 fallbackFlags|=2; 777 } 778 } 779 780 if(fallbackFlags&1) { 781 staticData->hasFromUnicodeFallback=TRUE; 782 } 783 if(fallbackFlags&2) { 784 staticData->hasToUnicodeFallback=TRUE; 785 } 786 787 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) { 788 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); 789 *pErrorCode=U_INVALID_TABLE_FORMAT; 790 791 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) { 792 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); 793 *pErrorCode=U_INVALID_TABLE_FORMAT; 794 795 } else if( 796 !ucm_checkValidity(data->ucm->ext, baseStates) || 797 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) 798 ) { 799 *pErrorCode=U_INVALID_TABLE_FORMAT; 800 } else { 801 if(states->maxCharLength>1) { 802 /* 803 * When building a normal .cnv file with a base table 804 * for an MBCS (not SBCS) table with explicit precision flags, 805 * the MBCSAddTable() function marks some mappings for moving 806 * to the extension table. 807 * They fit into the base toUnicode table but not into the 808 * base fromUnicode table. 809 * (Note: We do have explicit precision flags because they are 810 * required for extension table generation, and 811 * ucm_checkBaseExt() verified it.) 812 * 813 * We do not call MBCSAddTable() here (we probably could) 814 * so we need to do the analysis before building the extension table. 815 * We assume that MBCSAddTable() will build a UTF-8-friendly table. 816 * Redundant mappings in the extension table are ok except they cost some size. 817 * 818 * Do this after ucm_checkBaseExt(). 819 */ 820 const MBCSData *mbcsData=MBCSGetDummy(); 821 int32_t needsMove=0; 822 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; 823 m<mLimit; 824 ++m 825 ) { 826 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) { 827 m->f|=MBCS_FROM_U_EXT_FLAG; 828 m->moveFlag=UCM_MOVE_TO_EXT; 829 ++needsMove; 830 } 831 } 832 833 if(needsMove!=0) { 834 ucm_moveMappings(baseData.ucm->base, data->ucm->ext); 835 ucm_sortTable(data->ucm->ext); 836 } 837 } 838 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) { 839 *pErrorCode=U_INVALID_TABLE_FORMAT; 840 } 841 } 842 } 843 } 844 845 cleanupConvData(&baseData); 846 } 847 } 848 849 /* 850 * Hey, Emacs, please set the following: 851 * 852 * Local Variables: 853 * indent-tabs-mode: nil 854 * End: 855 * 856 */ 857