1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2000-2009, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: genuca.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created at the end of XX century 14 * created by: Vladimir Weinstein 15 * 16 * This program reads the Franctional UCA table and generates 17 * internal format for UCA table as well as inverse UCA table. 18 * It then writes binary files containing the data: ucadata.dat 19 * & invuca.dat 20 * Change history: 21 * 02/23/2001 grhoten Made it into a tool 22 * 02/23/2001 weiv Moved element & table handling code to i18n 23 * 05/09/2001 weiv Case bits are now in the CEs, not in front 24 */ 25 26 #include "unicode/utypes.h" 27 #include "unicode/putil.h" 28 #include "unicode/udata.h" 29 #include "unicode/uclean.h" 30 #include "ucol_imp.h" 31 #include "genuca.h" 32 #include "uoptions.h" 33 #include "toolutil.h" 34 #include "unewdata.h" 35 #include "cstring.h" 36 #include "cmemory.h" 37 38 #include <stdio.h> 39 40 /* 41 * Global - verbosity 42 */ 43 UBool VERBOSE = FALSE; 44 45 static UVersionInfo UCAVersion; 46 47 #if UCONFIG_NO_COLLATION 48 49 /* dummy UDataInfo cf. udata.h */ 50 static UDataInfo dummyDataInfo = { 51 sizeof(UDataInfo), 52 0, 53 54 U_IS_BIG_ENDIAN, 55 U_CHARSET_FAMILY, 56 U_SIZEOF_UCHAR, 57 0, 58 59 { 0, 0, 0, 0 }, /* dummy dataFormat */ 60 { 0, 0, 0, 0 }, /* dummy formatVersion */ 61 { 0, 0, 0, 0 } /* dummy dataVersion */ 62 }; 63 64 #else 65 66 static const UDataInfo ucaDataInfo={ 67 sizeof(UDataInfo), 68 0, 69 70 U_IS_BIG_ENDIAN, 71 U_CHARSET_FAMILY, 72 sizeof(UChar), 73 0, 74 75 {UCA_DATA_FORMAT_0, UCA_DATA_FORMAT_1, UCA_DATA_FORMAT_2, UCA_DATA_FORMAT_3}, /* dataFormat="UCol" */ 76 /* 03/26/2002 bumped up version since format has changed */ 77 /* 09/16/2002 bumped up version since we went from UColAttributeValue */ 78 /* to int32_t in UColOptionSet */ 79 /* 05/13/2003 This one also updated since we added UCA and UCD versions */ 80 /* to header */ 81 /* 09/11/2003 Adding information required by data swapper */ 82 {UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1, UCA_FORMAT_VERSION_2, UCA_FORMAT_VERSION_3}, /* formatVersion */ 83 {0, 0, 0, 0} /* dataVersion = Unicode Version*/ 84 }; 85 86 static const UDataInfo invUcaDataInfo={ 87 sizeof(UDataInfo), 88 0, 89 90 U_IS_BIG_ENDIAN, 91 U_CHARSET_FAMILY, 92 sizeof(UChar), 93 0, 94 95 {INVUCA_DATA_FORMAT_0, INVUCA_DATA_FORMAT_1, INVUCA_DATA_FORMAT_2, INVUCA_DATA_FORMAT_3}, /* dataFormat="InvC" */ 96 /* 03/26/2002 bumped up version since format has changed */ 97 /* 04/29/2003 2.1 format - we have added UCA version to header */ 98 {INVUCA_FORMAT_VERSION_0, INVUCA_FORMAT_VERSION_1, INVUCA_FORMAT_VERSION_2, INVUCA_FORMAT_VERSION_3}, /* formatVersion */ 99 {0, 0, 0, 0} /* dataVersion = Unicode Version*/ 100 }; 101 102 UCAElements le; 103 104 int32_t readElement(char **from, char *to, char separator, UErrorCode *status) { 105 if(U_FAILURE(*status)) { 106 return 0; 107 } 108 char buffer[1024]; 109 int32_t i = 0; 110 while(**from != separator) { 111 if(**from != ' ') { 112 *(buffer+i++) = **from; 113 } 114 (*from)++; 115 } 116 (*from)++; 117 *(buffer + i) = 0; 118 //*to = (char *)malloc(strlen(buffer)+1); 119 strcpy(to, buffer); 120 return i/2; 121 } 122 123 124 uint32_t getSingleCEValue(char *primary, char *secondary, char *tertiary, UErrorCode *status) { 125 if(U_FAILURE(*status)) { 126 return 0; 127 } 128 uint32_t value = 0; 129 char primsave = '\0'; 130 char secsave = '\0'; 131 char tersave = '\0'; 132 char *primend = primary+4; 133 if(strlen(primary) > 4) { 134 primsave = *primend; 135 *primend = '\0'; 136 } 137 char *secend = secondary+2; 138 if(strlen(secondary) > 2) { 139 secsave = *secend; 140 *secend = '\0'; 141 } 142 char *terend = tertiary+2; 143 if(strlen(tertiary) > 2) { 144 tersave = *terend; 145 *terend = '\0'; 146 } 147 uint32_t primvalue = (uint32_t)((*primary!='\0')?strtoul(primary, &primend, 16):0); 148 uint32_t secvalue = (uint32_t)((*secondary!='\0')?strtoul(secondary, &secend, 16):0); 149 uint32_t tervalue = (uint32_t)((*tertiary!='\0')?strtoul(tertiary, &terend, 16):0); 150 if(primvalue <= 0xFF) { 151 primvalue <<= 8; 152 } 153 154 value = ((primvalue<<UCOL_PRIMARYORDERSHIFT)&UCOL_PRIMARYORDERMASK)| 155 ((secvalue<<UCOL_SECONDARYORDERSHIFT)&UCOL_SECONDARYORDERMASK)| 156 (tervalue&UCOL_TERTIARYORDERMASK); 157 158 if(primsave!='\0') { 159 *primend = primsave; 160 } 161 if(secsave!='\0') { 162 *secend = secsave; 163 } 164 if(tersave!='\0') { 165 *terend = tersave; 166 } 167 return value; 168 } 169 170 static uint32_t inverseTable[0xFFFF][3]; 171 static uint32_t inversePos = 0; 172 static UChar stringContinue[0xFFFF]; 173 static uint32_t sContPos = 0; 174 175 static void addNewInverse(UCAElements *element, UErrorCode *status) { 176 if(U_FAILURE(*status)) { 177 return; 178 } 179 if(VERBOSE && isContinuation(element->CEs[1])) { 180 //fprintf(stdout, "+"); 181 } 182 inversePos++; 183 inverseTable[inversePos][0] = element->CEs[0]; 184 if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) { 185 inverseTable[inversePos][1] = element->CEs[1]; 186 } else { 187 inverseTable[inversePos][1] = 0; 188 } 189 if(element->cSize < 2) { 190 inverseTable[inversePos][2] = element->cPoints[0]; 191 } else { /* add a new store of cruft */ 192 inverseTable[inversePos][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos; 193 memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar)); 194 sContPos += element->cSize+1; 195 } 196 } 197 198 static void insertInverse(UCAElements *element, uint32_t position, UErrorCode *status) { 199 if(U_FAILURE(*status)) { 200 return; 201 } 202 203 if(VERBOSE && isContinuation(element->CEs[1])) { 204 //fprintf(stdout, "+"); 205 } 206 if(position <= inversePos) { 207 /*move stuff around */ 208 uint32_t amountToMove = (inversePos - position+1)*sizeof(inverseTable[0]); 209 uprv_memmove(inverseTable[position+1], inverseTable[position], amountToMove); 210 } 211 inverseTable[position][0] = element->CEs[0]; 212 if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) { 213 inverseTable[position][1] = element->CEs[1]; 214 } else { 215 inverseTable[position][1] = 0; 216 } 217 if(element->cSize < 2) { 218 inverseTable[position][2] = element->cPoints[0]; 219 } else { /* add a new store of cruft */ 220 inverseTable[position][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos; 221 memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar)); 222 sContPos += element->cSize+1; 223 } 224 inversePos++; 225 } 226 227 static void addToExistingInverse(UCAElements *element, uint32_t position, UErrorCode *status) { 228 229 if(U_FAILURE(*status)) { 230 return; 231 } 232 233 if((inverseTable[position][2] & UCOL_INV_SIZEMASK) == 0) { /* single element, have to make new extension place and put both guys there */ 234 stringContinue[sContPos] = (UChar)inverseTable[position][2]; 235 inverseTable[position][2] = ((element->cSize+3) << UCOL_INV_SHIFTVALUE) | sContPos; 236 sContPos++; 237 stringContinue[sContPos++] = 0xFFFF; 238 memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar)); 239 sContPos += element->cSize; 240 stringContinue[sContPos++] = 0xFFFE; 241 } else { /* adding to the already existing continuing table */ 242 uint32_t contIndex = inverseTable[position][2] & UCOL_INV_OFFSETMASK; 243 uint32_t contSize = (inverseTable[position][2] & UCOL_INV_SIZEMASK) >> UCOL_INV_SHIFTVALUE; 244 245 if(contIndex+contSize < sContPos) { 246 /*fprintf(stderr, ".", sContPos, contIndex+contSize);*/ 247 memcpy(stringContinue+contIndex+contSize+element->cSize+1, stringContinue+contIndex+contSize, (element->cSize+1)*sizeof(UChar)); 248 } 249 250 stringContinue[contIndex+contSize-1] = 0xFFFF; 251 memcpy(stringContinue+contIndex+contSize, element->cPoints, element->cSize*sizeof(UChar)); 252 sContPos += element->cSize+1; 253 stringContinue[contIndex+contSize+element->cSize] = 0xFFFE; 254 255 inverseTable[position][2] = ((contSize+element->cSize+1) << UCOL_INV_SHIFTVALUE) | contIndex; 256 } 257 } 258 259 /* 260 * Takes two CEs (lead and continuation) and 261 * compares them as CEs should be compared: 262 * primary vs. primary, secondary vs. secondary 263 * tertiary vs. tertiary 264 */ 265 static int32_t compareCEs(uint32_t *source, uint32_t *target) { 266 uint32_t s1 = source[0], s2, t1 = target[0], t2; 267 if(isContinuation(source[1])) { 268 s2 = source[1]; 269 } else { 270 s2 = 0; 271 } 272 if(isContinuation(target[1])) { 273 t2 = target[1]; 274 } else { 275 t2 = 0; 276 } 277 278 uint32_t s = 0, t = 0; 279 if(s1 == t1 && s2 == t2) { 280 return 0; 281 } 282 s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16); 283 t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16); 284 if(s < t) { 285 return -1; 286 } else if(s > t) { 287 return 1; 288 } else { 289 s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8; 290 t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8; 291 if(s < t) { 292 return -1; 293 } else if(s > t) { 294 return 1; 295 } else { 296 s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF); 297 t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF); 298 if(s < t) { 299 return -1; 300 } else { 301 return 1; 302 } 303 } 304 } 305 } 306 307 static uint32_t addToInverse(UCAElements *element, UErrorCode *status) { 308 uint32_t position = inversePos; 309 uint32_t saveElement = element->CEs[0]; 310 int32_t compResult = 0; 311 element->CEs[0] &= 0xFFFFFF3F; 312 if(element->noOfCEs == 1) { 313 element->CEs[1] = 0; 314 } 315 if(inversePos == 0) { 316 inverseTable[0][0] = inverseTable[0][1] = inverseTable[0][2] = 0; 317 addNewInverse(element, status); 318 } else if(compareCEs(inverseTable[inversePos], element->CEs) > 0) { 319 while((compResult = compareCEs(inverseTable[--position], element->CEs)) > 0); 320 if(VERBOSE) { fprintf(stdout, "p:%u ", (int)position); } 321 if(compResult == 0) { 322 addToExistingInverse(element, position, status); 323 } else { 324 insertInverse(element, position+1, status); 325 } 326 } else if(compareCEs(inverseTable[inversePos], element->CEs) == 0) { 327 addToExistingInverse(element, inversePos, status); 328 } else { 329 addNewInverse(element, status); 330 } 331 element->CEs[0] = saveElement; 332 if(VERBOSE) { fprintf(stdout, "+"); } 333 return inversePos; 334 } 335 336 static InverseUCATableHeader *assembleInverseTable(UErrorCode *status) 337 { 338 InverseUCATableHeader *result = NULL; 339 uint32_t headerByteSize = paddedsize(sizeof(InverseUCATableHeader)); 340 uint32_t inverseTableByteSize = (inversePos+2)*sizeof(uint32_t)*3; 341 uint32_t contsByteSize = sContPos * sizeof(UChar); 342 uint32_t i = 0; 343 344 result = (InverseUCATableHeader *)uprv_malloc(headerByteSize + inverseTableByteSize + contsByteSize); 345 uprv_memset(result, 0, headerByteSize + inverseTableByteSize + contsByteSize); 346 if(result != NULL) { 347 result->byteSize = headerByteSize + inverseTableByteSize + contsByteSize; 348 349 inversePos++; 350 inverseTable[inversePos][0] = 0xFFFFFFFF; 351 inverseTable[inversePos][1] = 0xFFFFFFFF; 352 inverseTable[inversePos][2] = 0x0000FFFF; 353 inversePos++; 354 355 for(i = 2; i<inversePos; i++) { 356 if(compareCEs(inverseTable[i-1], inverseTable[i]) > 0) { 357 fprintf(stderr, "Error at %i: %08X & %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i][0]); 358 } else if(inverseTable[i-1][0] == inverseTable[i][0] && !(inverseTable[i-1][1] < inverseTable[i][1])) { 359 fprintf(stderr, "Continuation error at %i: %08X %08X & %08X %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i-1][1], (int)inverseTable[i][0], (int)inverseTable[i][1]); 360 } 361 } 362 363 result->tableSize = inversePos; 364 result->contsSize = sContPos; 365 366 result->table = headerByteSize; 367 result->conts = headerByteSize + inverseTableByteSize; 368 369 memcpy((uint8_t *)result + result->table, inverseTable, inverseTableByteSize); 370 memcpy((uint8_t *)result + result->conts, stringContinue, contsByteSize); 371 372 } else { 373 *status = U_MEMORY_ALLOCATION_ERROR; 374 return NULL; 375 } 376 377 return result; 378 } 379 380 381 static void writeOutInverseData(InverseUCATableHeader *data, 382 const char *outputDir, 383 const char *copyright, 384 UErrorCode *status) 385 { 386 UNewDataMemory *pData; 387 388 long dataLength; 389 390 UDataInfo invUcaInfo; 391 uprv_memcpy(&invUcaInfo, &invUcaDataInfo, sizeof(UDataInfo)); 392 u_getUnicodeVersion(invUcaInfo.dataVersion); 393 394 pData=udata_create(outputDir, INVC_DATA_TYPE, INVC_DATA_NAME, &invUcaInfo, 395 copyright, status); 396 397 if(U_FAILURE(*status)) { 398 fprintf(stderr, "Error: unable to create %s"INVC_DATA_NAME", error %s\n", outputDir, u_errorName(*status)); 399 return; 400 } 401 402 /* write the data to the file */ 403 if (VERBOSE) { 404 fprintf(stdout, "Writing out inverse UCA table: %s%c%s.%s\n", outputDir, U_FILE_SEP_CHAR, 405 INVC_DATA_NAME, 406 INVC_DATA_TYPE); 407 } 408 udata_writeBlock(pData, data, data->byteSize); 409 410 /* finish up */ 411 dataLength=udata_finish(pData, status); 412 if(U_FAILURE(*status)) { 413 fprintf(stderr, "Error: error %d writing the output file\n", *status); 414 return; 415 } 416 } 417 418 419 420 static int32_t hex2num(char hex) { 421 if(hex>='0' && hex <='9') { 422 return hex-'0'; 423 } else if(hex>='a' && hex<='f') { 424 return hex-'a'+10; 425 } else if(hex>='A' && hex<='F') { 426 return hex-'A'+10; 427 } else { 428 return 0; 429 } 430 } 431 432 UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, UErrorCode *status) { 433 char buffer[2048], primary[100], secondary[100], tertiary[100]; 434 UBool detectedContraction; 435 int32_t i = 0; 436 unsigned int theValue; 437 char *pointer = NULL; 438 char *commentStart = NULL; 439 char *startCodePoint = NULL; 440 char *endCodePoint = NULL; 441 char *spacePointer = NULL; 442 char *dashPointer = NULL; 443 char *result = fgets(buffer, 2048, data); 444 int32_t buflen = (int32_t)uprv_strlen(buffer); 445 if(U_FAILURE(*status)) { 446 return 0; 447 } 448 *primary = *secondary = *tertiary = '\0'; 449 if(result == NULL) { 450 if(feof(data)) { 451 return NULL; 452 } else { 453 fprintf(stderr, "empty line but no EOF!\n"); 454 *status = U_INVALID_FORMAT_ERROR; 455 return NULL; 456 } 457 } 458 while(buflen>0 && (buffer[buflen-1] == '\r' || buffer[buflen-1] == '\n')) { 459 buffer[--buflen] = 0; 460 } 461 462 if(buffer[0] == 0 || buffer[0] == '#') { 463 return NULL; // just a comment, skip whole line 464 } 465 466 UCAElements *element = ≤ //(UCAElements *)malloc(sizeof(UCAElements)); 467 468 enum ActionType { 469 READCE, 470 READHEX, 471 READUCAVERSION 472 }; 473 474 // Directives. 475 if(buffer[0] == '[') { 476 uint32_t cnt = 0; 477 static const struct { 478 char name[128]; 479 uint32_t *what; 480 ActionType what_to_do; 481 } vt[] = { {"[first tertiary ignorable", consts->UCA_FIRST_TERTIARY_IGNORABLE, READCE}, 482 {"[last tertiary ignorable", consts->UCA_LAST_TERTIARY_IGNORABLE, READCE}, 483 {"[first secondary ignorable", consts->UCA_FIRST_SECONDARY_IGNORABLE, READCE}, 484 {"[last secondary ignorable", consts->UCA_LAST_SECONDARY_IGNORABLE, READCE}, 485 {"[first primary ignorable", consts->UCA_FIRST_PRIMARY_IGNORABLE, READCE}, 486 {"[last primary ignorable", consts->UCA_LAST_PRIMARY_IGNORABLE, READCE}, 487 {"[first variable", consts->UCA_FIRST_VARIABLE, READCE}, 488 {"[last variable", consts->UCA_LAST_VARIABLE, READCE}, 489 {"[first regular", consts->UCA_FIRST_NON_VARIABLE, READCE}, 490 {"[last regular", consts->UCA_LAST_NON_VARIABLE, READCE}, 491 {"[first implicit", consts->UCA_FIRST_IMPLICIT, READCE}, 492 {"[last implicit", consts->UCA_LAST_IMPLICIT, READCE}, 493 {"[first trailing", consts->UCA_FIRST_TRAILING, READCE}, 494 {"[last trailing", consts->UCA_LAST_TRAILING, READCE}, 495 496 {"[fixed top", &consts->UCA_PRIMARY_TOP_MIN, READHEX}, 497 {"[fixed first implicit byte", &consts->UCA_PRIMARY_IMPLICIT_MIN, READHEX}, 498 {"[fixed last implicit byte", &consts->UCA_PRIMARY_IMPLICIT_MAX, READHEX}, 499 {"[fixed first trail byte", &consts->UCA_PRIMARY_TRAILING_MIN, READHEX}, 500 {"[fixed last trail byte", &consts->UCA_PRIMARY_TRAILING_MAX, READHEX}, 501 {"[fixed first special byte", &consts->UCA_PRIMARY_SPECIAL_MIN, READHEX}, 502 {"[fixed last special byte", &consts->UCA_PRIMARY_SPECIAL_MAX, READHEX}, 503 {"[variable top = ", &t->options->variableTopValue, READHEX}, 504 {"[UCA version = ", NULL, READUCAVERSION} 505 }; 506 for (cnt = 0; cnt<sizeof(vt)/sizeof(vt[0]); cnt++) { 507 uint32_t vtLen = (uint32_t)uprv_strlen(vt[cnt].name); 508 if(uprv_strncmp(buffer, vt[cnt].name, vtLen) == 0) { 509 element->variableTop = TRUE; 510 if(vt[cnt].what_to_do == READHEX) { 511 if(sscanf(buffer+vtLen, "%4x", &theValue) != 1) /* read first code point */ 512 { 513 fprintf(stderr, " scanf(hex) failed on !\n "); 514 } 515 *(vt[cnt].what) = (UChar)theValue; 516 //if(cnt == 1) { // first implicit 517 // we need to set the value for top next 518 //uint32_t nextTop = ucol_prv_calculateImplicitPrimary(0x4E00); // CJK base 519 //consts->UCA_NEXT_TOP_VALUE = theValue<<24 | 0x030303; 520 //} 521 } else if (vt[cnt].what_to_do == READCE) { /* vt[cnt].what_to_do == READCE */ 522 // TODO: combine & clean up the two CE parsers 523 pointer = strchr(buffer+vtLen, '['); 524 if(pointer) { 525 pointer++; 526 element->sizePrim[0]=readElement(&pointer, primary, ',', status); 527 element->sizeSec[0]=readElement(&pointer, secondary, ',', status); 528 element->sizeTer[0]=readElement(&pointer, tertiary, ']', status); 529 530 vt[cnt].what[0] = getSingleCEValue(primary, secondary, tertiary, status); 531 if(element->sizePrim[0] > 2 || element->sizeSec[0] > 1 || element->sizeTer[0] > 1) { 532 uint32_t CEi = 1; 533 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */ 534 if(2*CEi<element->sizePrim[i]) { 535 value |= ((hex2num(*(primary+4*CEi))&0xF)<<28); 536 value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24); 537 } 538 539 if(2*CEi+1<element->sizePrim[i]) { 540 value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20); 541 value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16); 542 } 543 544 if(CEi<element->sizeSec[i]) { 545 value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12); 546 value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8); 547 } 548 549 if(CEi<element->sizeTer[i]) { 550 value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4); 551 value |= (hex2num(*(tertiary+2*CEi+1))&0xF); 552 } 553 554 CEi++; 555 556 vt[cnt].what[1] = value; 557 //element->CEs[CEindex++] = value; 558 } else { 559 vt[cnt].what[1] = 0; 560 } 561 } else { 562 fprintf(stderr, "Failed to read a CE from line %s\n", buffer); 563 } 564 } else { //vt[cnt].what_to_do == READUCAVERSION 565 u_versionFromString(UCAVersion, buffer+vtLen); 566 if(VERBOSE) { 567 fprintf(stdout, "UCA version [%hu.%hu.%hu.%hu]\n", UCAVersion[0], UCAVersion[1], UCAVersion[2], UCAVersion[3]); 568 } 569 } 570 //element->cPoints[0] = (UChar)theValue; 571 //return element; 572 return NULL; 573 } 574 } 575 fprintf(stderr, "Warning: unrecognized option: %s\n", buffer); 576 //*status = U_INVALID_FORMAT_ERROR; 577 return NULL; 578 } 579 element->variableTop = FALSE; 580 581 startCodePoint = buffer; 582 endCodePoint = strchr(startCodePoint, ';'); 583 584 if(endCodePoint == 0) { 585 fprintf(stderr, "error - line with no code point!\n"); 586 *status = U_INVALID_FORMAT_ERROR; /* No code point - could be an error, but probably only an empty line */ 587 return NULL; 588 } else { 589 *(endCodePoint) = 0; 590 } 591 592 memset(element, 0, sizeof(*element)); 593 594 element->cPoints = element->uchars; 595 596 spacePointer = strchr(buffer, ' '); 597 if(sscanf(buffer, "%4x", &theValue) != 1) /* read first code point */ 598 { 599 fprintf(stderr, " scanf(hex) failed!\n "); 600 } 601 element->cPoints[0] = (UChar)theValue; 602 603 if(spacePointer == 0) { 604 detectedContraction = FALSE; 605 element->cSize = 1; 606 } else { 607 dashPointer = strchr(buffer, '|'); 608 if (dashPointer != NULL) { 609 // prefix characters 610 element->prefixChars[0] = (UChar)theValue; 611 element->prefixSize = 1; 612 element->prefix = element->prefixChars; 613 sscanf(dashPointer+1, "%4x", &theValue); 614 element->cPoints[0] = (UChar)theValue; 615 element->cSize = 1; 616 } 617 else { 618 // Contractions or surrogate characters. 619 i = 1; 620 detectedContraction = TRUE; 621 while(spacePointer != NULL) { 622 sscanf(spacePointer+1, "%4x", &theValue); 623 element->cPoints[i++] = (UChar)theValue; 624 spacePointer = strchr(spacePointer+1, ' '); 625 } 626 element->cSize = i; 627 } 628 629 630 //fprintf(stderr, "Number of codepoints in contraction: %i\n", i); 631 } 632 633 startCodePoint = endCodePoint+1; 634 635 commentStart = strchr(startCodePoint, '#'); 636 if(commentStart == NULL) { 637 commentStart = strlen(startCodePoint) + startCodePoint; 638 } 639 640 i = 0; 641 uint32_t CEindex = 0; 642 element->noOfCEs = 0; 643 for(;;) { 644 endCodePoint = strchr(startCodePoint, ']'); 645 if(endCodePoint == NULL || endCodePoint >= commentStart) { 646 break; 647 } 648 pointer = strchr(startCodePoint, '['); 649 pointer++; 650 651 element->sizePrim[i]=readElement(&pointer, primary, ',', status); 652 element->sizeSec[i]=readElement(&pointer, secondary, ',', status); 653 element->sizeTer[i]=readElement(&pointer, tertiary, ']', status); 654 655 656 /* I want to get the CEs entered right here, including continuation */ 657 element->CEs[CEindex++] = getSingleCEValue(primary, secondary, tertiary, status); 658 659 uint32_t CEi = 1; 660 while(2*CEi<element->sizePrim[i] || CEi<element->sizeSec[i] || CEi<element->sizeTer[i]) { 661 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */ 662 if(2*CEi<element->sizePrim[i]) { 663 value |= ((hex2num(*(primary+4*CEi))&0xF)<<28); 664 value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24); 665 } 666 667 if(2*CEi+1<element->sizePrim[i]) { 668 value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20); 669 value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16); 670 } 671 672 if(CEi<element->sizeSec[i]) { 673 value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12); 674 value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8); 675 } 676 677 if(CEi<element->sizeTer[i]) { 678 value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4); 679 value |= (hex2num(*(tertiary+2*CEi+1))&0xF); 680 } 681 682 CEi++; 683 684 element->CEs[CEindex++] = value; 685 } 686 687 startCodePoint = endCodePoint+1; 688 i++; 689 } 690 element->noOfCEs = CEindex; 691 #if 0 692 element->isThai = UCOL_ISTHAIPREVOWEL(element->cPoints[0]); 693 #endif 694 // we don't want any strange stuff after useful data! 695 if (pointer == NULL) { 696 /* huh? Did we get ']' without the '['? Pair your brackets! */ 697 *status=U_INVALID_FORMAT_ERROR; 698 } 699 else { 700 while(pointer < commentStart) { 701 if(*pointer != ' ' && *pointer != '\t') 702 { 703 *status=U_INVALID_FORMAT_ERROR; 704 break; 705 } 706 pointer++; 707 } 708 } 709 // Check for valid bytes in CE weights. 710 // TODO: Tighten this so that it allows 03 & 04 in intermediate bytes 711 // but not in final bytes. 712 // See http://bugs.icu-project.org/trac/ticket/7167 713 for (i = 0; i < (int32_t)CEindex; ++i) { 714 uint32_t value = element->CEs[i]; 715 uint8_t bytes[4] = { 716 (uint8_t)(value >> 24), 717 (uint8_t)(value >> 16), 718 (uint8_t)(value >> 8), 719 (uint8_t)(value & UCOL_NEW_TERTIARYORDERMASK) 720 }; 721 for (int j = 0; j < 4; ++j) { 722 uint8_t maxByte = 723 (isContinuation(value) || j == 1) ? 724 UCOL_BYTE_FIRST_TAILORED : 725 UCOL_BYTE_COMMON; 726 if (0 != bytes[j] && bytes[j] < maxByte) { 727 fprintf(stderr, "Warning: invalid UCA weight byte %02X for %s\n", bytes[j], buffer); 728 // TODO: return NULL; 729 } 730 } 731 } 732 733 if(U_FAILURE(*status)) { 734 fprintf(stderr, "problem putting stuff in hash table %s\n", u_errorName(*status)); 735 *status = U_INTERNAL_PROGRAM_ERROR; 736 return NULL; 737 } 738 739 return element; 740 } 741 742 743 void writeOutData(UCATableHeader *data, 744 UCAConstants *consts, 745 UChar contractions[][3], 746 uint32_t noOfcontractions, 747 const char *outputDir, 748 const char *copyright, 749 UErrorCode *status) 750 { 751 if(U_FAILURE(*status)) { 752 return; 753 } 754 755 uint32_t size = data->size; 756 757 data->UCAConsts = data->size; 758 data->size += paddedsize(sizeof(UCAConstants)); 759 760 if(noOfcontractions != 0) { 761 contractions[noOfcontractions][0] = 0; 762 contractions[noOfcontractions][1] = 0; 763 contractions[noOfcontractions][2] = 0; 764 noOfcontractions++; 765 766 767 data->contractionUCACombos = data->size; 768 data->contractionUCACombosWidth = 3; 769 data->contractionUCACombosSize = noOfcontractions; 770 data->size += paddedsize((noOfcontractions*3*sizeof(UChar))); 771 } 772 773 UNewDataMemory *pData; 774 775 long dataLength; 776 UDataInfo ucaInfo; 777 uprv_memcpy(&ucaInfo, &ucaDataInfo, sizeof(UDataInfo)); 778 u_getUnicodeVersion(ucaInfo.dataVersion); 779 780 pData=udata_create(outputDir, UCA_DATA_TYPE, UCA_DATA_NAME, &ucaInfo, 781 copyright, status); 782 783 if(U_FAILURE(*status)) { 784 fprintf(stderr, "Error: unable to create %s"UCA_DATA_NAME", error %s\n", outputDir, u_errorName(*status)); 785 return; 786 } 787 788 /* write the data to the file */ 789 if (VERBOSE) { 790 fprintf(stdout, "Writing out UCA table: %s%c%s.%s\n", outputDir, 791 U_FILE_SEP_CHAR, 792 U_ICUDATA_NAME "_" UCA_DATA_NAME, 793 UCA_DATA_TYPE); 794 } 795 udata_writeBlock(pData, data, size); 796 797 // output the constants here 798 udata_writeBlock(pData, consts, sizeof(UCAConstants)); 799 800 if(noOfcontractions != 0) { 801 udata_writeBlock(pData, contractions, noOfcontractions*3*sizeof(UChar)); 802 udata_writePadding(pData, paddedsize((noOfcontractions*3*sizeof(UChar))) - noOfcontractions*3*sizeof(uint16_t)); 803 } 804 805 /* finish up */ 806 dataLength=udata_finish(pData, status); 807 if(U_FAILURE(*status)) { 808 fprintf(stderr, "Error: error %d writing the output file\n", *status); 809 return; 810 } 811 } 812 813 enum { 814 /* 815 * Maximum number of UCA contractions we can store. 816 * May need to be increased for a new Unicode version. 817 */ 818 MAX_UCA_CONTRACTION_CES=2048 819 }; 820 821 static int32_t 822 write_uca_table(const char *filename, 823 const char *outputDir, 824 const char *copyright, 825 UErrorCode *status) 826 { 827 FILE *data = fopen(filename, "r"); 828 if(data == NULL) { 829 fprintf(stderr, "Couldn't open file: %s\n", filename); 830 return -1; 831 } 832 uint32_t line = 0; 833 UCAElements *element = NULL; 834 UChar variableTopValue = 0; 835 UCATableHeader *myD = (UCATableHeader *)uprv_malloc(sizeof(UCATableHeader)); 836 /* test for NULL */ 837 if(myD == NULL) { 838 *status = U_MEMORY_ALLOCATION_ERROR; 839 fclose(data); 840 return 0; 841 } 842 uprv_memset(myD, 0, sizeof(UCATableHeader)); 843 UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet)); 844 /* test for NULL */ 845 if(opts == NULL) { 846 *status = U_MEMORY_ALLOCATION_ERROR; 847 uprv_free(myD); 848 fclose(data); 849 return 0; 850 } 851 uprv_memset(opts, 0, sizeof(UColOptionSet)); 852 UChar contractionCEs[MAX_UCA_CONTRACTION_CES][3]; 853 uprv_memset(contractionCEs, 0, sizeof(contractionCEs)); 854 uint32_t noOfContractions = 0; 855 UCAConstants consts; 856 uprv_memset(&consts, 0, sizeof(consts)); 857 #if 0 858 UCAConstants consts = { 859 UCOL_RESET_TOP_VALUE, 860 UCOL_FIRST_PRIMARY_IGNORABLE, 861 UCOL_LAST_PRIMARY_IGNORABLE, 862 UCOL_LAST_PRIMARY_IGNORABLE_CONT, 863 UCOL_FIRST_SECONDARY_IGNORABLE, 864 UCOL_LAST_SECONDARY_IGNORABLE, 865 UCOL_FIRST_TERTIARY_IGNORABLE, 866 UCOL_LAST_TERTIARY_IGNORABLE, 867 UCOL_FIRST_VARIABLE, 868 UCOL_LAST_VARIABLE, 869 UCOL_FIRST_NON_VARIABLE, 870 UCOL_LAST_NON_VARIABLE, 871 872 UCOL_NEXT_TOP_VALUE, 873 /* 874 UCOL_NEXT_FIRST_PRIMARY_IGNORABLE, 875 UCOL_NEXT_LAST_PRIMARY_IGNORABLE, 876 UCOL_NEXT_FIRST_SECONDARY_IGNORABLE, 877 UCOL_NEXT_LAST_SECONDARY_IGNORABLE, 878 UCOL_NEXT_FIRST_TERTIARY_IGNORABLE, 879 UCOL_NEXT_LAST_TERTIARY_IGNORABLE, 880 UCOL_NEXT_FIRST_VARIABLE, 881 UCOL_NEXT_LAST_VARIABLE, 882 */ 883 884 PRIMARY_IMPLICIT_MIN, 885 PRIMARY_IMPLICIT_MAX 886 }; 887 #endif 888 889 890 uprv_memset(inverseTable, 0xDA, sizeof(int32_t)*3*0xFFFF); 891 892 opts->variableTopValue = variableTopValue; 893 opts->strength = UCOL_TERTIARY; 894 opts->frenchCollation = UCOL_OFF; 895 opts->alternateHandling = UCOL_NON_IGNORABLE; /* attribute for handling variable elements*/ 896 opts->caseFirst = UCOL_OFF; /* who goes first, lower case or uppercase */ 897 opts->caseLevel = UCOL_OFF; /* do we have an extra case level */ 898 opts->normalizationMode = UCOL_OFF; /* attribute for normalization */ 899 opts->hiraganaQ = UCOL_OFF; /* attribute for JIS X 4061, used only in Japanese */ 900 opts->numericCollation = UCOL_OFF; 901 myD->jamoSpecial = FALSE; 902 903 tempUCATable *t = uprv_uca_initTempTable(myD, opts, NULL, IMPLICIT_TAG, LEAD_SURROGATE_TAG, status); 904 if(U_FAILURE(*status)) 905 { 906 fprintf(stderr, "Failed to init UCA temp table: %s\n", u_errorName(*status)); 907 uprv_free(opts); 908 uprv_free(myD); 909 fclose(data); 910 return -1; 911 } 912 913 #if 0 914 IMPLICIT_TAG = 9, 915 /* 916 ***************************************************************************************** 917 * NON_CHARACTER FDD0 - FDEF, FFFE, FFFF, 1FFFE, 1FFFF, 2FFFE, 2FFFF,...e.g. **FFFE, **FFFF 918 ****************************************************************************************** 919 */ 920 #endif 921 922 // * set to zero 923 struct { 924 UChar32 start; 925 UChar32 end; 926 int32_t value; 927 } ranges[] = 928 { 929 #if 0 930 {0xAC00, 0xD7AF, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) }, //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/ 931 {0xD800, 0xDBFF, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24) }, //1 LEAD_SURROGATE_TAG, /* D800-DBFF*/ 932 {0xDC00, 0xDFFF, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) }, //2 TRAIL_SURROGATE DC00-DFFF 933 {0x3400, 0x4DB5, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //3 CJK_IMPLICIT_TAG, /* 0x3400-0x4DB5*/ 934 {0x4E00, 0x9FA5, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //4 CJK_IMPLICIT_TAG, /* 0x4E00-0x9FA5*/ 935 {0xF900, 0xFA2D, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //5 CJK_IMPLICIT_TAG, /* 0xF900-0xFA2D*/ 936 {0x20000, 0x2A6D6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //6 CJK_IMPLICIT_TAG, /* 0x20000-0x2A6D6*/ 937 {0x2F800, 0x2FA1D, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //7 CJK_IMPLICIT_TAG, /* 0x2F800-0x2FA1D*/ 938 #endif 939 {0xAC00, 0xD7B0, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) }, //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/ 940 //{0xD800, 0xDC00, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24) }, //1 LEAD_SURROGATE_TAG, /* D800-DBFF*/ 941 {0xDC00, 0xE000, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) }, //2 TRAIL_SURROGATE DC00-DFFF 942 // Now directly handled in the collation code by the swapCJK function. 943 //{0x3400, 0x4DB6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //3 CJK_IMPLICIT_TAG, /* 0x3400-0x4DB5*/ 944 //{0x4E00, 0x9FA6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //4 CJK_IMPLICIT_TAG, /* 0x4E00-0x9FA5*/ 945 //{0xF900, 0xFA2E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //5 CJK_IMPLICIT_TAG, /* 0xF900-0xFA2D*/ 946 //{0x20000, 0x2A6D7, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //6 CJK_IMPLICIT_TAG, /* 0x20000-0x2A6D6*/ 947 //{0x2F800, 0x2FA1E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //7 CJK_IMPLICIT_TAG, /* 0x2F800-0x2FA1D*/ 948 }; 949 uint32_t i = 0; 950 951 for(i = 0; i<sizeof(ranges)/sizeof(ranges[0]); i++) { 952 /*ucmpe32_setRange32(t->mapping, ranges[i].start, ranges[i].end, ranges[i].value); */ 953 utrie_setRange32(t->mapping, ranges[i].start, ranges[i].end, ranges[i].value, TRUE); 954 } 955 956 957 int32_t surrogateCount = 0; 958 while(!feof(data)) { 959 if(U_FAILURE(*status)) { 960 fprintf(stderr, "Something returned an error %i (%s) while processing line %u of %s. Exiting...\n", 961 *status, u_errorName(*status), (int)line, filename); 962 exit(*status); 963 } 964 965 element = readAnElement(data, t, &consts, status); 966 line++; 967 if(VERBOSE) { 968 fprintf(stdout, "%u ", (int)line); 969 } 970 if(element != NULL) { 971 // we have read the line, now do something sensible with the read data! 972 973 // Below stuff was taken care of in readAnElement 974 //if(element->variableTop == TRUE && variableTopValue == 0) { 975 // t->options->variableTopValue = element->cPoints[0]; 976 //} 977 978 // if element is a contraction, we want to add it to contractions 979 if(element->cSize > 1 && element->cPoints[0] != 0xFDD0) { // this is a contraction 980 if(UTF_IS_LEAD(element->cPoints[0]) && UTF_IS_TRAIL(element->cPoints[1]) && element->cSize == 2) { 981 surrogateCount++; 982 } else { 983 if(noOfContractions>=MAX_UCA_CONTRACTION_CES) { 984 fprintf(stderr, 985 "\nMore than %d contractions. Please increase MAX_UCA_CONTRACTION_CES in genuca.cpp. " 986 "Exiting...\n", 987 (int)MAX_UCA_CONTRACTION_CES); 988 exit(*status); 989 } 990 contractionCEs[noOfContractions][0] = element->cPoints[0]; 991 contractionCEs[noOfContractions][1] = element->cPoints[1]; 992 if(element->cSize > 2) { // the third one 993 contractionCEs[noOfContractions][2] = element->cPoints[2]; 994 } else { 995 contractionCEs[noOfContractions][2] = 0; 996 } 997 noOfContractions++; 998 } 999 } 1000 else { 1001 // TODO (claireho): does this work? Need more tests 1002 // The following code is to handle the UCA pre-context rules 1003 // for L/l with middle dot. We share the structures for contractionCombos. 1004 // The format for pre-context character is 1005 // contractionCEs[0]: codepoint in element->cPoints[0] 1006 // contractionCEs[1]: '\0' to differentiate with contractions. 1007 // contractionCEs[2]: prefix char 1008 if (element->prefixSize>0) { 1009 if(noOfContractions>=MAX_UCA_CONTRACTION_CES) { 1010 fprintf(stderr, 1011 "\nMore than %d contractions. Please increase MAX_UCA_CONTRACTION_CES in genuca.cpp. " 1012 "Exiting...\n", 1013 (int)MAX_UCA_CONTRACTION_CES); 1014 exit(*status); 1015 } 1016 contractionCEs[noOfContractions][0]=element->cPoints[0]; 1017 contractionCEs[noOfContractions][1]='\0'; 1018 contractionCEs[noOfContractions][2]=element->prefixChars[0]; 1019 noOfContractions++; 1020 } 1021 1022 } 1023 1024 /* we're first adding to inverse, because addAnElement will reverse the order */ 1025 /* of code points and stuff... we don't want that to happen */ 1026 addToInverse(element, status); 1027 if(!(element->cSize > 1 && element->cPoints[0] == 0xFDD0)) { 1028 uprv_uca_addAnElement(t, element, status); 1029 } 1030 } 1031 } 1032 1033 if(UCAVersion[0] == 0 && UCAVersion[1] == 0 && UCAVersion[2] == 0 && UCAVersion[3] == 0) { 1034 fprintf(stderr, "UCA version not specified. Cannot create data file!\n"); 1035 uprv_uca_closeTempTable(t); 1036 uprv_free(opts); 1037 uprv_free(myD); 1038 fclose(data); 1039 return -1; 1040 } 1041 /* { 1042 uint32_t trieWord = utrie_get32(t->mapping, 0xDC01, NULL); 1043 }*/ 1044 1045 if (VERBOSE) { 1046 fprintf(stdout, "\nLines read: %u\n", (int)line); 1047 fprintf(stdout, "Surrogate count: %i\n", (int)surrogateCount); 1048 fprintf(stdout, "Raw data breakdown:\n"); 1049 /*fprintf(stdout, "Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/ 1050 fprintf(stdout, "Number of contractions: %u\n", (int)noOfContractions); 1051 fprintf(stdout, "Contraction image size: %u\n", (int)t->image->contractionSize); 1052 fprintf(stdout, "Expansions size: %i\n", (int)t->expansions->position); 1053 } 1054 1055 1056 /* produce canonical closure for table */ 1057 /* first set up constants for implicit calculation */ 1058 uprv_uca_initImplicitConstants(status); 1059 /* do the closure */ 1060 int32_t noOfClosures = uprv_uca_canonicalClosure(t, NULL, status); 1061 if(noOfClosures != 0) { 1062 fprintf(stderr, "Warning: %i canonical closures occured!\n", (int)noOfClosures); 1063 } 1064 1065 /* test */ 1066 UCATableHeader *myData = uprv_uca_assembleTable(t, status); 1067 1068 if (VERBOSE) { 1069 fprintf(stdout, "Compacted data breakdown:\n"); 1070 /*fprintf(stdout, "Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/ 1071 fprintf(stdout, "Number of contractions: %u\n", (int)noOfContractions); 1072 fprintf(stdout, "Contraction image size: %u\n", (int)t->image->contractionSize); 1073 fprintf(stdout, "Expansions size: %i\n", (int)t->expansions->position); 1074 } 1075 1076 if(U_FAILURE(*status)) { 1077 fprintf(stderr, "Error creating table: %s\n", u_errorName(*status)); 1078 uprv_uca_closeTempTable(t); 1079 uprv_free(opts); 1080 uprv_free(myD); 1081 fclose(data); 1082 return -1; 1083 } 1084 1085 /* populate the version info struct with version info*/ 1086 myData->version[0] = UCOL_BUILDER_VERSION; 1087 myData->version[1] = UCAVersion[0]; 1088 myData->version[2] = UCAVersion[1]; 1089 myData->version[3] = UCAVersion[2]; 1090 /*TODO:The fractional rules version should be taken from FractionalUCA.txt*/ 1091 // Removed this macro. Instead, we use the fields below 1092 //myD->version[1] = UCOL_FRACTIONAL_UCA_VERSION; 1093 //myD->UCAVersion = UCAVersion; // out of FractionalUCA.txt 1094 uprv_memcpy(myData->UCAVersion, UCAVersion, sizeof(UVersionInfo)); 1095 u_getUnicodeVersion(myData->UCDVersion); 1096 1097 writeOutData(myData, &consts, contractionCEs, noOfContractions, outputDir, copyright, status); 1098 1099 InverseUCATableHeader *inverse = assembleInverseTable(status); 1100 uprv_memcpy(inverse->UCAVersion, UCAVersion, sizeof(UVersionInfo)); 1101 writeOutInverseData(inverse, outputDir, copyright, status); 1102 1103 uprv_uca_closeTempTable(t); 1104 uprv_free(myD); 1105 uprv_free(opts); 1106 1107 1108 uprv_free(myData); 1109 uprv_free(inverse); 1110 fclose(data); 1111 1112 return 0; 1113 } 1114 1115 #endif /* #if !UCONFIG_NO_COLLATION */ 1116 1117 static UOption options[]={ 1118 UOPTION_HELP_H, /* 0 Numbers for those who*/ 1119 UOPTION_HELP_QUESTION_MARK, /* 1 can't count. */ 1120 UOPTION_COPYRIGHT, /* 2 */ 1121 UOPTION_VERSION, /* 3 */ 1122 UOPTION_DESTDIR, /* 4 */ 1123 UOPTION_SOURCEDIR, /* 5 */ 1124 UOPTION_VERBOSE, /* 6 */ 1125 UOPTION_ICUDATADIR /* 7 */ 1126 /* weiv can't count :))))) */ 1127 }; 1128 1129 int main(int argc, char* argv[]) { 1130 UErrorCode status = U_ZERO_ERROR; 1131 const char* destdir = NULL; 1132 const char* srcDir = NULL; 1133 char filename[300]; 1134 char *basename = NULL; 1135 const char *copyright = NULL; 1136 uprv_memset(&UCAVersion, 0, 4); 1137 1138 U_MAIN_INIT_ARGS(argc, argv); 1139 1140 /* preset then read command line options */ 1141 options[4].value=u_getDataDirectory(); 1142 options[5].value=""; 1143 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); 1144 1145 /* error handling, printing usage message */ 1146 if(argc<0) { 1147 fprintf(stderr, 1148 "error in command line argument \"%s\"\n", 1149 argv[-argc]); 1150 } else if(argc<2) { 1151 argc=-1; 1152 } 1153 if(options[0].doesOccur || options[1].doesOccur) { 1154 fprintf(stderr, 1155 "usage: %s [-options] file\n" 1156 "\tRead in UCA collation text data and write out the binary collation data\n" 1157 "options:\n" 1158 "\t-h or -? or --help this usage text\n" 1159 "\t-V or --version show a version message\n" 1160 "\t-c or --copyright include a copyright notice\n" 1161 "\t-d or --destdir destination directory, followed by the path\n" 1162 "\t-s or --sourcedir source directory, followed by the path\n" 1163 "\t-v or --verbose turn on verbose output\n" 1164 "\t-i or --icudatadir directory for locating any needed intermediate data files,\n" 1165 "\t followed by path, defaults to %s\n", 1166 argv[0], u_getDataDirectory()); 1167 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; 1168 } 1169 if(options[3].doesOccur) { 1170 fprintf(stdout, "genuca version %hu.%hu, ICU tool to read UCA text data and create UCA data tables for collation.\n", 1171 #if UCONFIG_NO_COLLATION 1172 0, 0 1173 #else 1174 UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1 1175 #endif 1176 ); 1177 fprintf(stdout, U_COPYRIGHT_STRING"\n"); 1178 exit(0); 1179 } 1180 1181 /* get the options values */ 1182 destdir = options[4].value; 1183 srcDir = options[5].value; 1184 VERBOSE = options[6].doesOccur; 1185 1186 if (options[2].doesOccur) { 1187 copyright = U_COPYRIGHT_STRING; 1188 } 1189 1190 if (options[7].doesOccur) { 1191 u_setDataDirectory(options[7].value); 1192 } 1193 /* Initialize ICU */ 1194 u_init(&status); 1195 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { 1196 fprintf(stderr, "%s: can not initialize ICU. status = %s\n", 1197 argv[0], u_errorName(status)); 1198 exit(1); 1199 } 1200 status = U_ZERO_ERROR; 1201 1202 1203 /* prepare the filename beginning with the source dir */ 1204 uprv_strcpy(filename, srcDir); 1205 basename=filename+uprv_strlen(filename); 1206 1207 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) { 1208 *basename++ = U_FILE_SEP_CHAR; 1209 } 1210 1211 if(argc < 0) { 1212 uprv_strcpy(basename, "FractionalUCA.txt"); 1213 } else { 1214 argv++; 1215 uprv_strcpy(basename, getLongPathname(*argv)); 1216 } 1217 1218 #if 0 1219 if(u_getCombiningClass(0x0053) == 0) 1220 { 1221 fprintf(stderr, "SEVERE ERROR: Normalization data is not functioning! Bailing out. Was not able to load unorm.dat.\n"); 1222 exit(1); 1223 } 1224 #endif 1225 1226 #if UCONFIG_NO_COLLATION 1227 1228 UNewDataMemory *pData; 1229 const char *msg; 1230 1231 msg = "genuca writes dummy " UCA_DATA_NAME "." UCA_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h"; 1232 fprintf(stderr, "%s\n", msg); 1233 pData = udata_create(destdir, UCA_DATA_TYPE, UCA_DATA_NAME, &dummyDataInfo, 1234 NULL, &status); 1235 udata_writeBlock(pData, msg, strlen(msg)); 1236 udata_finish(pData, &status); 1237 1238 msg = "genuca writes dummy " INVC_DATA_NAME "." INVC_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h"; 1239 fprintf(stderr, "%s\n", msg); 1240 pData = udata_create(destdir, INVC_DATA_TYPE, INVC_DATA_NAME, &dummyDataInfo, 1241 NULL, &status); 1242 udata_writeBlock(pData, msg, strlen(msg)); 1243 udata_finish(pData, &status); 1244 1245 return (int)status; 1246 1247 #else 1248 1249 return write_uca_table(filename, destdir, copyright, &status); 1250 1251 #endif 1252 } 1253 1254 /* 1255 * Hey, Emacs, please set the following: 1256 * 1257 * Local Variables: 1258 * indent-tabs-mode: nil 1259 * End: 1260 * 1261 */ 1262