Home | History | Annotate | Download | only in genuca
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2000-2008, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  genuca.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created at the end of XX century
     14 *   created by: Vladimir Weinstein
     15 *
*   This program reads the Fractional UCA table and generates
     17 *   internal format for UCA table as well as inverse UCA table.
     18 *   It then writes binary files containing the data: ucadata.dat
     19 *   & invuca.dat
     20 *   Change history:
     21 *   02/23/2001  grhoten                 Made it into a tool
     22 *   02/23/2001  weiv                    Moved element & table handling code to i18n
     23 *   05/09/2001  weiv                    Case bits are now in the CEs, not in front
     24 */
     25 
     26 #include "unicode/utypes.h"
     27 #include "unicode/putil.h"
     28 #include "unicode/udata.h"
     29 #include "unicode/uclean.h"
     30 #include "ucol_imp.h"
     31 #include "genuca.h"
     32 #include "uoptions.h"
     33 #include "toolutil.h"
     34 #include "unewdata.h"
     35 #include "cstring.h"
     36 #include "cmemory.h"
     37 
     38 #include <stdio.h>
     39 
/*
 * Global - verbosity.
 * When TRUE, the functions in this file print progress information to stdout.
 */
UBool VERBOSE = FALSE;

/* UCA version; readAnElement() fills it in from the "[UCA version = " directive. */
static UVersionInfo UCAVersion;
     46 
     47 #if UCONFIG_NO_COLLATION
     48 
/*
 * dummy UDataInfo cf. udata.h
 * Only compiled when UCONFIG_NO_COLLATION is set; every version field is zero.
 */
static UDataInfo dummyDataInfo = {
    sizeof(UDataInfo),              /* size of this structure */
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    U_SIZEOF_UCHAR,
    0,

    { 0, 0, 0, 0 },                 /* dummy dataFormat */
    { 0, 0, 0, 0 },                 /* dummy formatVersion */
    { 0, 0, 0, 0 }                  /* dummy dataVersion */
};
     63 
     64 #else
     65 
/*
 * Header template for the UCA data file.
 * writeOutData() copies it and overwrites dataVersion with the result of
 * u_getUnicodeVersion() before creating the output file.
 */
static const UDataInfo ucaDataInfo={
    sizeof(UDataInfo),              /* size of this structure */
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {UCA_DATA_FORMAT_0, UCA_DATA_FORMAT_1, UCA_DATA_FORMAT_2, UCA_DATA_FORMAT_3},     /* dataFormat="UCol"            */
    /* 03/26/2002 bumped up version since format has changed */
    /* 09/16/2002 bumped up version since we went from UColAttributeValue */
    /*            to int32_t in UColOptionSet */
    /* 05/13/2003 This one also updated since we added UCA and UCD versions */
    /*            to header */
    /* 09/11/2003 Adding information required by data swapper */
    {UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1, UCA_FORMAT_VERSION_2, UCA_FORMAT_VERSION_3},                 /* formatVersion                */
    {0, 0, 0, 0}                  /* dataVersion = Unicode Version (filled in at write time) */
};
     85 
/*
 * Header template for the inverse UCA data file.
 * writeOutInverseData() copies it and overwrites dataVersion with the result
 * of u_getUnicodeVersion() before creating the output file.
 */
static const UDataInfo invUcaDataInfo={
    sizeof(UDataInfo),              /* size of this structure */
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {INVUCA_DATA_FORMAT_0, INVUCA_DATA_FORMAT_1, INVUCA_DATA_FORMAT_2, INVUCA_DATA_FORMAT_3},     /* dataFormat="InvC"            */
    /* 03/26/2002 bumped up version since format has changed */
    /* 04/29/2003 2.1 format - we have added UCA version to header */
    {INVUCA_FORMAT_VERSION_0, INVUCA_FORMAT_VERSION_1, INVUCA_FORMAT_VERSION_2, INVUCA_FORMAT_VERSION_3},                 /* formatVersion                */
    {0, 0, 0, 0}                  /* dataVersion = Unicode Version (filled in at write time) */
};
    101 
/*
 * Scratch element shared by all calls to readAnElement(): that function
 * fills it in and returns a pointer to it, so its contents are only valid
 * until the next line is parsed.
 */
UCAElements le;
    103 
    104 int32_t readElement(char **from, char *to, char separator, UErrorCode *status) {
    105     if(U_FAILURE(*status)) {
    106         return 0;
    107     }
    108     char buffer[1024];
    109     int32_t i = 0;
    110     while(**from != separator) {
    111         if(**from != ' ') {
    112             *(buffer+i++) = **from;
    113         }
    114         (*from)++;
    115     }
    116     (*from)++;
    117     *(buffer + i) = 0;
    118     //*to = (char *)malloc(strlen(buffer)+1);
    119     strcpy(to, buffer);
    120     return i/2;
    121 }
    122 
    123 
    124 uint32_t getSingleCEValue(char *primary, char *secondary, char *tertiary, UErrorCode *status) {
    125     if(U_FAILURE(*status)) {
    126         return 0;
    127     }
    128     uint32_t value = 0;
    129     char primsave = '\0';
    130     char secsave = '\0';
    131     char tersave = '\0';
    132     char *primend = primary+4;
    133     if(strlen(primary) > 4) {
    134         primsave = *primend;
    135         *primend = '\0';
    136     }
    137     char *secend = secondary+2;
    138     if(strlen(secondary) > 2) {
    139         secsave = *secend;
    140         *secend = '\0';
    141     }
    142     char *terend = tertiary+2;
    143     if(strlen(tertiary) > 2) {
    144         tersave = *terend;
    145         *terend = '\0';
    146     }
    147     uint32_t primvalue = (uint32_t)((*primary!='\0')?strtoul(primary, &primend, 16):0);
    148     uint32_t secvalue = (uint32_t)((*secondary!='\0')?strtoul(secondary, &secend, 16):0);
    149     uint32_t tervalue = (uint32_t)((*tertiary!='\0')?strtoul(tertiary, &terend, 16):0);
    150     if(primvalue <= 0xFF) {
    151       primvalue <<= 8;
    152     }
    153 
    154     value = ((primvalue<<UCOL_PRIMARYORDERSHIFT)&UCOL_PRIMARYORDERMASK)|
    155         ((secvalue<<UCOL_SECONDARYORDERSHIFT)&UCOL_SECONDARYORDERMASK)|
    156         (tervalue&UCOL_TERTIARYORDERMASK);
    157 
    158     if(primsave!='\0') {
    159         *primend = primsave;
    160     }
    161     if(secsave!='\0') {
    162         *secend = secsave;
    163     }
    164     if(tersave!='\0') {
    165         *terend = tersave;
    166     }
    167     return value;
    168 }
    169 
/*
 * Inverse UCA table under construction. addToInverse() inserts rows in
 * compareCEs() order. Per row: [0] is the first CE, [1] its continuation CE
 * (or 0), [2] either a single code unit or
 * (size << UCOL_INV_SHIFTVALUE) | offset, referring to stringContinue.
 */
static uint32_t inverseTable[0xFFFF][3];
/* Index of the last row in use; row 0 is an all-zero entry set up by addToInverse(). */
static uint32_t inversePos = 0;
/* Storage for the code unit strings referenced from column [2] of inverseTable. */
static UChar stringContinue[0xFFFF];
/* Index of the next free slot in stringContinue. */
static uint32_t sContPos = 0;
    174 
    175 static void addNewInverse(UCAElements *element, UErrorCode *status) {
    176   if(U_FAILURE(*status)) {
    177     return;
    178   }
    179   if(VERBOSE && isContinuation(element->CEs[1])) {
    180     //fprintf(stdout, "+");
    181   }
    182   inversePos++;
    183   inverseTable[inversePos][0] = element->CEs[0];
    184   if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) {
    185     inverseTable[inversePos][1] = element->CEs[1];
    186   } else {
    187     inverseTable[inversePos][1] = 0;
    188   }
    189   if(element->cSize < 2) {
    190     inverseTable[inversePos][2] = element->cPoints[0];
    191   } else { /* add a new store of cruft */
    192     inverseTable[inversePos][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos;
    193     memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
    194     sContPos += element->cSize+1;
    195   }
    196 }
    197 
    198 static void insertInverse(UCAElements *element, uint32_t position, UErrorCode *status) {
    199   if(U_FAILURE(*status)) {
    200     return;
    201   }
    202 
    203   if(VERBOSE && isContinuation(element->CEs[1])) {
    204     //fprintf(stdout, "+");
    205   }
    206   if(position <= inversePos) {
    207     /*move stuff around */
    208     uint32_t amountToMove = (inversePos - position+1)*sizeof(inverseTable[0]);
    209     uprv_memmove(inverseTable[position+1], inverseTable[position], amountToMove);
    210   }
    211   inverseTable[position][0] = element->CEs[0];
    212   if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) {
    213     inverseTable[position][1] = element->CEs[1];
    214   } else {
    215     inverseTable[position][1] = 0;
    216   }
    217   if(element->cSize < 2) {
    218     inverseTable[position][2] = element->cPoints[0];
    219   } else { /* add a new store of cruft */
    220     inverseTable[position][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos;
    221     memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
    222     sContPos += element->cSize+1;
    223   }
    224   inversePos++;
    225 }
    226 
/*
 * Adds the code units of `element` to the row at `position`, which already
 * holds an entry with the same CEs. Alternatives are stored in
 * stringContinue separated by 0xFFFF and the list is closed with 0xFFFE.
 * Does nothing if *status already indicates a failure.
 *
 * NOTE(review): neither branch checks sContPos against the size of
 * stringContinue.
 */
static void addToExistingInverse(UCAElements *element, uint32_t position, UErrorCode *status) {

  if(U_FAILURE(*status)) {
    return;
  }

      if((inverseTable[position][2] & UCOL_INV_SIZEMASK) == 0) { /* single element, have to make new extension place and put both guys there */
        /* layout written here: <old code unit> 0xFFFF <new code units> 0xFFFE, i.e. cSize+3 units */
        stringContinue[sContPos] = (UChar)inverseTable[position][2];
        inverseTable[position][2] = ((element->cSize+3) << UCOL_INV_SHIFTVALUE) | sContPos;
        sContPos++;
        stringContinue[sContPos++] = 0xFFFF;
        memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
        sContPos += element->cSize;
        stringContinue[sContPos++] = 0xFFFE;
      } else { /* adding to the already existing continuing table */
        uint32_t contIndex = inverseTable[position][2] & UCOL_INV_OFFSETMASK;
        uint32_t contSize = (inverseTable[position][2] & UCOL_INV_SIZEMASK) >> UCOL_INV_SHIFTVALUE;

        if(contIndex+contSize < sContPos) {
          /* the existing list is not the last thing in stringContinue */
          /* NOTE(review): only cSize+1 units following the list are copied up, not everything */
          /* up to sContPos, and no other inverseTable row has its offset adjusted here - */
          /* confirm that later entries are not overwritten. */
          /*fprintf(stderr, ".", sContPos, contIndex+contSize);*/
          memcpy(stringContinue+contIndex+contSize+element->cSize+1, stringContinue+contIndex+contSize, (element->cSize+1)*sizeof(UChar));
        }

        /* the last unit of the old list becomes a separator, then the new units and a new 0xFFFE follow */
        stringContinue[contIndex+contSize-1] = 0xFFFF;
        memcpy(stringContinue+contIndex+contSize, element->cPoints, element->cSize*sizeof(UChar));
        sContPos += element->cSize+1;
        stringContinue[contIndex+contSize+element->cSize] = 0xFFFE;

        inverseTable[position][2] = ((contSize+element->cSize+1) << UCOL_INV_SHIFTVALUE) | contIndex;
      }
}
    258 
    259 /*
    260  * Takes two CEs (lead and continuation) and
    261  * compares them as CEs should be compared:
    262  * primary vs. primary, secondary vs. secondary
    263  * tertiary vs. tertiary
    264  */
    265 static int32_t compareCEs(uint32_t *source, uint32_t *target) {
    266   uint32_t s1 = source[0], s2, t1 = target[0], t2;
    267   if(isContinuation(source[1])) {
    268     s2 = source[1];
    269   } else {
    270     s2 = 0;
    271   }
    272   if(isContinuation(target[1])) {
    273     t2 = target[1];
    274   } else {
    275     t2 = 0;
    276   }
    277 
    278   uint32_t s = 0, t = 0;
    279   if(s1 == t1 && s2 == t2) {
    280     return 0;
    281   }
    282   s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
    283   t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
    284   if(s < t) {
    285     return -1;
    286   } else if(s > t) {
    287     return 1;
    288   } else {
    289     s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
    290     t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
    291     if(s < t) {
    292       return -1;
    293     } else if(s > t) {
    294       return 1;
    295     } else {
    296       s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
    297       t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
    298       if(s < t) {
    299         return -1;
    300       } else {
    301         return 1;
    302       }
    303     }
    304   }
    305 }
    306 
    307 static uint32_t addToInverse(UCAElements *element, UErrorCode *status) {
    308   uint32_t position = inversePos;
    309   uint32_t saveElement = element->CEs[0];
    310   int32_t compResult = 0;
    311   element->CEs[0] &= 0xFFFFFF3F;
    312   if(element->noOfCEs == 1) {
    313     element->CEs[1] = 0;
    314   }
    315   if(inversePos == 0) {
    316     inverseTable[0][0] = inverseTable[0][1] = inverseTable[0][2] = 0;
    317     addNewInverse(element, status);
    318   } else if(compareCEs(inverseTable[inversePos], element->CEs) > 0) {
    319     while((compResult = compareCEs(inverseTable[--position], element->CEs)) > 0);
    320     if(VERBOSE) { fprintf(stdout, "p:%u ", (int)position); }
    321     if(compResult == 0) {
    322       addToExistingInverse(element, position, status);
    323     } else {
    324       insertInverse(element, position+1, status);
    325     }
    326   } else if(compareCEs(inverseTable[inversePos], element->CEs) == 0) {
    327     addToExistingInverse(element, inversePos, status);
    328   } else {
    329     addNewInverse(element, status);
    330   }
    331   element->CEs[0] = saveElement;
    332   if(VERBOSE) { fprintf(stdout, "+"); }
    333   return inversePos;
    334 }
    335 
    336 static InverseUCATableHeader *assembleInverseTable(UErrorCode *status)
    337 {
    338   InverseUCATableHeader *result = NULL;
    339   uint32_t headerByteSize = paddedsize(sizeof(InverseUCATableHeader));
    340   uint32_t inverseTableByteSize = (inversePos+2)*sizeof(uint32_t)*3;
    341   uint32_t contsByteSize = sContPos * sizeof(UChar);
    342   uint32_t i = 0;
    343 
    344   result = (InverseUCATableHeader *)uprv_malloc(headerByteSize + inverseTableByteSize + contsByteSize);
    345   uprv_memset(result, 0, headerByteSize + inverseTableByteSize + contsByteSize);
    346   if(result != NULL) {
    347     result->byteSize = headerByteSize + inverseTableByteSize + contsByteSize;
    348 
    349     inversePos++;
    350     inverseTable[inversePos][0] = 0xFFFFFFFF;
    351     inverseTable[inversePos][1] = 0xFFFFFFFF;
    352     inverseTable[inversePos][2] = 0x0000FFFF;
    353     inversePos++;
    354 
    355     for(i = 2; i<inversePos; i++) {
    356       if(compareCEs(inverseTable[i-1], inverseTable[i]) > 0) {
    357         fprintf(stderr, "Error at %i: %08X & %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i][0]);
    358       } else if(inverseTable[i-1][0] == inverseTable[i][0] && !(inverseTable[i-1][1] < inverseTable[i][1])) {
    359         fprintf(stderr, "Continuation error at %i: %08X %08X & %08X %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i-1][1], (int)inverseTable[i][0], (int)inverseTable[i][1]);
    360       }
    361     }
    362 
    363     result->tableSize = inversePos;
    364     result->contsSize = sContPos;
    365 
    366     result->table = headerByteSize;
    367     result->conts = headerByteSize + inverseTableByteSize;
    368 
    369     memcpy((uint8_t *)result + result->table, inverseTable, inverseTableByteSize);
    370     memcpy((uint8_t *)result + result->conts, stringContinue, contsByteSize);
    371 
    372   } else {
    373     *status = U_MEMORY_ALLOCATION_ERROR;
    374     return NULL;
    375   }
    376 
    377   return result;
    378 }
    379 
    380 
    381 static void writeOutInverseData(InverseUCATableHeader *data,
    382                   const char *outputDir,
    383                   const char *copyright,
    384                   UErrorCode *status)
    385 {
    386     UNewDataMemory *pData;
    387 
    388     long dataLength;
    389 
    390     UDataInfo invUcaInfo;
    391     uprv_memcpy(&invUcaInfo, &invUcaDataInfo, sizeof(UDataInfo));
    392     u_getUnicodeVersion(invUcaInfo.dataVersion);
    393 
    394     pData=udata_create(outputDir, INVC_DATA_TYPE, INVC_DATA_NAME, &invUcaInfo,
    395                        copyright, status);
    396 
    397     if(U_FAILURE(*status)) {
    398         fprintf(stderr, "Error: unable to create %s"INVC_DATA_NAME", error %s\n", outputDir, u_errorName(*status));
    399         return;
    400     }
    401 
    402     /* write the data to the file */
    403     if (VERBOSE) {
    404         fprintf(stdout, "Writing out inverse UCA table: %s%c%s.%s\n", outputDir, U_FILE_SEP_CHAR,
    405                                                                 INVC_DATA_NAME,
    406                                                                 INVC_DATA_TYPE);
    407     }
    408     udata_writeBlock(pData, data, data->byteSize);
    409 
    410     /* finish up */
    411     dataLength=udata_finish(pData, status);
    412     if(U_FAILURE(*status)) {
    413         fprintf(stderr, "Error: error %d writing the output file\n", *status);
    414         return;
    415     }
    416 }
    417 
    418 
    419 
    420 static int32_t hex2num(char hex) {
    421     if(hex>='0' && hex <='9') {
    422         return hex-'0';
    423     } else if(hex>='a' && hex<='f') {
    424         return hex-'a'+10;
    425     } else if(hex>='A' && hex<='F') {
    426         return hex-'A'+10;
    427     } else {
    428         return 0;
    429     }
    430 }
    431 
    432 UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, UErrorCode *status) {
    433     char buffer[2048], primary[100], secondary[100], tertiary[100];
    434     UBool detectedContraction;
    435     int32_t i = 0;
    436     unsigned int theValue;
    437     char *pointer = NULL;
    438     char *commentStart = NULL;
    439     char *startCodePoint = NULL;
    440     char *endCodePoint = NULL;
    441     char *spacePointer = NULL;
    442     char *dashPointer = NULL;
    443     char *result = fgets(buffer, 2048, data);
    444     int32_t buflen = (int32_t)uprv_strlen(buffer);
    445     if(U_FAILURE(*status)) {
    446         return 0;
    447     }
    448     *primary = *secondary = *tertiary = '\0';
    449     if(result == NULL) {
    450         if(feof(data)) {
    451             return NULL;
    452         } else {
    453             fprintf(stderr, "empty line but no EOF!\n");
    454             *status = U_INVALID_FORMAT_ERROR;
    455             return NULL;
    456         }
    457     }
    458     while(buflen>0 && (buffer[buflen-1] == '\r' || buffer[buflen-1] == '\n')) {
    459       buffer[--buflen] = 0;
    460     }
    461 
    462     if(buffer[0] == 0 || buffer[0] == '#') {
    463         return NULL; // just a comment, skip whole line
    464     }
    465 
    466     UCAElements *element = &le; //(UCAElements *)malloc(sizeof(UCAElements));
    467 
    468     enum ActionType {
    469       READCE,
    470       READHEX,
    471       READUCAVERSION
    472     };
    473 
    474     // Directives.
    475     if(buffer[0] == '[') {
    476       uint32_t cnt = 0;
    477       static const struct {
    478         char name[128];
    479         uint32_t *what;
    480         ActionType what_to_do;
    481       } vt[]  = { {"[first tertiary ignorable",  consts->UCA_FIRST_TERTIARY_IGNORABLE,  READCE},
    482                   {"[last tertiary ignorable",   consts->UCA_LAST_TERTIARY_IGNORABLE,   READCE},
    483                   {"[first secondary ignorable", consts->UCA_FIRST_SECONDARY_IGNORABLE, READCE},
    484                   {"[last secondary ignorable",  consts->UCA_LAST_SECONDARY_IGNORABLE,  READCE},
    485                   {"[first primary ignorable",   consts->UCA_FIRST_PRIMARY_IGNORABLE,   READCE},
    486                   {"[last primary ignorable",    consts->UCA_LAST_PRIMARY_IGNORABLE,    READCE},
    487                   {"[first variable",            consts->UCA_FIRST_VARIABLE,            READCE},
    488                   {"[last variable",             consts->UCA_LAST_VARIABLE,             READCE},
    489                   {"[first regular",             consts->UCA_FIRST_NON_VARIABLE,        READCE},
    490                   {"[last regular",              consts->UCA_LAST_NON_VARIABLE,         READCE},
    491                   {"[first implicit",            consts->UCA_FIRST_IMPLICIT,            READCE},
    492                   {"[last implicit",             consts->UCA_LAST_IMPLICIT,             READCE},
    493                   {"[first trailing",            consts->UCA_FIRST_TRAILING,            READCE},
    494                   {"[last trailing",             consts->UCA_LAST_TRAILING,             READCE},
    495 
    496                   {"[fixed top",                       &consts->UCA_PRIMARY_TOP_MIN,           READHEX},
    497                   {"[fixed first implicit byte",       &consts->UCA_PRIMARY_IMPLICIT_MIN,      READHEX},
    498                   {"[fixed last implicit byte",        &consts->UCA_PRIMARY_IMPLICIT_MAX,      READHEX},
    499                   {"[fixed first trail byte",          &consts->UCA_PRIMARY_TRAILING_MIN,      READHEX},
    500                   {"[fixed last trail byte",           &consts->UCA_PRIMARY_TRAILING_MAX,      READHEX},
    501                   {"[fixed first special byte",        &consts->UCA_PRIMARY_SPECIAL_MIN,       READHEX},
    502                   {"[fixed last special byte",         &consts->UCA_PRIMARY_SPECIAL_MAX,       READHEX},
    503                   {"[variable top = ",                &t->options->variableTopValue,          READHEX},
    504                   {"[UCA version = ",                 NULL,                          READUCAVERSION}
    505       };
    506       for (cnt = 0; cnt<sizeof(vt)/sizeof(vt[0]); cnt++) {
    507         uint32_t vtLen = (uint32_t)uprv_strlen(vt[cnt].name);
    508         if(uprv_strncmp(buffer, vt[cnt].name, vtLen) == 0) {
    509             element->variableTop = TRUE;
    510             if(vt[cnt].what_to_do == READHEX) {
    511               if(sscanf(buffer+vtLen, "%4x", &theValue) != 1) /* read first code point */
    512               {
    513                   fprintf(stderr, " scanf(hex) failed on !\n ");
    514               }
    515               *(vt[cnt].what) = (UChar)theValue;
    516               //if(cnt == 1) { // first implicit
    517                 // we need to set the value for top next
    518                 //uint32_t nextTop = ucol_prv_calculateImplicitPrimary(0x4E00); // CJK base
    519                 //consts->UCA_NEXT_TOP_VALUE = theValue<<24 | 0x030303;
    520               //}
    521             } else if (vt[cnt].what_to_do == READCE) { /* vt[cnt].what_to_do == READCE */
    522               pointer = strchr(buffer+vtLen, '[');
    523               if(pointer) {
    524                 pointer++;
    525                 element->sizePrim[0]=readElement(&pointer, primary, ',', status);
    526                 element->sizeSec[0]=readElement(&pointer, secondary, ',', status);
    527                 element->sizeTer[0]=readElement(&pointer, tertiary, ']', status);
    528 
    529                 vt[cnt].what[0] = getSingleCEValue(primary, secondary, tertiary, status);
    530                 if(element->sizePrim[0] > 2 || element->sizeSec[0] > 1 || element->sizeTer[0] > 1) {
    531                   uint32_t CEi = 1;
    532                   uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
    533                     if(2*CEi<element->sizePrim[i]) {
    534                         value |= ((hex2num(*(primary+4*CEi))&0xF)<<28);
    535                         value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24);
    536                     }
    537 
    538                     if(2*CEi+1<element->sizePrim[i]) {
    539                         value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20);
    540                         value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16);
    541                     }
    542 
    543                     if(CEi<element->sizeSec[i]) {
    544                         value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12);
    545                         value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8);
    546                     }
    547 
    548                     if(CEi<element->sizeTer[i]) {
    549                         value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4);
    550                         value |= (hex2num(*(tertiary+2*CEi+1))&0xF);
    551                     }
    552 
    553                     CEi++;
    554 
    555                     vt[cnt].what[1] = value;
    556                     //element->CEs[CEindex++] = value;
    557                 } else {
    558                   vt[cnt].what[1] = 0;
    559                 }
    560               } else {
    561                 fprintf(stderr, "Failed to read a CE from line %s\n", buffer);
    562               }
    563             } else { //vt[cnt].what_to_do == READUCAVERSION
    564               u_versionFromString(UCAVersion, buffer+vtLen);
    565               if(VERBOSE) {
    566                 fprintf(stdout, "UCA version [%hu.%hu.%hu.%hu]\n", UCAVersion[0], UCAVersion[1], UCAVersion[2], UCAVersion[3]);
    567               }
    568             }
    569             //element->cPoints[0] = (UChar)theValue;
    570             //return element;
    571             return NULL;
    572         }
    573       }
    574       fprintf(stderr, "Warning: unrecognized option: %s\n", buffer);
    575       //*status = U_INVALID_FORMAT_ERROR;
    576       return NULL;
    577     }
    578     element->variableTop = FALSE;
    579 
    580     startCodePoint = buffer;
    581     endCodePoint = strchr(startCodePoint, ';');
    582 
    583     if(endCodePoint == 0) {
    584         fprintf(stderr, "error - line with no code point!\n");
    585         *status = U_INVALID_FORMAT_ERROR; /* No code point - could be an error, but probably only an empty line */
    586         return NULL;
    587     } else {
    588         *(endCodePoint) = 0;
    589     }
    590 
    591     memset(element, 0, sizeof(*element));
    592 
    593     element->cPoints = element->uchars;
    594 
    595     spacePointer = strchr(buffer, ' ');
    596     if(sscanf(buffer, "%4x", &theValue) != 1) /* read first code point */
    597     {
    598       fprintf(stderr, " scanf(hex) failed!\n ");
    599     }
    600     element->cPoints[0] = (UChar)theValue;
    601 
    602     if(spacePointer == 0) {
    603         detectedContraction = FALSE;
    604         element->cSize = 1;
    605     } else {
    606         dashPointer = strchr(buffer, '|');
    607         if (dashPointer != NULL) {
    608             // prefix characters
    609             element->prefixChars[0] = (UChar)theValue;
    610             element->prefixSize = 1;
    611             element->prefix = element->prefixChars;
    612             sscanf(dashPointer+1, "%4x", &theValue);
    613             element->cPoints[0] = (UChar)theValue;
    614             element->cSize = 1;
    615         }
    616         else {
    617           // Contractions or surrogate characters.
    618             i = 1;
    619             detectedContraction = TRUE;
    620             while(spacePointer != NULL) {
    621                 sscanf(spacePointer+1, "%4x", &theValue);
    622                 element->cPoints[i++] = (UChar)theValue;
    623                 spacePointer = strchr(spacePointer+1, ' ');
    624             }
    625             element->cSize = i;
    626         }
    627 
    628 
    629         //fprintf(stderr, "Number of codepoints in contraction: %i\n", i);
    630     }
    631 
    632     startCodePoint = endCodePoint+1;
    633 
    634     commentStart = strchr(startCodePoint, '#');
    635     if(commentStart == NULL) {
    636         commentStart = strlen(startCodePoint) + startCodePoint;
    637     }
    638 
    639     i = 0;
    640     uint32_t CEindex = 0;
    641     element->noOfCEs = 0;
    642     for(;;) {
    643         endCodePoint = strchr(startCodePoint, ']');
    644         if(endCodePoint == NULL || endCodePoint >= commentStart) {
    645             break;
    646         }
    647         pointer = strchr(startCodePoint, '[');
    648         pointer++;
    649 
    650         element->sizePrim[i]=readElement(&pointer, primary, ',', status);
    651         element->sizeSec[i]=readElement(&pointer, secondary, ',', status);
    652         element->sizeTer[i]=readElement(&pointer, tertiary, ']', status);
    653 
    654 
    655         /* I want to get the CEs entered right here, including continuation */
    656         element->CEs[CEindex++] = getSingleCEValue(primary, secondary, tertiary, status);
    657 
    658         uint32_t CEi = 1;
    659         while(2*CEi<element->sizePrim[i] || CEi<element->sizeSec[i] || CEi<element->sizeTer[i]) {
    660           uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
    661             if(2*CEi<element->sizePrim[i]) {
    662                 value |= ((hex2num(*(primary+4*CEi))&0xF)<<28);
    663                 value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24);
    664             }
    665 
    666             if(2*CEi+1<element->sizePrim[i]) {
    667                 value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20);
    668                 value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16);
    669             }
    670 
    671             if(CEi<element->sizeSec[i]) {
    672                 value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12);
    673                 value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8);
    674             }
    675 
    676             if(CEi<element->sizeTer[i]) {
    677                 value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4);
    678                 value |= (hex2num(*(tertiary+2*CEi+1))&0xF);
    679             }
    680 
    681             CEi++;
    682 
    683             element->CEs[CEindex++] = value;
    684         }
    685 
    686       startCodePoint = endCodePoint+1;
    687       i++;
    688     }
    689     element->noOfCEs = CEindex;
    690 #if 0
    691     element->isThai = UCOL_ISTHAIPREVOWEL(element->cPoints[0]);
    692 #endif
    693     // we don't want any strange stuff after useful data!
    694     if (pointer == NULL) {
    695         /* huh? Did we get ']' without the '['? Pair your brackets! */
    696         *status=U_INVALID_FORMAT_ERROR;
    697     }
    698     else {
    699         while(pointer < commentStart)  {
    700             if(*pointer != ' ' && *pointer != '\t')
    701             {
    702                 *status=U_INVALID_FORMAT_ERROR;
    703                 break;
    704             }
    705             pointer++;
    706         }
    707     }
    708 
    709     if(U_FAILURE(*status)) {
    710         fprintf(stderr, "problem putting stuff in hash table %s\n", u_errorName(*status));
    711         *status = U_INTERNAL_PROGRAM_ERROR;
    712         return NULL;
    713     }
    714 
    715     return element;
    716 }
    717 
    718 
    719 void writeOutData(UCATableHeader *data,
    720                   UCAConstants *consts,
    721                   UChar contractions[][3],
    722                   uint32_t noOfcontractions,
    723                   const char *outputDir,
    724                   const char *copyright,
    725                   UErrorCode *status)
    726 {
    727     if(U_FAILURE(*status)) {
    728         return;
    729     }
    730 
    731     uint32_t size = data->size;
    732 
    733     data->UCAConsts = data->size;
    734     data->size += paddedsize(sizeof(UCAConstants));
    735 
    736     if(noOfcontractions != 0) {
    737       contractions[noOfcontractions][0] = 0;
    738       contractions[noOfcontractions][1] = 0;
    739       contractions[noOfcontractions][2] = 0;
    740       noOfcontractions++;
    741 
    742 
    743       data->contractionUCACombos = data->size;
    744       data->contractionUCACombosWidth = 3;
    745       data->contractionUCACombosSize = noOfcontractions;
    746       data->size += paddedsize((noOfcontractions*3*sizeof(UChar)));
    747     }
    748 
    749     UNewDataMemory *pData;
    750 
    751     long dataLength;
    752     UDataInfo ucaInfo;
    753     uprv_memcpy(&ucaInfo, &ucaDataInfo, sizeof(UDataInfo));
    754     u_getUnicodeVersion(ucaInfo.dataVersion);
    755 
    756     pData=udata_create(outputDir, UCA_DATA_TYPE, UCA_DATA_NAME, &ucaInfo,
    757                        copyright, status);
    758 
    759     if(U_FAILURE(*status)) {
    760         fprintf(stderr, "Error: unable to create %s"UCA_DATA_NAME", error %s\n", outputDir, u_errorName(*status));
    761         return;
    762     }
    763 
    764     /* write the data to the file */
    765     if (VERBOSE) {
    766         fprintf(stdout, "Writing out UCA table: %s%c%s.%s\n", outputDir,
    767                                                         U_FILE_SEP_CHAR,
    768                                                         U_ICUDATA_NAME "_" UCA_DATA_NAME,
    769                                                         UCA_DATA_TYPE);
    770     }
    771     udata_writeBlock(pData, data, size);
    772 
    773     // output the constants here
    774     udata_writeBlock(pData, consts, sizeof(UCAConstants));
    775 
    776     if(noOfcontractions != 0) {
    777       udata_writeBlock(pData, contractions, noOfcontractions*3*sizeof(UChar));
    778       udata_writePadding(pData, paddedsize((noOfcontractions*3*sizeof(UChar))) - noOfcontractions*3*sizeof(uint16_t));
    779     }
    780 
    781     /* finish up */
    782     dataLength=udata_finish(pData, status);
    783     if(U_FAILURE(*status)) {
    784         fprintf(stderr, "Error: error %d writing the output file\n", *status);
    785         return;
    786     }
    787 }
    788 
    789 static int32_t
    790 write_uca_table(const char *filename,
    791                 const char *outputDir,
    792                 const char *copyright,
    793                 UErrorCode *status)
    794 {
    795     FILE *data = fopen(filename, "r");
    796     if(data == NULL) {
    797         fprintf(stderr, "Couldn't open file: %s\n", filename);
    798         return -1;
    799     }
    800     uint32_t line = 0;
    801     UCAElements *element = NULL;
    802     UChar variableTopValue = 0;
    803     UCATableHeader *myD = (UCATableHeader *)uprv_malloc(sizeof(UCATableHeader));
    804     /* test for NULL */
    805     if(myD == NULL) {
    806         *status = U_MEMORY_ALLOCATION_ERROR;
    807         fclose(data);
    808         return 0;
    809     }
    810     uprv_memset(myD, 0, sizeof(UCATableHeader));
    811     UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
    812     /* test for NULL */
    813     if(opts == NULL) {
    814         *status = U_MEMORY_ALLOCATION_ERROR;
    815         uprv_free(myD);
    816         fclose(data);
    817         return 0;
    818     }
    819     uprv_memset(opts, 0, sizeof(UColOptionSet));
    820     UChar contractionCEs[512][3];
    821     uprv_memset(contractionCEs, 0, 512*3*sizeof(UChar));
    822     uint32_t noOfContractions = 0;
    823     UCAConstants consts;
    824     uprv_memset(&consts, 0, sizeof(consts));
    825 #if 0
    826     UCAConstants consts = {
    827       UCOL_RESET_TOP_VALUE,
    828       UCOL_FIRST_PRIMARY_IGNORABLE,
    829       UCOL_LAST_PRIMARY_IGNORABLE,
    830       UCOL_LAST_PRIMARY_IGNORABLE_CONT,
    831       UCOL_FIRST_SECONDARY_IGNORABLE,
    832       UCOL_LAST_SECONDARY_IGNORABLE,
    833       UCOL_FIRST_TERTIARY_IGNORABLE,
    834       UCOL_LAST_TERTIARY_IGNORABLE,
    835       UCOL_FIRST_VARIABLE,
    836       UCOL_LAST_VARIABLE,
    837       UCOL_FIRST_NON_VARIABLE,
    838       UCOL_LAST_NON_VARIABLE,
    839 
    840       UCOL_NEXT_TOP_VALUE,
    841 /*
    842       UCOL_NEXT_FIRST_PRIMARY_IGNORABLE,
    843       UCOL_NEXT_LAST_PRIMARY_IGNORABLE,
    844       UCOL_NEXT_FIRST_SECONDARY_IGNORABLE,
    845       UCOL_NEXT_LAST_SECONDARY_IGNORABLE,
    846       UCOL_NEXT_FIRST_TERTIARY_IGNORABLE,
    847       UCOL_NEXT_LAST_TERTIARY_IGNORABLE,
    848       UCOL_NEXT_FIRST_VARIABLE,
    849       UCOL_NEXT_LAST_VARIABLE,
    850 */
    851 
    852       PRIMARY_IMPLICIT_MIN,
    853       PRIMARY_IMPLICIT_MAX
    854     };
    855 #endif
    856 
    857 
    858     uprv_memset(inverseTable, 0xDA, sizeof(int32_t)*3*0xFFFF);
    859 
    860     opts->variableTopValue = variableTopValue;
    861     opts->strength = UCOL_TERTIARY;
    862     opts->frenchCollation = UCOL_OFF;
    863     opts->alternateHandling = UCOL_NON_IGNORABLE; /* attribute for handling variable elements*/
    864     opts->caseFirst = UCOL_OFF;         /* who goes first, lower case or uppercase */
    865     opts->caseLevel = UCOL_OFF;         /* do we have an extra case level */
    866     opts->normalizationMode = UCOL_OFF; /* attribute for normalization */
    867     opts->hiraganaQ = UCOL_OFF; /* attribute for JIS X 4061, used only in Japanese */
    868     opts->numericCollation = UCOL_OFF;
    869     myD->jamoSpecial = FALSE;
    870 
    871     tempUCATable *t = uprv_uca_initTempTable(myD, opts, NULL, IMPLICIT_TAG, LEAD_SURROGATE_TAG, status);
    872     if(U_FAILURE(*status))
    873     {
    874         fprintf(stderr, "Failed to init UCA temp table: %s\n", u_errorName(*status));
    875         uprv_free(opts);
    876         uprv_free(myD);
    877         fclose(data);
    878         return -1;
    879     }
    880 
    881 #if 0
    882     IMPLICIT_TAG = 9,
    883 /*
    884  *****************************************************************************************
    885  * NON_CHARACTER FDD0 - FDEF, FFFE, FFFF, 1FFFE, 1FFFF, 2FFFE, 2FFFF,...e.g. **FFFE, **FFFF
    886  ******************************************************************************************
    887  */
    888 #endif
    889 
    890 // * set to zero
    891 struct {
    892       UChar32 start;
    893       UChar32 end;
    894       int32_t value;
    895     } ranges[] =
    896     {
    897 #if 0
    898       {0xAC00, 0xD7AF, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) },  //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/
    899       {0xD800, 0xDBFF, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24)  },  //1 LEAD_SURROGATE_TAG,  /* D800-DBFF*/
    900       {0xDC00, 0xDFFF, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) },  //2 TRAIL_SURROGATE DC00-DFFF
    901       {0x3400, 0x4DB5, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //3 CJK_IMPLICIT_TAG,   /* 0x3400-0x4DB5*/
    902       {0x4E00, 0x9FA5, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //4 CJK_IMPLICIT_TAG,   /* 0x4E00-0x9FA5*/
    903       {0xF900, 0xFA2D, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //5 CJK_IMPLICIT_TAG,   /* 0xF900-0xFA2D*/
    904       {0x20000, 0x2A6D6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)  },  //6 CJK_IMPLICIT_TAG,   /* 0x20000-0x2A6D6*/
    905       {0x2F800, 0x2FA1D, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)  },  //7 CJK_IMPLICIT_TAG,   /* 0x2F800-0x2FA1D*/
    906 #endif
    907       {0xAC00, 0xD7B0, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) },  //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/
    908       //{0xD800, 0xDC00, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24)  },  //1 LEAD_SURROGATE_TAG,  /* D800-DBFF*/
    909       {0xDC00, 0xE000, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) },  //2 TRAIL_SURROGATE DC00-DFFF
    910       // Now directly handled in the collation code by the swapCJK function.
    911       //{0x3400, 0x4DB6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //3 CJK_IMPLICIT_TAG,   /* 0x3400-0x4DB5*/
    912       //{0x4E00, 0x9FA6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //4 CJK_IMPLICIT_TAG,   /* 0x4E00-0x9FA5*/
    913       //{0xF900, 0xFA2E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)    },  //5 CJK_IMPLICIT_TAG,   /* 0xF900-0xFA2D*/
    914       //{0x20000, 0x2A6D7, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)  },  //6 CJK_IMPLICIT_TAG,   /* 0x20000-0x2A6D6*/
    915       //{0x2F800, 0x2FA1E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24)  },  //7 CJK_IMPLICIT_TAG,   /* 0x2F800-0x2FA1D*/
    916     };
    917     uint32_t i = 0;
    918 
    919     for(i = 0; i<sizeof(ranges)/sizeof(ranges[0]); i++) {
    920       /*ucmpe32_setRange32(t->mapping, ranges[i].start, ranges[i].end, ranges[i].value); */
    921       utrie_setRange32(t->mapping, ranges[i].start, ranges[i].end, ranges[i].value, TRUE);
    922     }
    923 
    924 
    925     int32_t surrogateCount = 0;
    926     while(!feof(data)) {
    927         if(U_FAILURE(*status)) {
    928             fprintf(stderr, "Something returned an error %i (%s) while processing line %u of %s. Exiting...\n",
    929                 *status, u_errorName(*status), (int)line, filename);
    930             exit(*status);
    931         }
    932 
    933         element = readAnElement(data, t, &consts, status);
    934         line++;
    935         if(VERBOSE) {
    936           fprintf(stdout, "%u ", (int)line);
    937         }
    938         if(element != NULL) {
    939             // we have read the line, now do something sensible with the read data!
    940 
    941             // Below stuff was taken care of in readAnElement
    942             //if(element->variableTop == TRUE && variableTopValue == 0) {
    943             //    t->options->variableTopValue = element->cPoints[0];
    944             //}
    945 
    946             // if element is a contraction, we want to add it to contractions
    947             if(element->cSize > 1 && element->cPoints[0] != 0xFDD0) { // this is a contraction
    948               if(UTF_IS_LEAD(element->cPoints[0]) && UTF_IS_TRAIL(element->cPoints[1]) && element->cSize == 2) {
    949                 surrogateCount++;
    950               } else {
    951                 contractionCEs[noOfContractions][0] = element->cPoints[0];
    952                 contractionCEs[noOfContractions][1] = element->cPoints[1];
    953                 if(element->cSize > 2) { // the third one
    954                   contractionCEs[noOfContractions][2] = element->cPoints[2];
    955                 } else {
    956                   contractionCEs[noOfContractions][2] = 0;
    957                 }
    958                 noOfContractions++;
    959               }
    960             }
    961             else {
    962                 // TODO (claireho): does this work? Need more tests
    963                 // The following code is to handle the UCA pre-context rules
    964                 // for L/l with middle dot. We share the structures for contractionCombos.
    965                 // The format for pre-context character is
    966                 // contractionCEs[0]: codepoint in element->cPoints[0]
    967                 // contractionCEs[1]: '\0' to differentiate with contractions.
    968                 // contractionCEs[2]: prefix char
    969                 if (element->prefixSize>0) {
    970                     contractionCEs[noOfContractions][0]=element->cPoints[0];
    971                     contractionCEs[noOfContractions][1]='\0';
    972                     contractionCEs[noOfContractions][2]=element->prefixChars[0];
    973                     noOfContractions++;
    974                 }
    975 
    976             }
    977 
    978             /* we're first adding to inverse, because addAnElement will reverse the order */
    979             /* of code points and stuff... we don't want that to happen */
    980             addToInverse(element, status);
    981             if(!(element->cSize > 1 && element->cPoints[0] == 0xFDD0)) {
    982               uprv_uca_addAnElement(t, element, status);
    983             }
    984         }
    985     }
    986 
    987     if(UCAVersion[0] == 0 && UCAVersion[1] == 0 && UCAVersion[2] == 0 && UCAVersion[3] == 0) {
    988         fprintf(stderr, "UCA version not specified. Cannot create data file!\n");
    989         uprv_uca_closeTempTable(t);
    990         uprv_free(opts);
    991         uprv_free(myD);
    992         fclose(data);
    993         return -1;
    994     }
    995 /*    {
    996         uint32_t trieWord = utrie_get32(t->mapping, 0xDC01, NULL);
    997     }*/
    998 
    999     if (VERBOSE) {
   1000         fprintf(stdout, "\nLines read: %u\n", (int)line);
   1001         fprintf(stdout, "Surrogate count: %i\n", (int)surrogateCount);
   1002         fprintf(stdout, "Raw data breakdown:\n");
   1003         /*fprintf(stdout, "Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/
   1004         fprintf(stdout, "Number of contractions: %u\n", (int)noOfContractions);
   1005         fprintf(stdout, "Contraction image size: %u\n", (int)t->image->contractionSize);
   1006         fprintf(stdout, "Expansions size: %i\n", (int)t->expansions->position);
   1007     }
   1008 
   1009 
   1010     /* produce canonical closure for table */
   1011     /* first set up constants for implicit calculation */
   1012     uprv_uca_initImplicitConstants(status);
   1013     /* do the closure */
   1014     int32_t noOfClosures = uprv_uca_canonicalClosure(t, NULL, status);
   1015     if(noOfClosures != 0) {
   1016       fprintf(stderr, "Warning: %i canonical closures occured!\n", (int)noOfClosures);
   1017     }
   1018 
   1019     /* test */
   1020     UCATableHeader *myData = uprv_uca_assembleTable(t, status);
   1021 
   1022     if (VERBOSE) {
   1023         fprintf(stdout, "Compacted data breakdown:\n");
   1024         /*fprintf(stdout, "Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/
   1025         fprintf(stdout, "Number of contractions: %u\n", (int)noOfContractions);
   1026         fprintf(stdout, "Contraction image size: %u\n", (int)t->image->contractionSize);
   1027         fprintf(stdout, "Expansions size: %i\n", (int)t->expansions->position);
   1028     }
   1029 
   1030     if(U_FAILURE(*status)) {
   1031         fprintf(stderr, "Error creating table: %s\n", u_errorName(*status));
   1032         uprv_uca_closeTempTable(t);
   1033         uprv_free(opts);
   1034         uprv_free(myD);
   1035         fclose(data);
   1036         return -1;
   1037     }
   1038 
   1039     /* populate the version info struct with version info*/
   1040     myData->version[0] = UCOL_BUILDER_VERSION;
   1041     myData->version[1] = UCAVersion[0];
   1042     myData->version[2] = UCAVersion[1];
   1043     myData->version[3] = UCAVersion[2];
   1044     /*TODO:The fractional rules version should be taken from FractionalUCA.txt*/
   1045     // Removed this macro. Instead, we use the fields below
   1046     //myD->version[1] = UCOL_FRACTIONAL_UCA_VERSION;
   1047     //myD->UCAVersion = UCAVersion; // out of FractionalUCA.txt
   1048     uprv_memcpy(myData->UCAVersion, UCAVersion, sizeof(UVersionInfo));
   1049     u_getUnicodeVersion(myData->UCDVersion);
   1050 
   1051     writeOutData(myData, &consts, contractionCEs, noOfContractions, outputDir, copyright, status);
   1052 
   1053     InverseUCATableHeader *inverse = assembleInverseTable(status);
   1054     uprv_memcpy(inverse->UCAVersion, UCAVersion, sizeof(UVersionInfo));
   1055     writeOutInverseData(inverse, outputDir, copyright, status);
   1056 
   1057     uprv_uca_closeTempTable(t);
   1058     uprv_free(myD);
   1059     uprv_free(opts);
   1060 
   1061 
   1062     uprv_free(myData);
   1063     uprv_free(inverse);
   1064     fclose(data);
   1065 
   1066     return 0;
   1067 }
   1068 
   1069 #endif /* #if !UCONFIG_NO_COLLATION */
   1070 
   1071 static UOption options[]={
   1072     UOPTION_HELP_H,              /* 0  Numbers for those who*/
   1073     UOPTION_HELP_QUESTION_MARK,  /* 1   can't count. */
   1074     UOPTION_COPYRIGHT,           /* 2 */
   1075     UOPTION_VERSION,             /* 3 */
   1076     UOPTION_DESTDIR,             /* 4 */
   1077     UOPTION_SOURCEDIR,           /* 5 */
   1078     UOPTION_VERBOSE,             /* 6 */
   1079     UOPTION_ICUDATADIR           /* 7 */
   1080     /* weiv can't count :))))) */
   1081 };
   1082 
   1083 int main(int argc, char* argv[]) {
   1084     UErrorCode status = U_ZERO_ERROR;
   1085     const char* destdir = NULL;
   1086     const char* srcDir = NULL;
   1087     char filename[300];
   1088     char *basename = NULL;
   1089     const char *copyright = NULL;
   1090     uprv_memset(&UCAVersion, 0, 4);
   1091 
   1092     U_MAIN_INIT_ARGS(argc, argv);
   1093 
   1094     /* preset then read command line options */
   1095     options[4].value=u_getDataDirectory();
   1096     options[5].value="";
   1097     argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
   1098 
   1099     /* error handling, printing usage message */
   1100     if(argc<0) {
   1101         fprintf(stderr,
   1102             "error in command line argument \"%s\"\n",
   1103             argv[-argc]);
   1104     } else if(argc<2) {
   1105         argc=-1;
   1106     }
   1107     if(options[0].doesOccur || options[1].doesOccur) {
   1108         fprintf(stderr,
   1109             "usage: %s [-options] file\n"
   1110             "\tRead in UCA collation text data and write out the binary collation data\n"
   1111             "options:\n"
   1112             "\t-h or -? or --help  this usage text\n"
   1113             "\t-V or --version     show a version message\n"
   1114             "\t-c or --copyright   include a copyright notice\n"
   1115             "\t-d or --destdir     destination directory, followed by the path\n"
   1116             "\t-s or --sourcedir   source directory, followed by the path\n"
   1117             "\t-v or --verbose     turn on verbose output\n"
   1118             "\t-i or --icudatadir  directory for locating any needed intermediate data files,\n"
   1119             "\t                    followed by path, defaults to %s\n",
   1120             argv[0], u_getDataDirectory());
   1121         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
   1122     }
   1123     if(options[3].doesOccur) {
   1124         fprintf(stdout, "genuca version %hu.%hu, ICU tool to read UCA text data and create UCA data tables for collation.\n",
   1125 #if UCONFIG_NO_COLLATION
   1126             0, 0
   1127 #else
   1128             UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1
   1129 #endif
   1130             );
   1131         fprintf(stdout, U_COPYRIGHT_STRING"\n");
   1132         exit(0);
   1133     }
   1134 
   1135     /* get the options values */
   1136     destdir = options[4].value;
   1137     srcDir = options[5].value;
   1138     VERBOSE = options[6].doesOccur;
   1139 
   1140     if (options[2].doesOccur) {
   1141         copyright = U_COPYRIGHT_STRING;
   1142     }
   1143 
   1144     if (options[7].doesOccur) {
   1145         u_setDataDirectory(options[7].value);
   1146     }
   1147     /* Initialize ICU */
   1148     u_init(&status);
   1149     if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
   1150         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
   1151             argv[0], u_errorName(status));
   1152         exit(1);
   1153     }
   1154     status = U_ZERO_ERROR;
   1155 
   1156 
   1157     /* prepare the filename beginning with the source dir */
   1158     uprv_strcpy(filename, srcDir);
   1159     basename=filename+uprv_strlen(filename);
   1160 
   1161     if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
   1162         *basename++ = U_FILE_SEP_CHAR;
   1163     }
   1164 
   1165     if(argc < 0) {
   1166       uprv_strcpy(basename, "FractionalUCA.txt");
   1167     } else {
   1168       argv++;
   1169       uprv_strcpy(basename, getLongPathname(*argv));
   1170     }
   1171 
   1172 #if 0
   1173     if(u_getCombiningClass(0x0053) == 0)
   1174     {
   1175         fprintf(stderr, "SEVERE ERROR: Normalization data is not functioning! Bailing out.  Was not able to load unorm.dat.\n");
   1176         exit(1);
   1177     }
   1178 #endif
   1179 
   1180 #if UCONFIG_NO_COLLATION
   1181 
   1182     UNewDataMemory *pData;
   1183     const char *msg;
   1184 
   1185     msg = "genuca writes dummy " UCA_DATA_NAME "." UCA_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
   1186     fprintf(stderr, "%s\n", msg);
   1187     pData = udata_create(destdir, UCA_DATA_TYPE, UCA_DATA_NAME, &dummyDataInfo,
   1188                          NULL, &status);
   1189     udata_writeBlock(pData, msg, strlen(msg));
   1190     udata_finish(pData, &status);
   1191 
   1192     msg = "genuca writes dummy " INVC_DATA_NAME "." INVC_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
   1193     fprintf(stderr, "%s\n", msg);
   1194     pData = udata_create(destdir, INVC_DATA_TYPE, INVC_DATA_NAME, &dummyDataInfo,
   1195                          NULL, &status);
   1196     udata_writeBlock(pData, msg, strlen(msg));
   1197     udata_finish(pData, &status);
   1198 
   1199     return (int)status;
   1200 
   1201 #else
   1202 
   1203     return write_uca_table(filename, destdir, copyright, &status);
   1204 
   1205 #endif
   1206 }
   1207 
   1208 /*
   1209  * Hey, Emacs, please set the following:
   1210  *
   1211  * Local Variables:
   1212  * indent-tabs-mode: nil
   1213  * End:
   1214  *
   1215  */
   1216