Home | History | Annotate | Download | only in gencase
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2004-2009, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  store.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2004aug28
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Store Unicode case mapping properties efficiently for
     17 *   random access.
     18 */
     19 
     20 #include <stdio.h>
     21 #include <stdlib.h>
     22 #include "unicode/utypes.h"
     23 #include "unicode/uchar.h"
     24 #include "unicode/ustring.h"
     25 #include "cmemory.h"
     26 #include "cstring.h"
     27 #include "filestrm.h"
     28 #include "utrie.h"
     29 #include "utrie2.h"
     30 #include "uarrsort.h"
     31 #include "unicode/udata.h"
     32 #include "unewdata.h"
     33 #include "propsvec.h"
     34 #include "writesrc.h"
     35 #include "gencase.h"
     36 
     37 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
     38 
     39 /* Unicode case mapping properties file format ---------------------------------
     40 
     41 The file format prepared and written here contains several data
     42 structures that store indexes or data.
     43 
     44 Before the data contents described below, there are the headers required by
     45 the udata API for loading ICU data. Especially, a UDataInfo structure
     46 precedes the actual data. It contains platform properties values and the
     47 file format version.
     48 
     49 The following is a description of format version 1.2 .
     50 
     51 Format version 1.1 adds data for case closure.
     52 
     53 Format version 1.2 adds an exception bit for case-ignorable. Needed because
     54 the Cased and Case_Ignorable properties are not disjoint.
     55 
     56 The file contains the following structures:
     57 
     58     const int32_t indexes[i0] with values i0, i1, ...:
     59     (see UCASE_IX_... constants for names of indexes)
     60 
     61     i0 indexLength; -- length of indexes[] (UCASE_IX_TOP)
     62     i1 dataLength; -- length in bytes of the post-header data (incl. indexes[])
     63     i2 trieSize; -- size in bytes of the case mapping properties trie
     64     i3 exceptionsLength; -- length in uint16_t of the exceptions array
     65     i4 unfoldLength; -- length in uint16_t of the reverse-folding array (new in format version 1.1)
     66 
     67     i5..i14 reservedIndexes; -- reserved values; 0 for now
     68 
     69     i15 maxFullLength; -- maximum length of a full case mapping/folding string
     70 
     71 
     72     Serialized trie, see utrie.h;
     73 
     74     const uint16_t exceptions[exceptionsLength];
     75 
     76     const UChar unfold[unfoldLength];
     77 
     78 
     79 Trie data word:
     80 Bits
     81 if(exception) {
     82     15..4   unsigned exception index
     83 } else {
     84     if(not uncased) {
     85         15..6   signed delta to simple case mapping code point
     86                 (add delta to input code point)
     87     } else {
     88             6   the code point is case-ignorable
     89                 (U+0307 is also case-ignorable but has an exception)
     90     }
     91      5..4   0 normal character with cc=0
     92             1 soft-dotted character
     93             2 cc=230
     94             3 other cc
     95 }
     96     3   exception
     97     2   case sensitive
     98  1..0   0 uncased
     99         1 lowercase
    100         2 uppercase
    101         3 titlecase
    102 
    103 
    104 Exceptions:
    105 A sub-array of the exceptions array is indexed by the exception index in a
    106 trie word.
    107 The sub-array consists of the following fields:
    108     uint16_t excWord;
    109     uint16_t optional values [];
    110     UTF-16 strings for full (string) mappings for lowercase, case folding, uppercase, titlecase
    111 
    112 excWord: (see UCASE_EXC_...)
    113 Bits
    114     15  conditional case folding
    115     14  conditional special casing
    116 13..12  same as non-exception trie data bits 5..4
    117         moved here because the exception index needs more bits than the delta
    118         0 normal character with cc=0
    119         1 soft-dotted character
    120         2 cc=230
    121         3 other cc
    122 11      case-ignorable (used when the character is cased or has another exception)
    123         (new in formatVersion 1.2/ICU 4.4)
    124 10.. 9  reserved
    125      8  if set, then for each optional-value slot there are 2 uint16_t values
    126         (high and low parts of 32-bit values)
    127         instead of single ones
    128  7.. 0  bits for which optional value is present
    129 
    130 Optional-value slots:
    131 0   lowercase mapping (code point)
    132 1   case folding (code point)
    133 2   uppercase mapping (code point)
    134 3   titlecase mapping (code point)
    135 4   reserved
    136 5   reserved
    137 6   closure mappings (new in format version 1.1)
    138 7   there is at least one full (string) case mapping
    139     the length of each is encoded in a nibble of this optional value,
    140     and the strings follow this optional value in the same order:
    141     lower/fold/upper/title
    142 
    143 The optional closure mappings value is used as follows:
    144 Bits 0..3 contain the length of a string of code points for case closure.
    145 The string immediately follows the full case mappings, or the closure value
    146 slot if there are no full case mappings.
    147 Bits 4..15 are reserved and could be used in the future to indicate the
    148 number of strings for case closure.
    149 Complete case closure for a code point is given by the union of all simple
    150 and full case mappings and foldings, plus the case closure code points
    151 (and potentially, in the future, case closure strings).
    152 
    153 For space saving, some values are not stored. Lookups are as follows:
    154 - If special casing is conditional, then no full lower/upper/title mapping
    155   strings are stored.
    156 - If case folding is conditional, then no simple or full case foldings are
    157   stored.
    158 - Fall back in this order:
    159     full (string) mapping -- if full mappings are used
    160     simple (code point) mapping of the same type
    161     simple fold->simple lower
    162     simple title->simple upper
    163     finally, the original code point (no mapping)
    164 
    165 This fallback order is strict:
    166 In particular, the fallback from full case folding is to simple case folding,
    167 not to full lowercase mapping.
    168 
    169 Reverse case folding data ("unfold") array: (new in format version 1.1)
    170 
    171 This array stores some miscellaneous values followed by a table. The data maps
    172 back from multi-character strings to their original code points, for use
    173 in case closure.
    174 
    175 The table contains two columns of strings.
    176 The string in the first column is the case folding of each of the code points
    177 in the second column. The strings are terminated with NUL or by the end of the
    178 column, whichever comes first.
    179 
    180 The miscellaneous data takes up one pseudo-row and includes:
    181 - number of rows
    182 - number of UChars per row
    183 - number of UChars in the left (folding string) column
    184 
    185 The table is sorted by its first column. Values in the first column are unique.
    186 
    187 ----------------------------------------------------------------------------- */
    188 
    189 /* UDataInfo cf. udata.h */
    190 static UDataInfo dataInfo={
    191     sizeof(UDataInfo),
    192     0,
    193 
    194     U_IS_BIG_ENDIAN,
    195     U_CHARSET_FAMILY,
    196     U_SIZEOF_UCHAR,
    197     0,
    198 
    199     /* dataFormat="cAsE" */
    200     { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
    201     { 1, 1, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
    202     { 4, 0, 1, 0 }                              /* dataVersion */
    203 };
    204 
    205 enum {
    206     /* maximum number of exceptions expected */
    207     MAX_EXC_COUNT=1000
    208 };
    209 
    210 /* exceptions values */
    211 static uint16_t exceptions[UCASE_MAX_EXCEPTIONS+100];
    212 static uint16_t exceptionsTop=0;
    213 static Props excProps[MAX_EXC_COUNT];
    214 static uint16_t exceptionsCount=0;
    215 
    216 /* becomes indexes[UCASE_IX_MAX_FULL_LENGTH] */
    217 static int32_t maxFullLength=U16_MAX_LENGTH;
    218 
    219 /* reverse case folding ("unfold") data */
    220 static UChar unfold[UGENCASE_UNFOLD_MAX_ROWS*UGENCASE_UNFOLD_WIDTH]={
    221     0, UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH, 0, 0
    222 };
    223 static uint16_t unfoldRows=0;
    224 static uint16_t unfoldTop=UGENCASE_UNFOLD_WIDTH;
    225 
    226 /* Unicode versions --------------------------------------------------------- */
    227 
    228 static const UVersionInfo
    229 unicodeVersions[]={
    230     { 1, 0, 0, 0 },
    231     { 1, 1, 0, 0 },
    232     { 2, 0, 0, 0 },
    233     { 3, 0, 0, 0 },
    234     { 3, 1, 0, 0 },
    235     { 3, 2, 0, 0 },
    236     { 4, 0, 0, 0 },
    237     { 4, 0, 1, 0 },
    238     { 4, 1, 0, 0 },
    239     { 5, 1, 0, 0 },
    240     { 5, 2, 0, 0 },
    241     { 6, 0, 0, 0 }
    242 };
    243 
    244 int32_t ucdVersion=UNI_4_1;
    245 
    246 static int32_t
    247 findUnicodeVersion(const UVersionInfo version) {
    248     int32_t i;
    249 
    250     for(i=0; /* while(version>unicodeVersions[i]) {} */
    251         i<UNI_VER_COUNT && uprv_memcmp(version, unicodeVersions[i], 4)>0;
    252         ++i) {}
    253     if(0<i && i<UNI_VER_COUNT && uprv_memcmp(version, unicodeVersions[i], 4)<0) {
    254         --i; /* fix 4.0.2 to land before 4.1, for valid x>=ucdVersion comparisons */
    255     }
    256     return i; /* version>=unicodeVersions[i] && version<unicodeVersions[i+1]; possible: i==UNI_VER_COUNT */
    257 }
    258 
    259 extern void
    260 setUnicodeVersion(const char *v) {
    261     UVersionInfo version;
    262     u_versionFromString(version, v);
    263     uprv_memcpy(dataInfo.dataVersion, version, 4);
    264     ucdVersion=findUnicodeVersion(version);
    265 }
    266 
    267 /* -------------------------------------------------------------------------- */
    268 
    269 static void
    270 addUnfolding(UChar32 c, const UChar *s, int32_t length) {
    271     int32_t i;
    272 
    273     if(length>UGENCASE_UNFOLD_STRING_WIDTH) {
    274         fprintf(stderr, "gencase error: case folding too long (length=%ld>%d=UGENCASE_UNFOLD_STRING_WIDTH)\n",
    275                 (long)length, UGENCASE_UNFOLD_STRING_WIDTH);
    276         exit(U_INTERNAL_PROGRAM_ERROR);
    277     }
    278     if(unfoldTop >= (LENGTHOF(unfold) - UGENCASE_UNFOLD_STRING_WIDTH)) {
    279         fprintf(stderr, "gencase error: too many multi-character case foldings\n");
    280         exit(U_BUFFER_OVERFLOW_ERROR);
    281     }
    282     u_memset(unfold+unfoldTop, 0, UGENCASE_UNFOLD_WIDTH);
    283     u_memcpy(unfold+unfoldTop, s, length);
    284 
    285     i=unfoldTop+UGENCASE_UNFOLD_STRING_WIDTH;
    286     U16_APPEND_UNSAFE(unfold, i, c);
    287 
    288     ++unfoldRows;
    289     unfoldTop+=UGENCASE_UNFOLD_WIDTH;
    290 }
    291 
    292 /* store a character's properties ------------------------------------------- */
    293 
    294 extern void
    295 setProps(Props *p) {
    296     UErrorCode errorCode;
    297     uint32_t value, oldValue;
    298     int32_t delta;
    299 
    300     /* get the non-UnicodeData.txt properties */
    301     value=oldValue=upvec_getValue(pv, p->code, 0);
    302 
    303     /* default: map to self */
    304     delta=0;
    305 
    306     if(p->gc==U_TITLECASE_LETTER) {
    307         /* the Titlecase property is read late, from UnicodeData.txt */
    308         value|=UCASE_TITLE;
    309     }
    310 
    311     if(p->upperCase!=0) {
    312         /* uppercase mapping as delta if the character is lowercase */
    313         if((value&UCASE_TYPE_MASK)==UCASE_LOWER) {
    314             delta=p->upperCase-p->code;
    315         } else {
    316             value|=UCASE_EXCEPTION;
    317         }
    318     }
    319     if(p->lowerCase!=0) {
    320         /* lowercase mapping as delta if the character is uppercase or titlecase */
    321         if((value&UCASE_TYPE_MASK)>=UCASE_UPPER) {
    322             delta=p->lowerCase-p->code;
    323         } else {
    324             value|=UCASE_EXCEPTION;
    325         }
    326     }
    327     if(p->upperCase!=p->titleCase) {
    328         value|=UCASE_EXCEPTION;
    329     }
    330     if(p->closure[0]!=0) {
    331         value|=UCASE_EXCEPTION;
    332     }
    333     if(p->specialCasing!=NULL) {
    334         value|=UCASE_EXCEPTION;
    335     }
    336     if(p->caseFolding!=NULL) {
    337         value|=UCASE_EXCEPTION;
    338     }
    339 
    340     if(delta<UCASE_MIN_DELTA || UCASE_MAX_DELTA<delta) {
    341         value|=UCASE_EXCEPTION;
    342     }
    343 
    344     if(p->cc!=0) {
    345         if(value&UCASE_DOT_MASK) {
    346             fprintf(stderr, "gencase: a soft-dotted character has cc!=0\n");
    347             exit(U_INTERNAL_PROGRAM_ERROR);
    348         }
    349         if(p->cc==230) {
    350             value|=UCASE_ABOVE;
    351         } else {
    352             value|=UCASE_OTHER_ACCENT;
    353         }
    354     }
    355 
    356     /*
    357      * Encode case-ignorable as delta==1 on uncased characters,
    358      * and with an exception bit on cased characters and characters with another exception.
    359      */
    360     if(ucdVersion>=UNI_4_1) {
    361         /*
    362          * Unicode 4.1 & 5.0: (D47a) Word_Break=MidLetter or Mn, Me, Cf, Lm, Sk
    363          * Unicode 5.1: Word_Break=(MidLetter or MidNumLet) or Mn, Me, Cf, Lm, Sk
    364          *   The UGENCASE_IS_MID_LETTER_SHIFT bit is set for both WB=MidLetter and WB=MidNumLet.
    365          * Unicode 5.2: The definition (Unicode Standard Definition D121) is unchanged,
    366          *   but now Case_Ignorable is a public property
    367          *   with its values listed in DerivedCoreProperties.txt.
    368          *   gencase.c parses those values as well, just in case the definition changes
    369          *   in the future. gencase.c sets the UGENCASE_IS_MID_LETTER_SHIFT bit
    370          *   for each Case_Ignorable entry. (It never resets that bit.)
    371          */
    372         if(
    373             (U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
    374             (upvec_getValue(pv, p->code, 1)&U_MASK(UGENCASE_IS_MID_LETTER_SHIFT))!=0
    375         ) {
    376             p->isCaseIgnorable=TRUE;
    377         }
    378     } else {
    379         /* before Unicode 4.1: Mn, Me, Cf, Lm, Sk or 0027 or 00AD or 2019 */
    380         if(
    381             (U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
    382             p->code==0x27 || p->code==0xad || p->code==0x2019
    383         ) {
    384             p->isCaseIgnorable=TRUE;
    385         }
    386     }
    387     if(p->isCaseIgnorable) {
    388         if((value&UCASE_TYPE_MASK)==UCASE_NONE) {
    389             /*
    390              * We use one of the delta/exception bits for
    391              * the case-ignorable flag for uncased characters.
    392              * There is no delta for uncased characters (see checks above).
    393              */
    394             delta=1;
    395         } else {
    396             /*
    397              * If the character is cased or has another exception,
    398              * then we store the case-ignorable flag as an exception bit.
    399              */
    400             value|=UCASE_EXCEPTION;
    401         }
    402     }
    403 
    404     /* handle exceptions */
    405     if(value&UCASE_EXCEPTION) {
    406         /* simply store exceptions for later processing and encoding */
    407         value|=(uint32_t)exceptionsCount<<UGENCASE_EXC_SHIFT;
    408         uprv_memcpy(excProps+exceptionsCount, p, sizeof(*p));
    409         if(++exceptionsCount==MAX_EXC_COUNT) {
    410             fprintf(stderr, "gencase: too many exceptions\n");
    411             exit(U_INDEX_OUTOFBOUNDS_ERROR);
    412         }
    413     } else {
    414         /* store the simple case mapping delta */
    415         value|=((uint32_t)delta<<UCASE_DELTA_SHIFT)&UCASE_DELTA_MASK;
    416     }
    417 
    418     errorCode=U_ZERO_ERROR;
    419     if(value!=oldValue) {
    420         upvec_setValue(pv, p->code, p->code, 0, value, 0xffffffff, &errorCode);
    421         if(U_FAILURE(errorCode)) {
    422             fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
    423                             u_errorName(errorCode));
    424             exit(errorCode);
    425         }
    426     }
    427 
    428     /* add the multi-character case folding to the "unfold" data */
    429     if(p->caseFolding!=NULL) {
    430         int32_t length=p->caseFolding->full[0];
    431         if(length>1 && u_strHasMoreChar32Than(p->caseFolding->full+1, length, 1)) {
    432             addUnfolding(p->code, p->caseFolding->full+1, length);
    433         }
    434     }
    435 }
    436 
    437 extern void
    438 addCaseSensitive(UChar32 first, UChar32 last) {
    439     UErrorCode errorCode=U_ZERO_ERROR;
    440     upvec_setValue(pv, first, last, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode);
    441     if(U_FAILURE(errorCode)) {
    442         fprintf(stderr, "gencase error: unable to set UCASE_SENSITIVE, code: %s\n",
    443                         u_errorName(errorCode));
    444         exit(errorCode);
    445     }
    446 }
    447 
    448 /* finalize reverse case folding ("unfold") data ---------------------------- */
    449 
    450 static int32_t U_CALLCONV
    451 compareUnfold(const void *context, const void *left, const void *right) {
    452     return u_memcmp((const UChar *)left, (const UChar *)right, UGENCASE_UNFOLD_WIDTH);
    453 }
    454 
    455 static void
    456 makeUnfoldData() {
    457     static const UChar
    458         iDot[2]=        { 0x69, 0x307 };
    459 
    460     UChar *p, *q;
    461     int32_t i, j, k;
    462     UErrorCode errorCode;
    463 
    464     /*
    465      * add a case folding that we missed because it's conditional:
    466      * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
    467      */
    468     addUnfolding(0x130, iDot, 2);
    469 
    470     /* sort the data */
    471     errorCode=U_ZERO_ERROR;
    472     uprv_sortArray(unfold+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
    473                    compareUnfold, NULL, FALSE, &errorCode);
    474 
    475     /* make unique-string rows by merging adjacent ones' code point columns */
    476 
    477     /* make p point to row i-1 */
    478     p=(UChar *)unfold+UGENCASE_UNFOLD_WIDTH;
    479 
    480     for(i=1; i<unfoldRows;) {
    481         if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
    482             /* concatenate code point columns */
    483             q=p+UGENCASE_UNFOLD_STRING_WIDTH;
    484             for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
    485             for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
    486                 q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
    487             }
    488             if(j>UGENCASE_UNFOLD_CP_WIDTH) {
    489                 fprintf(stderr, "gencase error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n",
    490                         (long)j, UGENCASE_UNFOLD_CP_WIDTH);
    491                 exit(U_BUFFER_OVERFLOW_ERROR);
    492             }
    493 
    494             /* move following rows up one */
    495             --unfoldRows;
    496             unfoldTop-=UGENCASE_UNFOLD_WIDTH;
    497             u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
    498         } else {
    499             p+=UGENCASE_UNFOLD_WIDTH;
    500             ++i;
    501         }
    502     }
    503 
    504     unfold[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;
    505 
    506     if(beVerbose) {
    507         puts("unfold data:");
    508 
    509         p=(UChar *)unfold;
    510         for(i=0; i<unfoldRows; ++i) {
    511             p+=UGENCASE_UNFOLD_WIDTH;
    512             printf("[%2d] %04x %04x %04x <- %04x %04x\n",
    513                    (int)i, p[0], p[1], p[2], p[3], p[4]);
    514         }
    515     }
    516 }
    517 
    518 /* case closure ------------------------------------------------------------- */
    519 
    520 static void
    521 addClosureMapping(UChar32 src, UChar32 dest) {
    522     uint32_t value;
    523 
    524     if(beVerbose) {
    525         printf("add closure mapping U+%04lx->U+%04lx\n",
    526                 (unsigned long)src, (unsigned long)dest);
    527     }
    528 
    529     value=upvec_getValue(pv, src, 0);
    530     if(value&UCASE_EXCEPTION) {
    531         Props *p=excProps+(value>>UGENCASE_EXC_SHIFT);
    532         int32_t i;
    533 
    534         /* append dest to src's closure array */
    535         for(i=0;; ++i) {
    536             if(i==LENGTHOF(p->closure)) {
    537                 fprintf(stderr, "closure[] overflow for U+%04lx->U+%04lx\n",
    538                                 (unsigned long)src, (unsigned long)dest);
    539                 exit(U_BUFFER_OVERFLOW_ERROR);
    540             } else if(p->closure[i]==dest) {
    541                 break; /* do not store duplicates */
    542             } else if(p->closure[i]==0) {
    543                 p->closure[i]=dest;
    544                 break;
    545             }
    546         }
    547     } else {
    548         Props p2={ 0 };
    549         UChar32 next;
    550         UErrorCode errorCode;
    551 
    552         /*
    553          * decode value into p2 (enough for makeException() to work properly),
    554          * add the closure mapping,
    555          * and set the new exception for src
    556          */
    557         p2.code=src;
    558         p2.closure[0]=dest;
    559 
    560         if((value&UCASE_TYPE_MASK)>UCASE_NONE) {
    561             /* one simple case mapping, don't care which one */
    562             next=src+((int16_t)value>>UCASE_DELTA_SHIFT);
    563             if(next!=src) {
    564                 if((value&UCASE_TYPE_MASK)==UCASE_LOWER) {
    565                     p2.upperCase=p2.titleCase=next;
    566                 } else {
    567                     p2.lowerCase=next;
    568                 }
    569             }
    570         } else if(value&UCASE_DELTA_MASK) {
    571             fprintf(stderr, "gencase error: unable to add case closure exception to case-ignorable U+%04lx\n",
    572                             (unsigned long)src);
    573             exit(U_INTERNAL_PROGRAM_ERROR);
    574         }
    575 
    576         value&=~(UGENCASE_EXC_MASK|UCASE_DELTA_MASK); /* remove previous simple mapping */
    577         value|=(uint32_t)exceptionsCount<<UGENCASE_EXC_SHIFT;
    578         value|=UCASE_EXCEPTION;
    579         uprv_memcpy(excProps+exceptionsCount, &p2, sizeof(p2));
    580         if(++exceptionsCount==MAX_EXC_COUNT) {
    581             fprintf(stderr, "gencase: too many exceptions\n");
    582             exit(U_INDEX_OUTOFBOUNDS_ERROR);
    583         }
    584 
    585         errorCode=U_ZERO_ERROR;
    586         upvec_setValue(pv, src, src, 0, value, 0xffffffff, &errorCode);
    587         if(U_FAILURE(errorCode)) {
    588             fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
    589                             u_errorName(errorCode));
    590             exit(errorCode);
    591         }
    592     }
    593 }
    594 
    595 /*
    596  * Find missing case mapping relationships and add mappings for case closure.
    597  * This function starts from an "original" code point and recursively
    598  * finds its case mappings and the case mappings of where it maps to.
    599  *
    600  * The recursion depth is capped at 3 nested calls of this function.
    601  * In each call, the current code point is c, and the function enumerates
    602  * all of c's simple (single-code point) case mappings.
    603  * prev is the code point that case-mapped to c.
    604  * prev2 is the code point that case-mapped to prev.
    605  *
    606  * The initial function call has prev2<0, prev<0, and c==orig
    607  * (marking no code points).
    608  * It enumerates c's case mappings and recurses without further action.
    609  *
    610  * The second-level function call has prev2<0, prev==orig, and c is
    611  * the destination code point of one of prev's case mappings.
    612  * The function checks if any of c's case mappings go back to orig
    613  * and adds a closure mapping if not.
    614  * In other words, it turns a case mapping relationship of
    615  *   orig->c
    616  * into
    617  *   orig<->c
    618  *
    619  * The third-level function call has prev2==orig, prev>=0, and c is
    620  * the destination code point of one of prev's case mappings.
    621  * (And prev is the destination of one of prev2's case mappings.)
    622  * The function checks if any of c's case mappings go back to orig
    623  * and adds a closure mapping if not.
    624  * In other words, it turns case mapping relationships of
    625  *   orig->prev->c or orig->prev<->c
    626  * into
    627  *   orig->prev->c->orig or orig->prev<->c->orig
    628  * etc.
    629  * (Graphically, this closes a triangle.)
    630  *
    631  * With repeated application on all code points until no more closure mappings
    632  * are added, all case equivalence groups get complete mappings.
    633  * That is, in each group of code points with case relationships
    634  * each code point will in the end have some mapping to each other
    635  * code point in the group.
    636  *
    637  * @return TRUE if a closure mapping was added
    638  */
    639 static UBool
    640 addClosure(UChar32 orig, UChar32 prev2, UChar32 prev, UChar32 c, uint32_t value) {
    641     UChar32 next;
    642     UBool someMappingsAdded=FALSE;
    643 
    644     if(c!=orig) {
    645         /* get the properties for c */
    646         value=upvec_getValue(pv, c, 0);
    647     }
    648     /* else if c==orig then c's value was passed in */
    649 
    650     if(value&UCASE_EXCEPTION) {
    651         UChar32 set[32];
    652         int32_t i, count=0;
    653 
    654         Props *p=excProps+(value>>UGENCASE_EXC_SHIFT);
    655 
    656         /*
    657          * marker for whether any of c's mappings goes to orig
    658          * c==orig: prevent adding a closure mapping when getting orig's own, direct mappings
    659          */
    660         UBool mapsToOrig=(UBool)(c==orig);
    661 
    662         /* collect c's case mapping destinations in set[] */
    663         if((next=p->upperCase)!=0 && next!=c) {
    664             set[count++]=next;
    665         }
    666         if((next=p->lowerCase)!=0 && next!=c) {
    667             set[count++]=next;
    668         }
    669         if(p->upperCase!=(next=p->titleCase) && next!=c) {
    670             set[count++]=next;
    671         }
    672         if(p->caseFolding!=NULL && (next=p->caseFolding->simple)!=0 && next!=c) {
    673             set[count++]=next;
    674         }
    675 
    676         /* append c's current closure mappings to set[] */
    677         for(i=0; i<LENGTHOF(p->closure) && (next=p->closure[i])!=0; ++i) {
    678             set[count++]=next;
    679         }
    680 
    681         /* process all code points to which c case-maps */
    682         for(i=0; i<count; ++i) {
    683             next=set[i]; /* next!=c */
    684 
    685             if(next==orig) {
    686                 mapsToOrig=TRUE; /* remember that we map to orig */
    687             } else if(prev2<0 && next!=prev) {
    688                 /*
    689                  * recurse unless
    690                  * we have reached maximum depth (prev2>=0) or
    691                  * this is a mapping to one of the previous code points (orig, prev, c)
    692                  */
    693                 someMappingsAdded|=addClosure(orig, prev, c, next, 0);
    694             }
    695         }
    696 
    697         if(!mapsToOrig) {
    698             addClosureMapping(c, orig);
    699             return TRUE;
    700         }
    701     } else {
    702         if((value&UCASE_TYPE_MASK)>UCASE_NONE) {
    703             /* one simple case mapping, don't care which one */
    704             next=c+((int16_t)value>>UCASE_DELTA_SHIFT);
    705             if(next!=c) {
    706                 /*
    707                  * recurse unless
    708                  * we have reached maximum depth (prev2>=0) or
    709                  * this is a mapping to one of the previous code points (orig, prev, c)
    710                  */
    711                 if(prev2<0 && next!=orig && next!=prev) {
    712                     someMappingsAdded|=addClosure(orig, prev, c, next, 0);
    713                 }
    714 
    715                 if(c!=orig && next!=orig) {
    716                     /* c does not map to orig, add a closure mapping c->orig */
    717                     addClosureMapping(c, orig);
    718                     return TRUE;
    719                 }
    720             }
    721         }
    722     }
    723 
    724     return someMappingsAdded;
    725 }
    726 
    727 extern void
    728 makeCaseClosure() {
    729     UChar *p;
    730     uint32_t *row;
    731     uint32_t value;
    732     UChar32 start, end, c, c2;
    733     int32_t i, j;
    734     UBool someMappingsAdded;
    735 
    736     /*
    737      * finalize the "unfold" data because we need to use it to add closure mappings
    738      * for situations like FB05->"st"<-FB06
    739      * where we would otherwise miss the FB05<->FB06 relationship
    740      */
    741     makeUnfoldData();
    742 
    743     /* use the "unfold" data to add mappings */
    744 
    745     /* p always points to the code points; this loop ignores the strings completely */
    746     p=unfold+UGENCASE_UNFOLD_WIDTH+UGENCASE_UNFOLD_STRING_WIDTH;
    747 
    748     for(i=0; i<unfoldRows; p+=UGENCASE_UNFOLD_WIDTH, ++i) {
    749         j=0;
    750         U16_NEXT_UNSAFE(p, j, c);
    751         while(j<UGENCASE_UNFOLD_CP_WIDTH && p[j]!=0) {
    752             U16_NEXT_UNSAFE(p, j, c2);
    753             addClosure(c, U_SENTINEL, c, c2, 0);
    754         }
    755     }
    756 
    757     if(beVerbose) {
    758         puts("---- ---- ---- ---- (done with closures from unfolding)");
    759     }
    760 
    761     /* add further closure mappings from analyzing simple mappings */
    762     do {
    763         someMappingsAdded=FALSE;
    764 
    765         i=0;
    766         while((row=upvec_getRow(pv, i, &start, &end))!=NULL && start<UPVEC_FIRST_SPECIAL_CP) {
    767             value=*row;
    768             if(value!=0) {
    769                 while(start<=end) {
    770                     if(addClosure(start, U_SENTINEL, U_SENTINEL, start, value)) {
    771                         someMappingsAdded=TRUE;
    772 
    773                         /*
    774                          * stop this loop because pv was changed and row is not valid any more
    775                          * skip all rows below the current start
    776                          */
    777                         while((row=upvec_getRow(pv, i, NULL, &end))!=NULL && start>end) {
    778                             ++i;
    779                         }
    780                         row=NULL; /* signal to continue with outer loop, without further ++i */
    781                         break;
    782                     }
    783                     ++start;
    784                 }
    785                 if(row==NULL) {
    786                     continue; /* see row=NULL above */
    787                 }
    788             }
    789             ++i;
    790         }
    791 
    792         if(beVerbose && someMappingsAdded) {
    793             puts("---- ---- ---- ----");
    794         }
    795     } while(someMappingsAdded);
    796 }
    797 
    798 /* exceptions --------------------------------------------------------------- */
    799 
    800 /* get the string length from zero-terminated code points in a limited-length array */
    801 static int32_t
    802 getLengthOfCodePoints(const UChar32 *s, int32_t maxLength) {
    803     int32_t i, length;
    804 
    805     for(i=length=0; i<maxLength && s[i]!=0; ++i) {
    806         length+=U16_LENGTH(s[i]);
    807     }
    808     return length;
    809 }
    810 
    811 static UBool
    812 fullMappingEqualsSimple(const UChar *s, UChar32 simple, UChar32 c) {
    813     int32_t i, length;
    814     UChar32 full;
    815 
    816     length=*s++;
    817     if(length==0 || length>U16_MAX_LENGTH) {
    818         return FALSE;
    819     }
    820     i=0;
    821     U16_NEXT(s, i, length, full);
    822 
    823     if(simple==0) {
    824         simple=c; /* UCD has no simple mapping if it's the same as the code point itself */
    825     }
    826     return (UBool)(i==length && full==simple);
    827 }
    828 
    829 static uint16_t
    830 makeException(uint32_t value, Props *p) {
    831     uint32_t slots[8];
    832     uint32_t slotBits;
    833     uint16_t excWord, i, count, length, fullLengths;
    834     UBool doubleSlots;
    835 
    836     /* exceptionsTop might be returned for storing in the trie word */
    837     if(exceptionsTop>=UCASE_MAX_EXCEPTIONS) {
    838         fprintf(stderr, "gencase error: too many exceptions words\n");
    839         exit(U_BUFFER_OVERFLOW_ERROR);
    840     }
    841 
    842     /* copy and shift the soft-dotted bits */
    843     excWord=((uint16_t)value&UCASE_DOT_MASK)<<UCASE_EXC_DOT_SHIFT;
    844 
    845     if(p->isCaseIgnorable) {
    846         excWord|=UCASE_EXC_CASE_IGNORABLE;
    847     }
    848 
    849     /* update maxFullLength */
    850     if(p->specialCasing!=NULL) {
    851         length=p->specialCasing->lowerCase[0];
    852         if(length>maxFullLength) {
    853             maxFullLength=length;
    854         }
    855         length=p->specialCasing->upperCase[0];
    856         if(length>maxFullLength) {
    857             maxFullLength=length;
    858         }
    859         length=p->specialCasing->titleCase[0];
    860         if(length>maxFullLength) {
    861             maxFullLength=length;
    862         }
    863     }
    864     if(p->caseFolding!=NULL) {
    865         length=p->caseFolding->full[0];
    866         if(length>maxFullLength) {
    867             maxFullLength=length;
    868         }
    869     }
    870 
    871     /* set the bits for conditional mappings */
    872     if(p->specialCasing!=NULL && p->specialCasing->isComplex) {
    873         excWord|=UCASE_EXC_CONDITIONAL_SPECIAL;
    874         p->specialCasing=NULL;
    875     }
    876     if(p->caseFolding!=NULL && p->caseFolding->simple==0 && p->caseFolding->full[0]==0) {
    877         excWord|=UCASE_EXC_CONDITIONAL_FOLD;
    878         p->caseFolding=NULL;
    879     }
    880 
    881     /*
    882      * Note:
    883      * UCD stores no simple mappings when they are the same as the code point itself.
    884      * SpecialCasing and CaseFolding do store simple mappings even if they are
    885      * the same as the code point itself.
    886      * Comparisons between simple regular mappings and simple special/folding
    887      * mappings need to compensate for the difference by comparing with the
    888      * original code point if a simple UCD mapping is missing (0).
    889      */
    890 
    891     /* remove redundant data */
    892     if(p->specialCasing!=NULL) {
    893         /* do not store full mappings if they are the same as the simple ones */
    894         if(fullMappingEqualsSimple(p->specialCasing->lowerCase, p->lowerCase, p->code)) {
    895             p->specialCasing->lowerCase[0]=0;
    896         }
    897         if(fullMappingEqualsSimple(p->specialCasing->upperCase, p->upperCase, p->code)) {
    898             p->specialCasing->upperCase[0]=0;
    899         }
    900         if(fullMappingEqualsSimple(p->specialCasing->titleCase, p->titleCase, p->code)) {
    901             p->specialCasing->titleCase[0]=0;
    902         }
    903     }
    904     if( p->caseFolding!=NULL &&
    905         fullMappingEqualsSimple(p->caseFolding->full, p->caseFolding->simple, p->code)
    906     ) {
    907         p->caseFolding->full[0]=0;
    908     }
    909 
    910     /* write the optional slots */
    911     slotBits=0;
    912     count=0;
    913 
    914     if(p->lowerCase!=0) {
    915         slots[count]=(uint32_t)p->lowerCase;
    916         slotBits|=slots[count];
    917         ++count;
    918         excWord|=U_MASK(UCASE_EXC_LOWER);
    919     }
    920     if( p->caseFolding!=NULL &&
    921         p->caseFolding->simple!=0 &&
    922         (p->lowerCase!=0 ?
    923             p->caseFolding->simple!=p->lowerCase :
    924             p->caseFolding->simple!=p->code)
    925     ) {
    926         slots[count]=(uint32_t)p->caseFolding->simple;
    927         slotBits|=slots[count];
    928         ++count;
    929         excWord|=U_MASK(UCASE_EXC_FOLD);
    930     }
    931     if(p->upperCase!=0) {
    932         slots[count]=(uint32_t)p->upperCase;
    933         slotBits|=slots[count];
    934         ++count;
    935         excWord|=U_MASK(UCASE_EXC_UPPER);
    936     }
    937     if(p->upperCase!=p->titleCase) {
    938         if(p->titleCase!=0) {
    939             slots[count]=(uint32_t)p->titleCase;
    940         } else {
    941             slots[count]=(uint32_t)p->code;
    942         }
    943         slotBits|=slots[count];
    944         ++count;
    945         excWord|=U_MASK(UCASE_EXC_TITLE);
    946     }
    947 
    948     /* length of case closure */
    949     if(p->closure[0]!=0) {
    950         length=getLengthOfCodePoints(p->closure, LENGTHOF(p->closure));
    951         slots[count]=(uint32_t)length; /* must be 1..UCASE_CLOSURE_MAX_LENGTH */
    952         slotBits|=slots[count];
    953         ++count;
    954         excWord|=U_MASK(UCASE_EXC_CLOSURE);
    955     }
    956 
    957     /* lengths of full case mapping strings, stored in the last slot */
    958     fullLengths=0;
    959     if(p->specialCasing!=NULL) {
    960         fullLengths=p->specialCasing->lowerCase[0];
    961         fullLengths|=p->specialCasing->upperCase[0]<<8;
    962         fullLengths|=p->specialCasing->titleCase[0]<<12;
    963     }
    964     if(p->caseFolding!=NULL) {
    965         fullLengths|=p->caseFolding->full[0]<<4;
    966     }
    967     if(fullLengths!=0) {
    968         slots[count]=fullLengths;
    969         slotBits|=slots[count];
    970         ++count;
    971         excWord|=U_MASK(UCASE_EXC_FULL_MAPPINGS);
    972     }
    973 
    974     if(count==0) {
    975         /* No optional slots: Try to share excWord entries. */
    976         uint16_t excIndex;
    977         for(excIndex=0; excIndex<exceptionsTop; ++excIndex) {
    978             if(excWord==exceptions[excIndex]) {
    979                 return excIndex;
    980             }
    981         }
    982         /* not found */
    983         ++exceptionsTop;
    984         exceptions[excIndex]=excWord;
    985         return excIndex;
    986     } else {
    987         /* write slots */
    988         uint16_t excIndex=exceptionsTop;
    989         uint16_t excTop=excIndex+1; /* +1 for excWord which will be stored at excIndex */
    990 
    991         doubleSlots=(UBool)(slotBits>0xffff);
    992         if(!doubleSlots) {
    993             for(i=0; i<count; ++i) {
    994                 exceptions[excTop++]=(uint16_t)slots[i];
    995             }
    996         } else {
    997             excWord|=UCASE_EXC_DOUBLE_SLOTS;
    998             for(i=0; i<count; ++i) {
    999                 exceptions[excTop++]=(uint16_t)(slots[i]>>16);
   1000                 exceptions[excTop++]=(uint16_t)slots[i];
   1001             }
   1002         }
   1003 
   1004         /* write the full case mapping strings */
   1005         if(p->specialCasing!=NULL) {
   1006             length=(uint16_t)p->specialCasing->lowerCase[0];
   1007             u_memcpy((UChar *)exceptions+excTop, p->specialCasing->lowerCase+1, length);
   1008             excTop+=length;
   1009         }
   1010         if(p->caseFolding!=NULL) {
   1011             length=(uint16_t)p->caseFolding->full[0];
   1012             u_memcpy((UChar *)exceptions+excTop, p->caseFolding->full+1, length);
   1013             excTop+=length;
   1014         }
   1015         if(p->specialCasing!=NULL) {
   1016             length=(uint16_t)p->specialCasing->upperCase[0];
   1017             u_memcpy((UChar *)exceptions+excTop, p->specialCasing->upperCase+1, length);
   1018             excTop+=length;
   1019 
   1020             length=(uint16_t)p->specialCasing->titleCase[0];
   1021             u_memcpy((UChar *)exceptions+excTop, p->specialCasing->titleCase+1, length);
   1022             excTop+=length;
   1023         }
   1024 
   1025         /* write the closure data */
   1026         if(p->closure[0]!=0) {
   1027             UChar32 c;
   1028 
   1029             for(i=0; i<LENGTHOF(p->closure) && (c=p->closure[i])!=0; ++i) {
   1030                 U16_APPEND_UNSAFE((UChar *)exceptions, excTop, c);
   1031             }
   1032         }
   1033 
   1034         exceptionsTop=excTop;
   1035 
   1036         /* write the main exceptions word */
   1037         exceptions[excIndex]=excWord;
   1038 
   1039         return excIndex;
   1040     }
   1041 }
   1042 
   1043 extern void
   1044 makeExceptions() {
   1045     uint32_t *row;
   1046     uint32_t value;
   1047     int32_t i;
   1048     uint16_t excIndex;
   1049 
   1050     i=0;
   1051     while((row=upvec_getRow(pv, i, NULL, NULL))!=NULL) {
   1052         value=*row;
   1053         if(value&UCASE_EXCEPTION) {
   1054             excIndex=makeException(value, excProps+(value>>UGENCASE_EXC_SHIFT));
   1055             *row=(value&~(UGENCASE_EXC_MASK|UCASE_EXC_MASK))|(excIndex<<UCASE_EXC_SHIFT);
   1056         }
   1057         ++i;
   1058     }
   1059 }
   1060 
   1061 /* generate output data ----------------------------------------------------- */
   1062 
   1063 extern void
   1064 generateData(const char *dataDir, UBool csource) {
   1065     static int32_t indexes[UCASE_IX_TOP]={
   1066         UCASE_IX_TOP
   1067     };
   1068     static uint8_t trieBlock[40000];
   1069 
   1070     const uint32_t *row;
   1071     UChar32 start, end;
   1072     int32_t i;
   1073 
   1074     UNewDataMemory *pData;
   1075     UNewTrie *pTrie;
   1076     UErrorCode errorCode=U_ZERO_ERROR;
   1077     int32_t trieSize;
   1078     long dataLength;
   1079 
   1080     pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
   1081     if(pTrie==NULL) {
   1082         fprintf(stderr, "gencase error: unable to create a UNewTrie\n");
   1083         exit(U_MEMORY_ALLOCATION_ERROR);
   1084     }
   1085 
   1086     for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL; ++i) {
   1087         if(start<UPVEC_FIRST_SPECIAL_CP && !utrie_setRange32(pTrie, start, end+1, *row, TRUE)) {
   1088             fprintf(stderr, "gencase error: unable to set trie value (overflow)\n");
   1089             exit(U_BUFFER_OVERFLOW_ERROR);
   1090         }
   1091     }
   1092 
   1093     trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
   1094     if(U_FAILURE(errorCode)) {
   1095         fprintf(stderr, "error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
   1096         exit(errorCode);
   1097     }
   1098 
   1099     indexes[UCASE_IX_EXC_LENGTH]=exceptionsTop;
   1100     indexes[UCASE_IX_TRIE_SIZE]=trieSize;
   1101     indexes[UCASE_IX_UNFOLD_LENGTH]=unfoldTop;
   1102     indexes[UCASE_IX_LENGTH]=(int32_t)sizeof(indexes)+trieSize+2*exceptionsTop+2*unfoldTop;
   1103 
   1104     indexes[UCASE_IX_MAX_FULL_LENGTH]=maxFullLength;
   1105 
   1106     if(beVerbose) {
   1107         printf("trie size in bytes:                    %5d\n", (int)trieSize);
   1108         printf("number of code points with exceptions: %5d\n", exceptionsCount);
   1109         printf("size in bytes of exceptions:           %5d\n", 2*exceptionsTop);
   1110         printf("size in bytes of reverse foldings:     %5d\n", 2*unfoldTop);
   1111         printf("data size:                             %5d\n", (int)indexes[UCASE_IX_LENGTH]);
   1112     }
   1113 
   1114     if(csource) {
   1115         /* write .c file for hardcoded data */
   1116         UTrie trie={ NULL };
   1117         UTrie2 *trie2;
   1118         FILE *f;
   1119 
   1120         utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
   1121         if(U_FAILURE(errorCode)) {
   1122             fprintf(
   1123                 stderr,
   1124                 "gencase error: failed to utrie_unserialize(ucase.icu trie) - %s\n",
   1125                 u_errorName(errorCode));
   1126             exit(errorCode);
   1127         }
   1128 
   1129         /* use UTrie2 */
   1130         dataInfo.formatVersion[0]=2;
   1131         dataInfo.formatVersion[2]=0;
   1132         dataInfo.formatVersion[3]=0;
   1133         trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
   1134         if(U_FAILURE(errorCode)) {
   1135             fprintf(
   1136                 stderr,
   1137                 "gencase error: utrie2_fromUTrie() failed - %s\n",
   1138                 u_errorName(errorCode));
   1139             exit(errorCode);
   1140         }
   1141         {
   1142             /* delete lead surrogate code unit values */
   1143             UChar lead;
   1144             trie2=utrie2_cloneAsThawed(trie2, &errorCode);
   1145             for(lead=0xd800; lead<0xdc00; ++lead) {
   1146                 utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
   1147             }
   1148             utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
   1149             if(U_FAILURE(errorCode)) {
   1150                 fprintf(
   1151                     stderr,
   1152                     "gencase error: deleting lead surrogate code unit values failed - %s\n",
   1153                     u_errorName(errorCode));
   1154                 exit(errorCode);
   1155             }
   1156         }
   1157 
   1158         f=usrc_create(dataDir, "ucase_props_data.c");
   1159         if(f!=NULL) {
   1160             usrc_writeArray(f,
   1161                 "static const UVersionInfo ucase_props_dataVersion={",
   1162                 dataInfo.dataVersion, 8, 4,
   1163                 "};\n\n");
   1164             usrc_writeArray(f,
   1165                 "static const int32_t ucase_props_indexes[UCASE_IX_TOP]={",
   1166                 indexes, 32, UCASE_IX_TOP,
   1167                 "};\n\n");
   1168             usrc_writeUTrie2Arrays(f,
   1169                 "static const uint16_t ucase_props_trieIndex[%ld]={\n", NULL,
   1170                 trie2,
   1171                 "\n};\n\n");
   1172             usrc_writeArray(f,
   1173                 "static const uint16_t ucase_props_exceptions[%ld]={\n",
   1174                 exceptions, 16, exceptionsTop,
   1175                 "\n};\n\n");
   1176             usrc_writeArray(f,
   1177                 "static const uint16_t ucase_props_unfold[%ld]={\n",
   1178                 unfold, 16, unfoldTop,
   1179                 "\n};\n\n");
   1180             fputs(
   1181                 "static const UCaseProps ucase_props_singleton={\n"
   1182                 "  NULL,\n"
   1183                 "  ucase_props_indexes,\n"
   1184                 "  ucase_props_exceptions,\n"
   1185                 "  ucase_props_unfold,\n",
   1186                 f);
   1187             usrc_writeUTrie2Struct(f,
   1188                 "  {\n",
   1189                 trie2, "ucase_props_trieIndex", NULL,
   1190                 "  },\n");
   1191             usrc_writeArray(f, "  { ", dataInfo.formatVersion, 8, 4, " }\n");
   1192             fputs("};\n", f);
   1193             fclose(f);
   1194         }
   1195         utrie2_close(trie2);
   1196     } else {
   1197         /* write the data */
   1198         pData=udata_create(dataDir, UCASE_DATA_TYPE, UCASE_DATA_NAME, &dataInfo,
   1199                         haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
   1200         if(U_FAILURE(errorCode)) {
   1201             fprintf(stderr, "gencase: unable to create data memory, %s\n", u_errorName(errorCode));
   1202             exit(errorCode);
   1203         }
   1204 
   1205         udata_writeBlock(pData, indexes, sizeof(indexes));
   1206         udata_writeBlock(pData, trieBlock, trieSize);
   1207         udata_writeBlock(pData, exceptions, 2*exceptionsTop);
   1208         udata_writeBlock(pData, unfold, 2*unfoldTop);
   1209 
   1210         /* finish up */
   1211         dataLength=udata_finish(pData, &errorCode);
   1212         if(U_FAILURE(errorCode)) {
   1213             fprintf(stderr, "gencase: error %d writing the output file\n", errorCode);
   1214             exit(errorCode);
   1215         }
   1216 
   1217         if(dataLength!=indexes[UCASE_IX_LENGTH]) {
   1218             fprintf(stderr, "gencase: data length %ld != calculated size %d\n",
   1219                 dataLength, (int)indexes[UCASE_IX_LENGTH]);
   1220             exit(U_INTERNAL_PROGRAM_ERROR);
   1221         }
   1222     }
   1223 
   1224     utrie_close(pTrie);
   1225 }
   1226 
   1227 /*
   1228  * Hey, Emacs, please set the following:
   1229  *
   1230  * Local Variables:
   1231  * indent-tabs-mode: nil
   1232  * End:
   1233  *
   1234  */
   1235