Home | History | Annotate | Download | only in gencase
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2004-2008, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  store.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2004aug28
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Store Unicode case mapping properties efficiently for
     17 *   random access.
     18 */
     19 
     20 #include <stdio.h>
     21 #include <stdlib.h>
     22 #include "unicode/utypes.h"
     23 #include "unicode/uchar.h"
     24 #include "unicode/ustring.h"
     25 #include "cmemory.h"
     26 #include "cstring.h"
     27 #include "filestrm.h"
     28 #include "utrie.h"
     29 #include "utrie2.h"
     30 #include "uarrsort.h"
     31 #include "unicode/udata.h"
     32 #include "unewdata.h"
     33 #include "propsvec.h"
     34 #include "writesrc.h"
     35 #include "gencase.h"
     36 
     37 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
     38 
     39 /* Unicode case mapping properties file format ---------------------------------
     40 
     41 The file format prepared and written here contains several data
     42 structures that store indexes or data.
     43 
     44 Before the data contents described below, there are the headers required by
     45 the udata API for loading ICU data. Especially, a UDataInfo structure
     46 precedes the actual data. It contains platform properties values and the
     47 file format version.
     48 
     49 The following is a description of format version 1.1 .
     50 
     51 Format version 1.1 adds data for case closure.
     52 
     53 The file contains the following structures:
     54 
     55     const int32_t indexes[i0] with values i0, i1, ...:
     56     (see UCASE_IX_... constants for names of indexes)
     57 
     58     i0 indexLength; -- length of indexes[] (UCASE_IX_TOP)
     59     i1 dataLength; -- length in bytes of the post-header data (incl. indexes[])
     60     i2 trieSize; -- size in bytes of the case mapping properties trie
     61     i3 exceptionsLength; -- length in uint16_t of the exceptions array
     62     i4 unfoldLength; -- length in uint16_t of the reverse-folding array (new in format version 1.1)
     63 
     64     i5..i14 reservedIndexes; -- reserved values; 0 for now
     65 
     66     i15 maxFullLength; -- maximum length of a full case mapping/folding string
     67 
     68 
     69     Serialized trie, see utrie.h;
     70 
     71     const uint16_t exceptions[exceptionsLength];
     72 
     73     const UChar unfold[unfoldLength];
     74 
     75 
     76 Trie data word:
     77 Bits
     78 if(exception) {
     79     15..4   unsigned exception index
     80 } else {
     81     if(not uncased) {
     82         15..6   signed delta to simple case mapping code point
     83                 (add delta to input code point)
     84     } else {
     85             6   the code point is case-ignorable
     86                 (U+0307 is also case-ignorable but has an exception)
     87     }
     88      5..4   0 normal character with cc=0
     89             1 soft-dotted character
     90             2 cc=230
     91             3 other cc
     92 }
     93     3   exception
     94     2   case sensitive
     95  1..0   0 uncased
     96         1 lowercase
     97         2 uppercase
     98         3 titlecase
     99 
    100 
    101 Exceptions:
    102 A sub-array of the exceptions array is indexed by the exception index in a
    103 trie word.
    104 The sub-array consists of the following fields:
    105     uint16_t excWord;
    106     uint16_t optional values [];
    107     UTF-16 strings for full (string) mappings for lowercase, case folding, uppercase, titlecase
    108 
    109 excWord: (see UCASE_EXC_...)
    110 Bits
    111     15  conditional case folding
    112     14  conditional special casing
    113 13..12  same as non-exception trie data bits 5..4
    114         moved here because the exception index needs more bits than the delta
    115         0 normal character with cc=0
    116         1 soft-dotted character
    117         2 cc=230
    118         3 other cc
    119 11.. 9  reserved
    120      8  if set, then for each optional-value slot there are 2 uint16_t values
    121         (high and low parts of 32-bit values)
    122         instead of single ones
    123  7.. 0  bits for which optional value is present
    124 
    125 Optional-value slots:
    126 0   lowercase mapping (code point)
    127 1   case folding (code point)
    128 2   uppercase mapping (code point)
    129 3   titlecase mapping (code point)
    130 4   reserved
    131 5   reserved
    132 6   closure mappings (new in format version 1.1)
    133 7   there is at least one full (string) case mapping
    134     the length of each is encoded in a nibble of this optional value,
    135     and the strings follow this optional value in the same order:
    136     lower/fold/upper/title
    137 
    138 The optional closure mappings value is used as follows:
    139 Bits 0..3 contain the length of a string of code points for case closure.
    140 The string immediately follows the full case mappings, or the closure value
    141 slot if there are no full case mappings.
    142 Bits 4..15 are reserved and could be used in the future to indicate the
    143 number of strings for case closure.
    144 Complete case closure for a code point is given by the union of all simple
    145 and full case mappings and foldings, plus the case closure code points
    146 (and potentially, in the future, case closure strings).
    147 
    148 For space saving, some values are not stored. Lookups are as follows:
    149 - If special casing is conditional, then no full lower/upper/title mapping
    150   strings are stored.
    151 - If case folding is conditional, then no simple or full case foldings are
    152   stored.
    153 - Fall back in this order:
    154     full (string) mapping -- if full mappings are used
    155     simple (code point) mapping of the same type
    156     simple fold->simple lower
    157     simple title->simple upper
    158     finally, the original code point (no mapping)
    159 
    160 This fallback order is strict:
    161 In particular, the fallback from full case folding is to simple case folding,
    162 not to full lowercase mapping.
    163 
    164 Reverse case folding data ("unfold") array: (new in format version 1.1)
    165 
    166 This array stores some miscellaneous values followed by a table. The data maps
    167 back from multi-character strings to their original code points, for use
    168 in case closure.
    169 
    170 The table contains two columns of strings.
    171 The string in the first column is the case folding of each of the code points
    172 in the second column. The strings are terminated with NUL or by the end of the
    173 column, whichever comes first.
    174 
    175 The miscellaneous data takes up one pseudo-row and includes:
    176 - number of rows
    177 - number of UChars per row
    178 - number of UChars in the left (folding string) column
    179 
    180 The table is sorted by its first column. Values in the first column are unique.
    181 
    182 ----------------------------------------------------------------------------- */
    183 
    184 /* UDataInfo cf. udata.h */
    185 static UDataInfo dataInfo={
    186     sizeof(UDataInfo),
    187     0,
    188 
    189     U_IS_BIG_ENDIAN,
    190     U_CHARSET_FAMILY,
    191     U_SIZEOF_UCHAR,
    192     0,
    193 
    194     /* dataFormat="cAsE" */
    195     { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
    196     { 1, 1, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
    197     { 4, 0, 1, 0 }                              /* dataVersion */
    198 };
    199 
    200 enum {
    201     /* maximum number of exceptions expected */
    202     MAX_EXC_COUNT=1000
    203 };
    204 
    205 /* exceptions values */
    206 static uint16_t exceptions[UCASE_MAX_EXCEPTIONS+100];
    207 static uint16_t exceptionsTop=0;
    208 static Props excProps[MAX_EXC_COUNT];
    209 static uint16_t exceptionsCount=0;
    210 
    211 /* becomes indexes[UCASE_IX_MAX_FULL_LENGTH] */
    212 static int32_t maxFullLength=U16_MAX_LENGTH;
    213 
    214 /* reverse case folding ("unfold") data */
    215 static UChar unfold[UGENCASE_UNFOLD_MAX_ROWS*UGENCASE_UNFOLD_WIDTH]={
    216     0, UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH, 0, 0
    217 };
    218 static uint16_t unfoldRows=0;
    219 static uint16_t unfoldTop=UGENCASE_UNFOLD_WIDTH;
    220 
    221 /* Unicode versions --------------------------------------------------------- */
    222 
    223 static const UVersionInfo
    224 unicodeVersions[]={
    225     { 1, 0, 0, 0 },
    226     { 1, 1, 0, 0 },
    227     { 2, 0, 0, 0 },
    228     { 3, 0, 0, 0 },
    229     { 3, 1, 0, 0 },
    230     { 3, 2, 0, 0 },
    231     { 4, 0, 0, 0 },
    232     { 4, 0, 1, 0 },
    233     { 4, 1, 0, 0 }
    234 };
    235 
    236 int32_t ucdVersion=UNI_4_1;
    237 
    238 static int32_t
    239 findUnicodeVersion(const UVersionInfo version) {
    240     int32_t i;
    241 
    242     for(i=0; /* while(version>unicodeVersions[i]) {} */
    243         i<UNI_VER_COUNT && uprv_memcmp(version, unicodeVersions[i], 4)>0;
    244         ++i) {}
    245     if(0<i && i<UNI_VER_COUNT && uprv_memcmp(version, unicodeVersions[i], 4)<0) {
    246         --i; /* fix 4.0.2 to land before 4.1, for valid x>=ucdVersion comparisons */
    247     }
    248     return i; /* version>=unicodeVersions[i] && version<unicodeVersions[i+1]; possible: i==UNI_VER_COUNT */
    249 }
    250 
    251 extern void
    252 setUnicodeVersion(const char *v) {
    253     UVersionInfo version;
    254     u_versionFromString(version, v);
    255     uprv_memcpy(dataInfo.dataVersion, version, 4);
    256     ucdVersion=findUnicodeVersion(version);
    257 }
    258 
    259 /* -------------------------------------------------------------------------- */
    260 
    261 static void
    262 addUnfolding(UChar32 c, const UChar *s, int32_t length) {
    263     int32_t i;
    264 
    265     if(length>UGENCASE_UNFOLD_STRING_WIDTH) {
    266         fprintf(stderr, "gencase error: case folding too long (length=%ld>%d=UGENCASE_UNFOLD_STRING_WIDTH)\n",
    267                 (long)length, UGENCASE_UNFOLD_STRING_WIDTH);
    268         exit(U_INTERNAL_PROGRAM_ERROR);
    269     }
    270     if(unfoldTop >= (LENGTHOF(unfold) - UGENCASE_UNFOLD_STRING_WIDTH)) {
    271         fprintf(stderr, "gencase error: too many multi-character case foldings\n");
    272         exit(U_BUFFER_OVERFLOW_ERROR);
    273     }
    274     u_memset(unfold+unfoldTop, 0, UGENCASE_UNFOLD_WIDTH);
    275     u_memcpy(unfold+unfoldTop, s, length);
    276 
    277     i=unfoldTop+UGENCASE_UNFOLD_STRING_WIDTH;
    278     U16_APPEND_UNSAFE(unfold, i, c);
    279 
    280     ++unfoldRows;
    281     unfoldTop+=UGENCASE_UNFOLD_WIDTH;
    282 }
    283 
    284 /* store a character's properties ------------------------------------------- */
    285 
    286 extern void
    287 setProps(Props *p) {
    288     UErrorCode errorCode;
    289     uint32_t value, oldValue;
    290     int32_t delta;
    291     UBool isCaseIgnorable;
    292 
    293     /* get the non-UnicodeData.txt properties */
    294     value=oldValue=upvec_getValue(pv, p->code, 0);
    295 
    296     /* default: map to self */
    297     delta=0;
    298 
    299     if(p->gc==U_TITLECASE_LETTER) {
    300         /* the Titlecase property is read late, from UnicodeData.txt */
    301         value|=UCASE_TITLE;
    302     }
    303 
    304     if(p->upperCase!=0) {
    305         /* uppercase mapping as delta if the character is lowercase */
    306         if((value&UCASE_TYPE_MASK)==UCASE_LOWER) {
    307             delta=p->upperCase-p->code;
    308         } else {
    309             value|=UCASE_EXCEPTION;
    310         }
    311     }
    312     if(p->lowerCase!=0) {
    313         /* lowercase mapping as delta if the character is uppercase or titlecase */
    314         if((value&UCASE_TYPE_MASK)>=UCASE_UPPER) {
    315             delta=p->lowerCase-p->code;
    316         } else {
    317             value|=UCASE_EXCEPTION;
    318         }
    319     }
    320     if(p->upperCase!=p->titleCase) {
    321         value|=UCASE_EXCEPTION;
    322     }
    323     if(p->closure[0]!=0) {
    324         value|=UCASE_EXCEPTION;
    325     }
    326     if(p->specialCasing!=NULL) {
    327         value|=UCASE_EXCEPTION;
    328     }
    329     if(p->caseFolding!=NULL) {
    330         value|=UCASE_EXCEPTION;
    331     }
    332 
    333     if(delta<UCASE_MIN_DELTA || UCASE_MAX_DELTA<delta) {
    334         value|=UCASE_EXCEPTION;
    335     }
    336 
    337     if(p->cc!=0) {
    338         if(value&UCASE_DOT_MASK) {
    339             fprintf(stderr, "gencase: a soft-dotted character has cc!=0\n");
    340             exit(U_INTERNAL_PROGRAM_ERROR);
    341         }
    342         if(p->cc==230) {
    343             value|=UCASE_ABOVE;
    344         } else {
    345             value|=UCASE_OTHER_ACCENT;
    346         }
    347     }
    348 
    349     /* encode case-ignorable as delta==1 on uncased characters */
    350     isCaseIgnorable=FALSE;
    351     if((value&UCASE_TYPE_MASK)==UCASE_NONE) {
    352         if(ucdVersion>=UNI_4_1) {
    353             /*
    354              * Unicode 4.1 and up: (D47a) Word_Break=MidLetter or Mn, Me, Cf, Lm, Sk
    355              * Unicode 5.1 and up: Word_Break=(MidLetter or MidNumLet) or Mn, Me, Cf, Lm, Sk
    356              *   The UGENCASE_IS_MID_LETTER_SHIFT bit is set for both WB=MidLetter and WB=MidNumLet.
    357              */
    358             if(
    359                 (U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
    360                 (upvec_getValue(pv, p->code, 1)&U_MASK(UGENCASE_IS_MID_LETTER_SHIFT))!=0
    361             ) {
    362                 isCaseIgnorable=TRUE;
    363             }
    364         } else {
    365             /* before Unicode 4.1: Mn, Me, Cf, Lm, Sk or 0027 or 00AD or 2019 */
    366             if(
    367                 (U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
    368                 p->code==0x27 || p->code==0xad || p->code==0x2019
    369             ) {
    370                 isCaseIgnorable=TRUE;
    371             }
    372         }
    373     }
    374 
    375     if(isCaseIgnorable && p->code!=0x307) {
    376         /*
    377          * We use one of the delta/exception bits, which works because we only
    378          * store the case-ignorable flag for uncased characters.
    379          * There is no delta for uncased characters (see checks above).
    380          * If there is an exception for an uncased, case-ignorable character
    381          * (although there should not be any case mappings if it's uncased)
    382          * then we have a problem.
    383          * There is one character which is case-ignorable but has an exception:
    384          * U+0307 is uncased, Mn, has conditional special casing and
    385          * is therefore handled in code instead.
    386          */
    387         if(value&UCASE_EXCEPTION) {
    388             fprintf(stderr, "gencase error: unable to encode case-ignorable for U+%04lx with exceptions\n",
    389                             (unsigned long)p->code);
    390             exit(U_INTERNAL_PROGRAM_ERROR);
    391         }
    392 
    393         delta=1;
    394     }
    395 
    396     /* handle exceptions */
    397     if(value&UCASE_EXCEPTION) {
    398         /* simply store exceptions for later processing and encoding */
    399         value|=(uint32_t)exceptionsCount<<UGENCASE_EXC_SHIFT;
    400         uprv_memcpy(excProps+exceptionsCount, p, sizeof(*p));
    401         if(++exceptionsCount==MAX_EXC_COUNT) {
    402             fprintf(stderr, "gencase: too many exceptions\n");
    403             exit(U_INDEX_OUTOFBOUNDS_ERROR);
    404         }
    405     } else {
    406         /* store the simple case mapping delta */
    407         value|=((uint32_t)delta<<UCASE_DELTA_SHIFT)&UCASE_DELTA_MASK;
    408     }
    409 
    410     errorCode=U_ZERO_ERROR;
    411     if(value!=oldValue) {
    412         upvec_setValue(pv, p->code, p->code, 0, value, 0xffffffff, &errorCode);
    413         if(U_FAILURE(errorCode)) {
    414             fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
    415                             u_errorName(errorCode));
    416             exit(errorCode);
    417         }
    418     }
    419 
    420     /* add the multi-character case folding to the "unfold" data */
    421     if(p->caseFolding!=NULL) {
    422         int32_t length=p->caseFolding->full[0];
    423         if(length>1 && u_strHasMoreChar32Than(p->caseFolding->full+1, length, 1)) {
    424             addUnfolding(p->code, p->caseFolding->full+1, length);
    425         }
    426     }
    427 }
    428 
    429 extern void
    430 addCaseSensitive(UChar32 first, UChar32 last) {
    431     UErrorCode errorCode=U_ZERO_ERROR;
    432     upvec_setValue(pv, first, last, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode);
    433     if(U_FAILURE(errorCode)) {
    434         fprintf(stderr, "gencase error: unable to set UCASE_SENSITIVE, code: %s\n",
    435                         u_errorName(errorCode));
    436         exit(errorCode);
    437     }
    438 }
    439 
    440 /* finalize reverse case folding ("unfold") data ---------------------------- */
    441 
    442 static int32_t U_CALLCONV
    443 compareUnfold(const void *context, const void *left, const void *right) {
    444     return u_memcmp((const UChar *)left, (const UChar *)right, UGENCASE_UNFOLD_WIDTH);
    445 }
    446 
    447 static void
    448 makeUnfoldData() {
    449     static const UChar
    450         iDot[2]=        { 0x69, 0x307 };
    451 
    452     UChar *p, *q;
    453     int32_t i, j, k;
    454     UErrorCode errorCode;
    455 
    456     /*
    457      * add a case folding that we missed because it's conditional:
    458      * 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
    459      */
    460     addUnfolding(0x130, iDot, 2);
    461 
    462     /* sort the data */
    463     errorCode=U_ZERO_ERROR;
    464     uprv_sortArray(unfold+UGENCASE_UNFOLD_WIDTH, unfoldRows, UGENCASE_UNFOLD_WIDTH*2,
    465                    compareUnfold, NULL, FALSE, &errorCode);
    466 
    467     /* make unique-string rows by merging adjacent ones' code point columns */
    468 
    469     /* make p point to row i-1 */
    470     p=(UChar *)unfold+UGENCASE_UNFOLD_WIDTH;
    471 
    472     for(i=1; i<unfoldRows;) {
    473         if(0==u_memcmp(p, p+UGENCASE_UNFOLD_WIDTH, UGENCASE_UNFOLD_STRING_WIDTH)) {
    474             /* concatenate code point columns */
    475             q=p+UGENCASE_UNFOLD_STRING_WIDTH;
    476             for(j=1; j<UGENCASE_UNFOLD_CP_WIDTH && q[j]!=0; ++j) {}
    477             for(k=0; k<UGENCASE_UNFOLD_CP_WIDTH && q[UGENCASE_UNFOLD_WIDTH+k]!=0; ++j, ++k) {
    478                 q[j]=q[UGENCASE_UNFOLD_WIDTH+k];
    479             }
    480             if(j>UGENCASE_UNFOLD_CP_WIDTH) {
    481                 fprintf(stderr, "gencase error: too many code points in unfold[]: %ld>%d=UGENCASE_UNFOLD_CP_WIDTH\n",
    482                         (long)j, UGENCASE_UNFOLD_CP_WIDTH);
    483                 exit(U_BUFFER_OVERFLOW_ERROR);
    484             }
    485 
    486             /* move following rows up one */
    487             --unfoldRows;
    488             unfoldTop-=UGENCASE_UNFOLD_WIDTH;
    489             u_memmove(p+UGENCASE_UNFOLD_WIDTH, p+UGENCASE_UNFOLD_WIDTH*2, (unfoldRows-i)*UGENCASE_UNFOLD_WIDTH);
    490         } else {
    491             p+=UGENCASE_UNFOLD_WIDTH;
    492             ++i;
    493         }
    494     }
    495 
    496     unfold[UCASE_UNFOLD_ROWS]=(UChar)unfoldRows;
    497 
    498     if(beVerbose) {
    499         puts("unfold data:");
    500 
    501         p=(UChar *)unfold;
    502         for(i=0; i<unfoldRows; ++i) {
    503             p+=UGENCASE_UNFOLD_WIDTH;
    504             printf("[%2d] %04x %04x %04x <- %04x %04x\n",
    505                    (int)i, p[0], p[1], p[2], p[3], p[4]);
    506         }
    507     }
    508 }
    509 
    510 /* case closure ------------------------------------------------------------- */
    511 
    512 static void
    513 addClosureMapping(UChar32 src, UChar32 dest) {
    514     uint32_t value;
    515 
    516     if(beVerbose) {
    517         printf("add closure mapping U+%04lx->U+%04lx\n",
    518                 (unsigned long)src, (unsigned long)dest);
    519     }
    520 
    521     value=upvec_getValue(pv, src, 0);
    522     if(value&UCASE_EXCEPTION) {
    523         Props *p=excProps+(value>>UGENCASE_EXC_SHIFT);
    524         int32_t i;
    525 
    526         /* append dest to src's closure array */
    527         for(i=0;; ++i) {
    528             if(i==LENGTHOF(p->closure)) {
    529                 fprintf(stderr, "closure[] overflow for U+%04lx->U+%04lx\n",
    530                                 (unsigned long)src, (unsigned long)dest);
    531                 exit(U_BUFFER_OVERFLOW_ERROR);
    532             } else if(p->closure[i]==dest) {
    533                 break; /* do not store duplicates */
    534             } else if(p->closure[i]==0) {
    535                 p->closure[i]=dest;
    536                 break;
    537             }
    538         }
    539     } else {
    540         Props p2={ 0 };
    541         UChar32 next;
    542         UErrorCode errorCode;
    543 
    544         /*
    545          * decode value into p2 (enough for makeException() to work properly),
    546          * add the closure mapping,
    547          * and set the new exception for src
    548          */
    549         p2.code=src;
    550         p2.closure[0]=dest;
    551 
    552         if((value&UCASE_TYPE_MASK)>UCASE_NONE) {
    553             /* one simple case mapping, don't care which one */
    554             next=src+((int16_t)value>>UCASE_DELTA_SHIFT);
    555             if(next!=src) {
    556                 if((value&UCASE_TYPE_MASK)==UCASE_LOWER) {
    557                     p2.upperCase=p2.titleCase=next;
    558                 } else {
    559                     p2.lowerCase=next;
    560                 }
    561             }
    562         } else if(value&UCASE_DELTA_MASK) {
    563             fprintf(stderr, "gencase error: unable to add case closure exception to case-ignorable U+%04lx\n",
    564                             (unsigned long)src);
    565             exit(U_INTERNAL_PROGRAM_ERROR);
    566         }
    567 
    568         value&=~(UGENCASE_EXC_MASK|UCASE_DELTA_MASK); /* remove previous simple mapping */
    569         value|=(uint32_t)exceptionsCount<<UGENCASE_EXC_SHIFT;
    570         value|=UCASE_EXCEPTION;
    571         uprv_memcpy(excProps+exceptionsCount, &p2, sizeof(p2));
    572         if(++exceptionsCount==MAX_EXC_COUNT) {
    573             fprintf(stderr, "gencase: too many exceptions\n");
    574             exit(U_INDEX_OUTOFBOUNDS_ERROR);
    575         }
    576 
    577         errorCode=U_ZERO_ERROR;
    578         upvec_setValue(pv, src, src, 0, value, 0xffffffff, &errorCode);
    579         if(U_FAILURE(errorCode)) {
    580             fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
    581                             u_errorName(errorCode));
    582             exit(errorCode);
    583         }
    584     }
    585 }
    586 
    587 /*
    588  * Find missing case mapping relationships and add mappings for case closure.
    589  * This function starts from an "original" code point and recursively
    590  * finds its case mappings and the case mappings of where it maps to.
    591  *
    592  * The recursion depth is capped at 3 nested calls of this function.
    593  * In each call, the current code point is c, and the function enumerates
    594  * all of c's simple (single-code point) case mappings.
    595  * prev is the code point that case-mapped to c.
    596  * prev2 is the code point that case-mapped to prev.
    597  *
    598  * The initial function call has prev2<0, prev<0, and c==orig
    599  * (marking no code points).
    600  * It enumerates c's case mappings and recurses without further action.
    601  *
    602  * The second-level function call has prev2<0, prev==orig, and c is
    603  * the destination code point of one of prev's case mappings.
    604  * The function checks if any of c's case mappings go back to orig
    605  * and adds a closure mapping if not.
    606  * In other words, it turns a case mapping relationship of
    607  *   orig->c
    608  * into
    609  *   orig<->c
    610  *
    611  * The third-level function call has prev2==orig, prev>=0, and c is
    612  * the destination code point of one of prev's case mappings.
    613  * (And prev is the destination of one of prev2's case mappings.)
    614  * The function checks if any of c's case mappings go back to orig
    615  * and adds a closure mapping if not.
    616  * In other words, it turns case mapping relationships of
    617  *   orig->prev->c or orig->prev<->c
    618  * into
    619  *   orig->prev->c->orig or orig->prev<->c->orig
    620  * etc.
    621  * (Graphically, this closes a triangle.)
    622  *
    623  * With repeated application on all code points until no more closure mappings
    624  * are added, all case equivalence groups get complete mappings.
    625  * That is, in each group of code points with case relationships
    626  * each code point will in the end have some mapping to each other
    627  * code point in the group.
    628  *
    629  * @return TRUE if a closure mapping was added
    630  */
    631 static UBool
    632 addClosure(UChar32 orig, UChar32 prev2, UChar32 prev, UChar32 c, uint32_t value) {
    633     UChar32 next;
    634     UBool someMappingsAdded=FALSE;
    635 
    636     if(c!=orig) {
    637         /* get the properties for c */
    638         value=upvec_getValue(pv, c, 0);
    639     }
    640     /* else if c==orig then c's value was passed in */
    641 
    642     if(value&UCASE_EXCEPTION) {
    643         UChar32 set[32];
    644         int32_t i, count=0;
    645 
    646         Props *p=excProps+(value>>UGENCASE_EXC_SHIFT);
    647 
    648         /*
    649          * marker for whether any of c's mappings goes to orig
    650          * c==orig: prevent adding a closure mapping when getting orig's own, direct mappings
    651          */
    652         UBool mapsToOrig=(UBool)(c==orig);
    653 
    654         /* collect c's case mapping destinations in set[] */
    655         if((next=p->upperCase)!=0 && next!=c) {
    656             set[count++]=next;
    657         }
    658         if((next=p->lowerCase)!=0 && next!=c) {
    659             set[count++]=next;
    660         }
    661         if(p->upperCase!=(next=p->titleCase) && next!=c) {
    662             set[count++]=next;
    663         }
    664         if(p->caseFolding!=NULL && (next=p->caseFolding->simple)!=0 && next!=c) {
    665             set[count++]=next;
    666         }
    667 
    668         /* append c's current closure mappings to set[] */
    669         for(i=0; i<LENGTHOF(p->closure) && (next=p->closure[i])!=0; ++i) {
    670             set[count++]=next;
    671         }
    672 
    673         /* process all code points to which c case-maps */
    674         for(i=0; i<count; ++i) {
    675             next=set[i]; /* next!=c */
    676 
    677             if(next==orig) {
    678                 mapsToOrig=TRUE; /* remember that we map to orig */
    679             } else if(prev2<0 && next!=prev) {
    680                 /*
    681                  * recurse unless
    682                  * we have reached maximum depth (prev2>=0) or
    683                  * this is a mapping to one of the previous code points (orig, prev, c)
    684                  */
    685                 someMappingsAdded|=addClosure(orig, prev, c, next, 0);
    686             }
    687         }
    688 
    689         if(!mapsToOrig) {
    690             addClosureMapping(c, orig);
    691             return TRUE;
    692         }
    693     } else {
    694         if((value&UCASE_TYPE_MASK)>UCASE_NONE) {
    695             /* one simple case mapping, don't care which one */
    696             next=c+((int16_t)value>>UCASE_DELTA_SHIFT);
    697             if(next!=c) {
    698                 /*
    699                  * recurse unless
    700                  * we have reached maximum depth (prev2>=0) or
    701                  * this is a mapping to one of the previous code points (orig, prev, c)
    702                  */
    703                 if(prev2<0 && next!=orig && next!=prev) {
    704                     someMappingsAdded|=addClosure(orig, prev, c, next, 0);
    705                 }
    706 
    707                 if(c!=orig && next!=orig) {
    708                     /* c does not map to orig, add a closure mapping c->orig */
    709                     addClosureMapping(c, orig);
    710                     return TRUE;
    711                 }
    712             }
    713         }
    714     }
    715 
    716     return someMappingsAdded;
    717 }
    718 
    719 extern void
    720 makeCaseClosure() {
    721     UChar *p;
    722     uint32_t *row;
    723     uint32_t value;
    724     UChar32 start, end, c, c2;
    725     int32_t i, j;
    726     UBool someMappingsAdded;
    727 
    728     /*
    729      * finalize the "unfold" data because we need to use it to add closure mappings
    730      * for situations like FB05->"st"<-FB06
    731      * where we would otherwise miss the FB05<->FB06 relationship
    732      */
    733     makeUnfoldData();
    734 
    735     /* use the "unfold" data to add mappings */
    736 
    737     /* p always points to the code points; this loop ignores the strings completely */
    738     p=unfold+UGENCASE_UNFOLD_WIDTH+UGENCASE_UNFOLD_STRING_WIDTH;
    739 
    740     for(i=0; i<unfoldRows; p+=UGENCASE_UNFOLD_WIDTH, ++i) {
    741         j=0;
    742         U16_NEXT_UNSAFE(p, j, c);
    743         while(j<UGENCASE_UNFOLD_CP_WIDTH && p[j]!=0) {
    744             U16_NEXT_UNSAFE(p, j, c2);
    745             addClosure(c, U_SENTINEL, c, c2, 0);
    746         }
    747     }
    748 
    749     if(beVerbose) {
    750         puts("---- ---- ---- ---- (done with closures from unfolding)");
    751     }
    752 
    753     /* add further closure mappings from analyzing simple mappings */
    754     do {
    755         someMappingsAdded=FALSE;
    756 
    757         i=0;
    758         while((row=upvec_getRow(pv, i, &start, &end))!=NULL && start<UPVEC_FIRST_SPECIAL_CP) {
    759             value=*row;
    760             if(value!=0) {
    761                 while(start<=end) {
    762                     if(addClosure(start, U_SENTINEL, U_SENTINEL, start, value)) {
    763                         someMappingsAdded=TRUE;
    764 
    765                         /*
    766                          * stop this loop because pv was changed and row is not valid any more
    767                          * skip all rows below the current start
    768                          */
    769                         while((row=upvec_getRow(pv, i, NULL, &end))!=NULL && start>end) {
    770                             ++i;
    771                         }
    772                         row=NULL; /* signal to continue with outer loop, without further ++i */
    773                         break;
    774                     }
    775                     ++start;
    776                 }
    777                 if(row==NULL) {
    778                     continue; /* see row=NULL above */
    779                 }
    780             }
    781             ++i;
    782         }
    783 
    784         if(beVerbose && someMappingsAdded) {
    785             puts("---- ---- ---- ----");
    786         }
    787     } while(someMappingsAdded);
    788 }
    789 
    790 /* exceptions --------------------------------------------------------------- */
    791 
    792 /* get the string length from zero-terminated code points in a limited-length array */
    793 static int32_t
    794 getLengthOfCodePoints(const UChar32 *s, int32_t maxLength) {
    795     int32_t i, length;
    796 
    797     for(i=length=0; i<maxLength && s[i]!=0; ++i) {
    798         length+=U16_LENGTH(s[i]);
    799     }
    800     return length;
    801 }
    802 
    803 static UBool
    804 fullMappingEqualsSimple(const UChar *s, UChar32 simple, UChar32 c) {
    805     int32_t i, length;
    806     UChar32 full;
    807 
    808     length=*s++;
    809     if(length==0 || length>U16_MAX_LENGTH) {
    810         return FALSE;
    811     }
    812     i=0;
    813     U16_NEXT(s, i, length, full);
    814 
    815     if(simple==0) {
    816         simple=c; /* UCD has no simple mapping if it's the same as the code point itself */
    817     }
    818     return (UBool)(i==length && full==simple);
    819 }
    820 
    821 static uint16_t
    822 makeException(uint32_t value, Props *p) {
    823     uint32_t slots[8];
    824     uint32_t slotBits;
    825     uint16_t excWord, excIndex, excTop, i, count, length, fullLengths;
    826     UBool doubleSlots;
    827 
    828     /* excIndex will be returned for storing in the trie word */
    829     excIndex=exceptionsTop;
    830     if(excIndex>=UCASE_MAX_EXCEPTIONS) {
    831         fprintf(stderr, "gencase error: too many exceptions words\n");
    832         exit(U_BUFFER_OVERFLOW_ERROR);
    833     }
    834 
    835     excTop=excIndex+1; /* +1 for excWord which will be stored at excIndex */
    836 
    837     /* copy and shift the soft-dotted bits */
    838     excWord=((uint16_t)value&UCASE_DOT_MASK)<<UCASE_EXC_DOT_SHIFT;
    839 
    840     /* update maxFullLength */
    841     if(p->specialCasing!=NULL) {
    842         length=p->specialCasing->lowerCase[0];
    843         if(length>maxFullLength) {
    844             maxFullLength=length;
    845         }
    846         length=p->specialCasing->upperCase[0];
    847         if(length>maxFullLength) {
    848             maxFullLength=length;
    849         }
    850         length=p->specialCasing->titleCase[0];
    851         if(length>maxFullLength) {
    852             maxFullLength=length;
    853         }
    854     }
    855     if(p->caseFolding!=NULL) {
    856         length=p->caseFolding->full[0];
    857         if(length>maxFullLength) {
    858             maxFullLength=length;
    859         }
    860     }
    861 
    862     /* set the bits for conditional mappings */
    863     if(p->specialCasing!=NULL && p->specialCasing->isComplex) {
    864         excWord|=UCASE_EXC_CONDITIONAL_SPECIAL;
    865         p->specialCasing=NULL;
    866     }
    867     if(p->caseFolding!=NULL && p->caseFolding->simple==0 && p->caseFolding->full[0]==0) {
    868         excWord|=UCASE_EXC_CONDITIONAL_FOLD;
    869         p->caseFolding=NULL;
    870     }
    871 
    872     /*
    873      * Note:
    874      * UCD stores no simple mappings when they are the same as the code point itself.
    875      * SpecialCasing and CaseFolding do store simple mappings even if they are
    876      * the same as the code point itself.
    877      * Comparisons between simple regular mappings and simple special/folding
    878      * mappings need to compensate for the difference by comparing with the
    879      * original code point if a simple UCD mapping is missing (0).
    880      */
    881 
    882     /* remove redundant data */
    883     if(p->specialCasing!=NULL) {
    884         /* do not store full mappings if they are the same as the simple ones */
    885         if(fullMappingEqualsSimple(p->specialCasing->lowerCase, p->lowerCase, p->code)) {
    886             p->specialCasing->lowerCase[0]=0;
    887         }
    888         if(fullMappingEqualsSimple(p->specialCasing->upperCase, p->upperCase, p->code)) {
    889             p->specialCasing->upperCase[0]=0;
    890         }
    891         if(fullMappingEqualsSimple(p->specialCasing->titleCase, p->titleCase, p->code)) {
    892             p->specialCasing->titleCase[0]=0;
    893         }
    894     }
    895     if( p->caseFolding!=NULL &&
    896         fullMappingEqualsSimple(p->caseFolding->full, p->caseFolding->simple, p->code)
    897     ) {
    898         p->caseFolding->full[0]=0;
    899     }
    900 
    901     /* write the optional slots */
    902     slotBits=0;
    903     count=0;
    904 
    905     if(p->lowerCase!=0) {
    906         slots[count]=(uint32_t)p->lowerCase;
    907         slotBits|=slots[count];
    908         ++count;
    909         excWord|=U_MASK(UCASE_EXC_LOWER);
    910     }
    911     if( p->caseFolding!=NULL &&
    912         p->caseFolding->simple!=0 &&
    913         (p->lowerCase!=0 ?
    914             p->caseFolding->simple!=p->lowerCase :
    915             p->caseFolding->simple!=p->code)
    916     ) {
    917         slots[count]=(uint32_t)p->caseFolding->simple;
    918         slotBits|=slots[count];
    919         ++count;
    920         excWord|=U_MASK(UCASE_EXC_FOLD);
    921     }
    922     if(p->upperCase!=0) {
    923         slots[count]=(uint32_t)p->upperCase;
    924         slotBits|=slots[count];
    925         ++count;
    926         excWord|=U_MASK(UCASE_EXC_UPPER);
    927     }
    928     if(p->upperCase!=p->titleCase) {
    929         if(p->titleCase!=0) {
    930             slots[count]=(uint32_t)p->titleCase;
    931         } else {
    932             slots[count]=(uint32_t)p->code;
    933         }
    934         slotBits|=slots[count];
    935         ++count;
    936         excWord|=U_MASK(UCASE_EXC_TITLE);
    937     }
    938 
    939     /* length of case closure */
    940     if(p->closure[0]!=0) {
    941         length=getLengthOfCodePoints(p->closure, LENGTHOF(p->closure));
    942         slots[count]=(uint32_t)length; /* must be 1..UCASE_CLOSURE_MAX_LENGTH */
    943         slotBits|=slots[count];
    944         ++count;
    945         excWord|=U_MASK(UCASE_EXC_CLOSURE);
    946     }
    947 
    948     /* lengths of full case mapping strings, stored in the last slot */
    949     fullLengths=0;
    950     if(p->specialCasing!=NULL) {
    951         fullLengths=p->specialCasing->lowerCase[0];
    952         fullLengths|=p->specialCasing->upperCase[0]<<8;
    953         fullLengths|=p->specialCasing->titleCase[0]<<12;
    954     }
    955     if(p->caseFolding!=NULL) {
    956         fullLengths|=p->caseFolding->full[0]<<4;
    957     }
    958     if(fullLengths!=0) {
    959         slots[count]=fullLengths;
    960         slotBits|=slots[count];
    961         ++count;
    962         excWord|=U_MASK(UCASE_EXC_FULL_MAPPINGS);
    963     }
    964 
    965     /* write slots */
    966     doubleSlots=(UBool)(slotBits>0xffff);
    967     if(!doubleSlots) {
    968         for(i=0; i<count; ++i) {
    969             exceptions[excTop++]=(uint16_t)slots[i];
    970         }
    971     } else {
    972         excWord|=UCASE_EXC_DOUBLE_SLOTS;
    973         for(i=0; i<count; ++i) {
    974             exceptions[excTop++]=(uint16_t)(slots[i]>>16);
    975             exceptions[excTop++]=(uint16_t)slots[i];
    976         }
    977     }
    978 
    979     /* write the full case mapping strings */
    980     if(p->specialCasing!=NULL) {
    981         length=(uint16_t)p->specialCasing->lowerCase[0];
    982         u_memcpy((UChar *)exceptions+excTop, p->specialCasing->lowerCase+1, length);
    983         excTop+=length;
    984     }
    985     if(p->caseFolding!=NULL) {
    986         length=(uint16_t)p->caseFolding->full[0];
    987         u_memcpy((UChar *)exceptions+excTop, p->caseFolding->full+1, length);
    988         excTop+=length;
    989     }
    990     if(p->specialCasing!=NULL) {
    991         length=(uint16_t)p->specialCasing->upperCase[0];
    992         u_memcpy((UChar *)exceptions+excTop, p->specialCasing->upperCase+1, length);
    993         excTop+=length;
    994 
    995         length=(uint16_t)p->specialCasing->titleCase[0];
    996         u_memcpy((UChar *)exceptions+excTop, p->specialCasing->titleCase+1, length);
    997         excTop+=length;
    998     }
    999 
   1000     /* write the closure data */
   1001     if(p->closure[0]!=0) {
   1002         UChar32 c;
   1003 
   1004         for(i=0; i<LENGTHOF(p->closure) && (c=p->closure[i])!=0; ++i) {
   1005             U16_APPEND_UNSAFE((UChar *)exceptions, excTop, c);
   1006         }
   1007     }
   1008 
   1009     exceptionsTop=excTop;
   1010 
   1011     /* write the main exceptions word */
   1012     exceptions[excIndex]=excWord;
   1013 
   1014     return excIndex;
   1015 }
   1016 
   1017 extern void
   1018 makeExceptions() {
   1019     uint32_t *row;
   1020     uint32_t value;
   1021     int32_t i;
   1022     uint16_t excIndex;
   1023 
   1024     i=0;
   1025     while((row=upvec_getRow(pv, i, NULL, NULL))!=NULL) {
   1026         value=*row;
   1027         if(value&UCASE_EXCEPTION) {
   1028             excIndex=makeException(value, excProps+(value>>UGENCASE_EXC_SHIFT));
   1029             *row=(value&~(UGENCASE_EXC_MASK|UCASE_EXC_MASK))|(excIndex<<UCASE_EXC_SHIFT);
   1030         }
   1031         ++i;
   1032     }
   1033 }
   1034 
   1035 /* generate output data ----------------------------------------------------- */
   1036 
   1037 extern void
   1038 generateData(const char *dataDir, UBool csource) {
   1039     static int32_t indexes[UCASE_IX_TOP]={
   1040         UCASE_IX_TOP
   1041     };
   1042     static uint8_t trieBlock[40000];
   1043 
   1044     const uint32_t *row;
   1045     UChar32 start, end;
   1046     int32_t i;
   1047 
   1048     UNewDataMemory *pData;
   1049     UNewTrie *pTrie;
   1050     UErrorCode errorCode=U_ZERO_ERROR;
   1051     int32_t trieSize;
   1052     long dataLength;
   1053 
   1054     pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
   1055     if(pTrie==NULL) {
   1056         fprintf(stderr, "gencase error: unable to create a UNewTrie\n");
   1057         exit(U_MEMORY_ALLOCATION_ERROR);
   1058     }
   1059 
   1060     for(i=0; (row=upvec_getRow(pv, i, &start, &end))!=NULL; ++i) {
   1061         if(start<UPVEC_FIRST_SPECIAL_CP && !utrie_setRange32(pTrie, start, end+1, *row, TRUE)) {
   1062             fprintf(stderr, "gencase error: unable to set trie value (overflow)\n");
   1063             exit(U_BUFFER_OVERFLOW_ERROR);
   1064         }
   1065     }
   1066 
   1067     trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
   1068     if(U_FAILURE(errorCode)) {
   1069         fprintf(stderr, "error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
   1070         exit(errorCode);
   1071     }
   1072 
   1073     indexes[UCASE_IX_EXC_LENGTH]=exceptionsTop;
   1074     indexes[UCASE_IX_TRIE_SIZE]=trieSize;
   1075     indexes[UCASE_IX_UNFOLD_LENGTH]=unfoldTop;
   1076     indexes[UCASE_IX_LENGTH]=(int32_t)sizeof(indexes)+trieSize+2*exceptionsTop+2*unfoldTop;
   1077 
   1078     indexes[UCASE_IX_MAX_FULL_LENGTH]=maxFullLength;
   1079 
   1080     if(beVerbose) {
   1081         printf("trie size in bytes:                    %5d\n", (int)trieSize);
   1082         printf("number of code points with exceptions: %5d\n", exceptionsCount);
   1083         printf("size in bytes of exceptions:           %5d\n", 2*exceptionsTop);
   1084         printf("size in bytes of reverse foldings:     %5d\n", 2*unfoldTop);
   1085         printf("data size:                             %5d\n", (int)indexes[UCASE_IX_LENGTH]);
   1086     }
   1087 
   1088     if(csource) {
   1089         /* write .c file for hardcoded data */
   1090         UTrie trie={ NULL };
   1091         UTrie2 *trie2;
   1092         FILE *f;
   1093 
   1094         utrie_unserialize(&trie, trieBlock, trieSize, &errorCode);
   1095         if(U_FAILURE(errorCode)) {
   1096             fprintf(
   1097                 stderr,
   1098                 "gencase error: failed to utrie_unserialize(ucase.icu trie) - %s\n",
   1099                 u_errorName(errorCode));
   1100             exit(errorCode);
   1101         }
   1102 
   1103         /* use UTrie2 */
   1104         dataInfo.formatVersion[0]=2;
   1105         dataInfo.formatVersion[2]=0;
   1106         dataInfo.formatVersion[3]=0;
   1107         trie2=utrie2_fromUTrie(&trie, 0, &errorCode);
   1108         if(U_FAILURE(errorCode)) {
   1109             fprintf(
   1110                 stderr,
   1111                 "gencase error: utrie2_fromUTrie() failed - %s\n",
   1112                 u_errorName(errorCode));
   1113             exit(errorCode);
   1114         }
   1115         {
   1116             /* delete lead surrogate code unit values */
   1117             UChar lead;
   1118             trie2=utrie2_cloneAsThawed(trie2, &errorCode);
   1119             for(lead=0xd800; lead<0xdc00; ++lead) {
   1120                 utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode);
   1121             }
   1122             utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode);
   1123             if(U_FAILURE(errorCode)) {
   1124                 fprintf(
   1125                     stderr,
   1126                     "gencase error: deleting lead surrogate code unit values failed - %s\n",
   1127                     u_errorName(errorCode));
   1128                 exit(errorCode);
   1129             }
   1130         }
   1131 
   1132         f=usrc_create(dataDir, "ucase_props_data.c");
   1133         if(f!=NULL) {
   1134             usrc_writeArray(f,
   1135                 "static const UVersionInfo ucase_props_dataVersion={",
   1136                 dataInfo.dataVersion, 8, 4,
   1137                 "};\n\n");
   1138             usrc_writeArray(f,
   1139                 "static const int32_t ucase_props_indexes[UCASE_IX_TOP]={",
   1140                 indexes, 32, UCASE_IX_TOP,
   1141                 "};\n\n");
   1142             usrc_writeUTrie2Arrays(f,
   1143                 "static const uint16_t ucase_props_trieIndex[%ld]={\n", NULL,
   1144                 trie2,
   1145                 "\n};\n\n");
   1146             usrc_writeArray(f,
   1147                 "static const uint16_t ucase_props_exceptions[%ld]={\n",
   1148                 exceptions, 16, exceptionsTop,
   1149                 "\n};\n\n");
   1150             usrc_writeArray(f,
   1151                 "static const uint16_t ucase_props_unfold[%ld]={\n",
   1152                 unfold, 16, unfoldTop,
   1153                 "\n};\n\n");
   1154             fputs(
   1155                 "static const UCaseProps ucase_props_singleton={\n"
   1156                 "  NULL,\n"
   1157                 "  ucase_props_indexes,\n"
   1158                 "  ucase_props_exceptions,\n"
   1159                 "  ucase_props_unfold,\n",
   1160                 f);
   1161             usrc_writeUTrie2Struct(f,
   1162                 "  {\n",
   1163                 trie2, "ucase_props_trieIndex", NULL,
   1164                 "  },\n");
   1165             usrc_writeArray(f, "  { ", dataInfo.formatVersion, 8, 4, " }\n");
   1166             fputs("};\n", f);
   1167             fclose(f);
   1168         }
   1169         utrie2_close(trie2);
   1170     } else {
   1171         /* write the data */
   1172         pData=udata_create(dataDir, UCASE_DATA_TYPE, UCASE_DATA_NAME, &dataInfo,
   1173                         haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
   1174         if(U_FAILURE(errorCode)) {
   1175             fprintf(stderr, "gencase: unable to create data memory, %s\n", u_errorName(errorCode));
   1176             exit(errorCode);
   1177         }
   1178 
   1179         udata_writeBlock(pData, indexes, sizeof(indexes));
   1180         udata_writeBlock(pData, trieBlock, trieSize);
   1181         udata_writeBlock(pData, exceptions, 2*exceptionsTop);
   1182         udata_writeBlock(pData, unfold, 2*unfoldTop);
   1183 
   1184         /* finish up */
   1185         dataLength=udata_finish(pData, &errorCode);
   1186         if(U_FAILURE(errorCode)) {
   1187             fprintf(stderr, "gencase: error %d writing the output file\n", errorCode);
   1188             exit(errorCode);
   1189         }
   1190 
   1191         if(dataLength!=indexes[UCASE_IX_LENGTH]) {
   1192             fprintf(stderr, "gencase: data length %ld != calculated size %d\n",
   1193                 dataLength, (int)indexes[UCASE_IX_LENGTH]);
   1194             exit(U_INTERNAL_PROGRAM_ERROR);
   1195         }
   1196     }
   1197 
   1198     utrie_close(pTrie);
   1199 }
   1200 
   1201 /*
   1202  * Hey, Emacs, please set the following:
   1203  *
   1204  * Local Variables:
   1205  * indent-tabs-mode: nil
   1206  * End:
   1207  *
   1208  */
   1209