Home | History | Annotate | Download | only in gennorm2
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2009-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  n2builder.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2009nov25
     14 *   created by: Markus W. Scherer
     15 *
     16 * Builds Normalizer2 data and writes a binary .nrm file.
     17 * For the file format see source/common/normalizer2impl.h.
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 #include "n2builder.h"
     22 
     23 #include <stdio.h>
     24 #include <stdlib.h>
     25 #include <string.h>
     26 #if U_HAVE_STD_STRING
     27 #include <vector>
     28 #endif
     29 #include "unicode/errorcode.h"
     30 #include "unicode/localpointer.h"
     31 #include "unicode/putil.h"
     32 #include "unicode/udata.h"
     33 #include "unicode/uniset.h"
     34 #include "unicode/unistr.h"
     35 #include "unicode/ustring.h"
     36 #include "hash.h"
     37 #include "normalizer2impl.h"
     38 #include "toolutil.h"
     39 #include "unewdata.h"
     40 #include "utrie2.h"
     41 #include "uvectr32.h"
     42 
     43 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     44 
     45 #if !UCONFIG_NO_NORMALIZATION
     46 
     47 /* UDataInfo cf. udata.h */
     48 static UDataInfo dataInfo={
     49     sizeof(UDataInfo),
     50     0,
     51 
     52     U_IS_BIG_ENDIAN,
     53     U_CHARSET_FAMILY,
     54     U_SIZEOF_UCHAR,
     55     0,
     56 
     57     { 0x4e, 0x72, 0x6d, 0x32 }, /* dataFormat="Nrm2" */
     58     { 1, 0, 0, 0 },             /* formatVersion */
     59     { 5, 2, 0, 0 }              /* dataVersion (Unicode version) */
     60 };
     61 
     62 U_NAMESPACE_BEGIN
     63 
     64 class HangulIterator {
     65 public:
     66     struct Range {
     67         UChar32 start, limit;
     68         uint16_t norm16;
     69     };
     70 
     71     HangulIterator() : rangeIndex(0) {}
     72     const Range *nextRange() {
     73         if(rangeIndex<LENGTHOF(ranges)) {
     74             return ranges+rangeIndex++;
     75         } else {
     76             return NULL;
     77         }
     78     }
     79     void reset() { rangeIndex=0; }
     80 private:
     81     static const Range ranges[4];
     82     int32_t rangeIndex;
     83 };
     84 
     85 const HangulIterator::Range HangulIterator::ranges[4]={
     86     { Hangul::JAMO_L_BASE, Hangul::JAMO_L_BASE+Hangul::JAMO_L_COUNT, 1 },
     87     { Hangul::JAMO_V_BASE, Hangul::JAMO_V_BASE+Hangul::JAMO_V_COUNT, Normalizer2Impl::JAMO_VT },
     88     // JAMO_T_BASE+1: not U+11A7
     89     { Hangul::JAMO_T_BASE+1, Hangul::JAMO_T_BASE+Hangul::JAMO_T_COUNT, Normalizer2Impl::JAMO_VT },
     90     { Hangul::HANGUL_BASE, Hangul::HANGUL_BASE+Hangul::HANGUL_COUNT, 0 },  // will become minYesNo
     91 };
     92 
     93 struct CompositionPair {
     94     CompositionPair(UChar32 t, UChar32 c) : trail(t), composite(c) {}
     95     UChar32 trail, composite;
     96 };
     97 
     98 struct Norm {
     99     enum MappingType { NONE, REMOVED, ROUND_TRIP, ONE_WAY };
    100 
    101     UBool hasMapping() const { return mappingType>REMOVED; }
    102 
    103     // Requires hasMapping() and well-formed mapping.
    104     void setMappingCP() {
    105         UChar32 c;
    106         if(!mapping->isEmpty() && mapping->length()==U16_LENGTH(c=mapping->char32At(0))) {
    107             mappingCP=c;
    108         } else {
    109             mappingCP=U_SENTINEL;
    110         }
    111     }
    112 
    113     const CompositionPair *getCompositionPairs(int32_t &length) const {
    114         if(compositions==NULL) {
    115             length=0;
    116             return NULL;
    117         } else {
    118             length=compositions->size()/2;
    119             return reinterpret_cast<const CompositionPair *>(compositions->getBuffer());
    120         }
    121     }
    122 
    123     UnicodeString *mapping;
    124     UChar32 mappingCP;  // >=0 if mapping to 1 code point
    125     int32_t mappingPhase;
    126     MappingType mappingType;
    127 
    128     UVector32 *compositions;  // (trail, composite) pairs
    129     uint8_t cc;
    130     UBool combinesBack;
    131     UBool hasNoCompBoundaryAfter;
    132 
    133     enum OffsetType {
    134         OFFSET_NONE, OFFSET_MAYBE_YES,
    135         OFFSET_YES_YES, OFFSET_YES_NO, OFFSET_NO_NO,
    136         OFFSET_DELTA
    137     };
    138     enum { OFFSET_SHIFT=4, OFFSET_MASK=(1<<OFFSET_SHIFT)-1 };
    139     int32_t offset;
    140 };
    141 
    142 class Normalizer2DBEnumerator {
    143 public:
    144     Normalizer2DBEnumerator(Normalizer2DataBuilder &b) : builder(b) {}
    145     virtual ~Normalizer2DBEnumerator() {}
    146     virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) = 0;
    147     Normalizer2DBEnumerator *ptr() { return this; }
    148 protected:
    149     Normalizer2DataBuilder &builder;
    150 };
    151 
    152 U_CDECL_BEGIN
    153 
    154 static UBool U_CALLCONV
    155 enumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    156     return ((Normalizer2DBEnumerator *)context)->rangeHandler(start, end, value);
    157 }
    158 
    159 U_CDECL_END
    160 
    161 Normalizer2DataBuilder::Normalizer2DataBuilder(UErrorCode &errorCode) :
    162         phase(0), overrideHandling(OVERRIDE_PREVIOUS), optimization(OPTIMIZE_NORMAL) {
    163     memset(unicodeVersion, 0, sizeof(unicodeVersion));
    164     normTrie=utrie2_open(0, 0, &errorCode);
    165     normMem=utm_open("gennorm2 normalization structs", 10000, 0x110100, sizeof(Norm));
    166     norms=allocNorm();  // unused Norm struct at index 0
    167     memset(indexes, 0, sizeof(indexes));
    168 }
    169 
    170 Normalizer2DataBuilder::~Normalizer2DataBuilder() {
    171     utrie2_close(normTrie);
    172     int32_t normsLength=utm_countItems(normMem);
    173     for(int32_t i=1; i<normsLength; ++i) {
    174         delete norms[i].mapping;
    175         delete norms[i].compositions;
    176     }
    177     utm_close(normMem);
    178     utrie2_close(norm16Trie);
    179 }
    180 
    181 void
    182 Normalizer2DataBuilder::setUnicodeVersion(const char *v) {
    183     u_versionFromString(unicodeVersion, v);
    184 }
    185 
    186 Norm *Normalizer2DataBuilder::allocNorm() {
    187     Norm *p=(Norm *)utm_alloc(normMem);
    188     norms=(Norm *)utm_getStart(normMem);  // in case it got reallocated
    189     return p;
    190 }
    191 
    192 /* get an existing Norm unit */
    193 Norm *Normalizer2DataBuilder::getNorm(UChar32 c) {
    194     uint32_t i=utrie2_get32(normTrie, c);
    195     if(i==0) {
    196         return NULL;
    197     }
    198     return norms+i;
    199 }
    200 
    201 const Norm &Normalizer2DataBuilder::getNormRef(UChar32 c) const {
    202     return norms[utrie2_get32(normTrie, c)];
    203 }
    204 
    205 /*
    206  * get or create a Norm unit;
    207  * get or create the intermediate trie entries for it as well
    208  */
    209 Norm *Normalizer2DataBuilder::createNorm(UChar32 c) {
    210     uint32_t i=utrie2_get32(normTrie, c);
    211     if(i!=0) {
    212         return norms+i;
    213     } else {
    214         /* allocate Norm */
    215         Norm *p=allocNorm();
    216         IcuToolErrorCode errorCode("gennorm2/createNorm()");
    217         utrie2_set32(normTrie, c, (uint32_t)(p-norms), errorCode);
    218         return p;
    219     }
    220 }
    221 
    222 Norm *Normalizer2DataBuilder::checkNormForMapping(Norm *p, UChar32 c) {
    223     if(p!=NULL) {
    224         if(p->mappingType!=Norm::NONE) {
    225             if( overrideHandling==OVERRIDE_NONE ||
    226                 (overrideHandling==OVERRIDE_PREVIOUS && p->mappingPhase==phase)
    227             ) {
    228                 fprintf(stderr,
    229                         "error in gennorm2 phase %d: "
    230                         "not permitted to override mapping for U+%04lX from phase %d\n",
    231                         (int)phase, (long)c, (int)p->mappingPhase);
    232                 exit(U_INVALID_FORMAT_ERROR);
    233             }
    234             delete p->mapping;
    235             p->mapping=NULL;
    236         }
    237         p->mappingPhase=phase;
    238     }
    239     return p;
    240 }
    241 
    242 void Normalizer2DataBuilder::setOverrideHandling(OverrideHandling oh) {
    243     overrideHandling=oh;
    244     ++phase;
    245 }
    246 
    247 void Normalizer2DataBuilder::setCC(UChar32 c, uint8_t cc) {
    248     createNorm(c)->cc=cc;
    249 }
    250 
    251 uint8_t Normalizer2DataBuilder::getCC(UChar32 c) const {
    252     return getNormRef(c).cc;
    253 }
    254 
    255 static UBool isWellFormed(const UnicodeString &s) {
    256     UErrorCode errorCode=U_ZERO_ERROR;
    257     u_strToUTF8(NULL, 0, NULL, s.getBuffer(), s.length(), &errorCode);
    258     return U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR;
    259 }
    260 
    261 void Normalizer2DataBuilder::setOneWayMapping(UChar32 c, const UnicodeString &m) {
    262     if(!isWellFormed(m)) {
    263         fprintf(stderr,
    264                 "error in gennorm2 phase %d: "
    265                 "illegal one-way mapping from U+%04lX to malformed string\n",
    266                 (int)phase, (long)c);
    267         exit(U_INVALID_FORMAT_ERROR);
    268     }
    269     Norm *p=checkNormForMapping(createNorm(c), c);
    270     p->mapping=new UnicodeString(m);
    271     p->mappingType=Norm::ONE_WAY;
    272     p->setMappingCP();
    273 }
    274 
    275 void Normalizer2DataBuilder::setRoundTripMapping(UChar32 c, const UnicodeString &m) {
    276     if(U_IS_SURROGATE(c)) {
    277         fprintf(stderr,
    278                 "error in gennorm2 phase %d: "
    279                 "illegal round-trip mapping from surrogate code point U+%04lX\n",
    280                 (int)phase, (long)c);
    281         exit(U_INVALID_FORMAT_ERROR);
    282     }
    283     if(!isWellFormed(m)) {
    284         fprintf(stderr,
    285                 "error in gennorm2 phase %d: "
    286                 "illegal round-trip mapping from U+%04lX to malformed string\n",
    287                 (int)phase, (long)c);
    288         exit(U_INVALID_FORMAT_ERROR);
    289     }
    290     int32_t numCP=u_countChar32(m.getBuffer(), m.length());
    291     if(numCP!=2) {
    292         fprintf(stderr,
    293                 "error in gennorm2 phase %d: "
    294                 "illegal round-trip mapping from U+%04lX to %d!=2 code points\n",
    295                 (int)phase, (long)c, (int)numCP);
    296         exit(U_INVALID_FORMAT_ERROR);
    297     }
    298     Norm *p=checkNormForMapping(createNorm(c), c);
    299     p->mapping=new UnicodeString(m);
    300     p->mappingType=Norm::ROUND_TRIP;
    301     p->mappingCP=U_SENTINEL;
    302 }
    303 
    304 void Normalizer2DataBuilder::removeMapping(UChar32 c) {
    305     Norm *p=checkNormForMapping(getNorm(c), c);
    306     if(p!=NULL) {
    307         p->mappingType=Norm::REMOVED;
    308     }
    309 }
    310 
    311 class CompositionBuilder : public Normalizer2DBEnumerator {
    312 public:
    313     CompositionBuilder(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b) {}
    314     virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
    315         builder.addComposition(start, end, value);
    316         return TRUE;
    317     }
    318 };
    319 
    320 void
    321 Normalizer2DataBuilder::addComposition(UChar32 start, UChar32 end, uint32_t value) {
    322     if(norms[value].mappingType==Norm::ROUND_TRIP) {
    323         if(start!=end) {
    324             fprintf(stderr,
    325                     "gennorm2 error: same round-trip mapping for "
    326                     "more than 1 code point U+%04lX..U+%04lX\n",
    327                     (long)start, (long)end);
    328             exit(U_INVALID_FORMAT_ERROR);
    329         }
    330         if(norms[value].cc!=0) {
    331             fprintf(stderr,
    332                     "gennorm2 error: "
    333                     "U+%04lX has a round-trip mapping and ccc!=0, "
    334                     "not possible in Unicode normalization\n",
    335                     (long)start);
    336             exit(U_INVALID_FORMAT_ERROR);
    337         }
    338         // setRoundTripMapping() ensured that there are exactly two code points.
    339         const UnicodeString &m=*norms[value].mapping;
    340         UChar32 lead=m.char32At(0);
    341         UChar32 trail=m.char32At(m.length()-1);
    342         if(getCC(lead)!=0) {
    343             fprintf(stderr,
    344                     "gennorm2 error: "
    345                     "U+%04lX's round-trip mapping's starter U+%04lX has ccc!=0, "
    346                     "not possible in Unicode normalization\n",
    347                     (long)start, (long)lead);
    348             exit(U_INVALID_FORMAT_ERROR);
    349         }
    350         // Flag for trailing character.
    351         createNorm(trail)->combinesBack=TRUE;
    352         // Insert (trail, composite) pair into compositions list for the lead character.
    353         IcuToolErrorCode errorCode("gennorm2/addComposition()");
    354         Norm *leadNorm=createNorm(lead);
    355         UVector32 *compositions=leadNorm->compositions;
    356         int32_t i;
    357         if(compositions==NULL) {
    358             compositions=leadNorm->compositions=new UVector32(errorCode);
    359             i=0;  // "insert" the first pair at index 0
    360         } else {
    361             // Insertion sort, and check for duplicate trail characters.
    362             int32_t length;
    363             const CompositionPair *pairs=leadNorm->getCompositionPairs(length);
    364             for(i=0; i<length; ++i) {
    365                 if(trail==pairs[i].trail) {
    366                     fprintf(stderr,
    367                             "gennorm2 error: same round-trip mapping for "
    368                             "more than 1 code point (e.g., U+%04lX) to U+%04lX + U+%04lX\n",
    369                             (long)start, (long)lead, (long)trail);
    370                     exit(U_INVALID_FORMAT_ERROR);
    371                 }
    372                 if(trail<pairs[i].trail) {
    373                     break;
    374                 }
    375             }
    376         }
    377         compositions->insertElementAt(trail, 2*i, errorCode);
    378         compositions->insertElementAt(start, 2*i+1, errorCode);
    379     }
    380 }
    381 
    382 UBool Normalizer2DataBuilder::combinesWithCCBetween(const Norm &norm,
    383                                                     uint8_t lowCC, uint8_t highCC) const {
    384     if((highCC-lowCC)>=2) {
    385         int32_t length;
    386         const CompositionPair *pairs=norm.getCompositionPairs(length);
    387         for(int32_t i=0; i<length; ++i) {
    388             uint8_t trailCC=getCC(pairs[i].trail);
    389             if(lowCC<trailCC && trailCC<highCC) {
    390                 return TRUE;
    391             }
    392         }
    393     }
    394     return FALSE;
    395 }
    396 
    397 UChar32 Normalizer2DataBuilder::combine(const Norm &norm, UChar32 trail) const {
    398     int32_t length;
    399     const CompositionPair *pairs=norm.getCompositionPairs(length);
    400     for(int32_t i=0; i<length; ++i) {
    401         if(trail==pairs[i].trail) {
    402             return pairs[i].composite;
    403         }
    404         if(trail<pairs[i].trail) {
    405             break;
    406         }
    407     }
    408     return U_SENTINEL;
    409 }
    410 
    411 class Decomposer : public Normalizer2DBEnumerator {
    412 public:
    413     Decomposer(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b), didDecompose(FALSE) {}
    414     virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
    415         didDecompose|=builder.decompose(start, end, value);
    416         return TRUE;
    417     }
    418     UBool didDecompose;
    419 };
    420 
    421 UBool
    422 Normalizer2DataBuilder::decompose(UChar32 start, UChar32 end, uint32_t value) {
    423     if(norms[value].hasMapping()) {
    424         const UnicodeString &m=*norms[value].mapping;
    425         UnicodeString *decomposed=NULL;
    426         const UChar *s=m.getBuffer();
    427         int32_t length=m.length();
    428         int32_t prev, i=0;
    429         UChar32 c;
    430         while(i<length) {
    431             prev=i;
    432             U16_NEXT(s, i, length, c);
    433             if(start<=c && c<=end) {
    434                 fprintf(stderr,
    435                         "gennorm2 error: U+%04lX maps to itself directly or indirectly\n",
    436                         (long)c);
    437                 exit(U_INVALID_FORMAT_ERROR);
    438             }
    439             const Norm &cNorm=getNormRef(c);
    440             if(cNorm.hasMapping()) {
    441                 if(norms[value].mappingType==Norm::ROUND_TRIP) {
    442                     if(prev==0) {
    443                         if(cNorm.mappingType!=Norm::ROUND_TRIP) {
    444                             fprintf(stderr,
    445                                     "gennorm2 error: "
    446                                     "U+%04lX's round-trip mapping's starter "
    447                                     "U+%04lX one-way-decomposes, "
    448                                     "not possible in Unicode normalization\n",
    449                                     (long)start, (long)c);
    450                             exit(U_INVALID_FORMAT_ERROR);
    451                         }
    452                         uint8_t myTrailCC=getCC(m.char32At(i));
    453                         UChar32 cTrailChar=cNorm.mapping->char32At(cNorm.mapping->length()-1);
    454                         uint8_t cTrailCC=getCC(cTrailChar);
    455                         if(cTrailCC>myTrailCC) {
    456                             fprintf(stderr,
    457                                     "gennorm2 error: "
    458                                     "U+%04lX's round-trip mapping's starter "
    459                                     "U+%04lX decomposes and the "
    460                                     "inner/earlier tccc=%hu > outer/following tccc=%hu, "
    461                                     "not possible in Unicode normalization\n",
    462                                     (long)start, (long)c,
    463                                     (short)cTrailCC, (short)myTrailCC);
    464                             exit(U_INVALID_FORMAT_ERROR);
    465                         }
    466                     } else {
    467                         fprintf(stderr,
    468                                 "gennorm2 error: "
    469                                 "U+%04lX's round-trip mapping's non-starter "
    470                                 "U+%04lX decomposes, "
    471                                 "not possible in Unicode normalization\n",
    472                                 (long)start, (long)c);
    473                         exit(U_INVALID_FORMAT_ERROR);
    474                     }
    475                 }
    476                 if(decomposed==NULL) {
    477                     decomposed=new UnicodeString(m, 0, prev);
    478                 }
    479                 decomposed->append(*cNorm.mapping);
    480             } else if(Hangul::isHangul(c)) {
    481                 UChar buffer[3];
    482                 int32_t hangulLength=Hangul::decompose(c, buffer);
    483                 if(norms[value].mappingType==Norm::ROUND_TRIP && prev!=0) {
    484                     fprintf(stderr,
    485                             "gennorm2 error: "
    486                             "U+%04lX's round-trip mapping's non-starter "
    487                             "U+%04lX decomposes, "
    488                             "not possible in Unicode normalization\n",
    489                             (long)start, (long)c);
    490                     exit(U_INVALID_FORMAT_ERROR);
    491                 }
    492                 if(decomposed==NULL) {
    493                     decomposed=new UnicodeString(m, 0, prev);
    494                 }
    495                 decomposed->append(buffer, hangulLength);
    496             } else if(decomposed!=NULL) {
    497                 decomposed->append(m, prev, i-prev);
    498             }
    499         }
    500         if(decomposed!=NULL) {
    501             delete norms[value].mapping;
    502             norms[value].mapping=decomposed;
    503             // Not  norms[value].setMappingCP();  because the original mapping
    504             // is most likely to be encodable as a delta.
    505             return TRUE;
    506         }
    507     }
    508     return FALSE;
    509 }
    510 
    511 class BuilderReorderingBuffer {
    512 public:
    513     BuilderReorderingBuffer() : fLength(0), fLastStarterIndex(-1), fDidReorder(FALSE) {}
    514     void reset() {
    515         fLength=0;
    516         fLastStarterIndex=-1;
    517         fDidReorder=FALSE;
    518     }
    519     int32_t length() const { return fLength; }
    520     UBool isEmpty() const { return fLength==0; }
    521     int32_t lastStarterIndex() const { return fLastStarterIndex; }
    522     UChar32 charAt(int32_t i) const { return fArray[i]>>8; }
    523     uint8_t ccAt(int32_t i) const { return (uint8_t)fArray[i]; }
    524     UBool didReorder() const { return fDidReorder; }
    525     void append(UChar32 c, uint8_t cc) {
    526         if(cc==0 || fLength==0 || ccAt(fLength-1)<=cc) {
    527             if(cc==0) {
    528                 fLastStarterIndex=fLength;
    529             }
    530             fArray[fLength++]=(c<<8)|cc;
    531             return;
    532         }
    533         // Let this character bubble back to its canonical order.
    534         int32_t i=fLength-1;
    535         while(i>fLastStarterIndex && ccAt(i)>cc) {
    536             --i;
    537         }
    538         ++i;  // after the last starter or prevCC<=cc
    539         // Move this and the following characters forward one to make space.
    540         for(int32_t j=fLength; i<j; --j) {
    541             fArray[j]=fArray[j-1];
    542         }
    543         fArray[i]=(c<<8)|cc;
    544         ++fLength;
    545         fDidReorder=TRUE;
    546     }
    547     void toString(UnicodeString &dest) {
    548         dest.remove();
    549         for(int32_t i=0; i<fLength; ++i) {
    550             dest.append(charAt(i));
    551         }
    552     }
    553     void setComposite(UChar32 composite, int32_t combMarkIndex) {
    554         fArray[fLastStarterIndex]=composite<<8;
    555         // Remove the combining mark that contributed to the composite.
    556         --fLength;
    557         while(combMarkIndex<fLength) {
    558             fArray[combMarkIndex]=fArray[combMarkIndex+1];
    559             ++combMarkIndex;
    560         }
    561     }
    562 private:
    563     int32_t fArray[Normalizer2Impl::MAPPING_LENGTH_MASK];
    564     int32_t fLength;
    565     int32_t fLastStarterIndex;
    566     UBool fDidReorder;
    567 };
    568 
    569 void
    570 Normalizer2DataBuilder::reorder(Norm *p, BuilderReorderingBuffer &buffer) {
    571     UnicodeString &m=*p->mapping;
    572     int32_t length=m.length();
    573     if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) {
    574         return;  // writeMapping() will complain about it and print the code point.
    575     }
    576     const UChar *s=m.getBuffer();
    577     int32_t i=0;
    578     UChar32 c;
    579     while(i<length) {
    580         U16_NEXT(s, i, length, c);
    581         buffer.append(c, getCC(c));
    582     }
    583     if(buffer.didReorder()) {
    584         buffer.toString(m);
    585     }
    586 }
    587 
    588 UBool Normalizer2DataBuilder::hasNoCompBoundaryAfter(BuilderReorderingBuffer &buffer) {
    589     if(buffer.isEmpty()) {
    590         return TRUE;  // maps-to-empty string is no boundary of any kind
    591     }
    592     int32_t lastStarterIndex=buffer.lastStarterIndex();
    593     if(lastStarterIndex<0) {
    594         return TRUE;  // no starter
    595     }
    596     UChar32 starter=buffer.charAt(lastStarterIndex);
    597     if( Hangul::isJamoL(starter) ||
    598         (Hangul::isJamoV(starter) &&
    599          0<lastStarterIndex && Hangul::isJamoL(buffer.charAt(lastStarterIndex-1)))
    600     ) {
    601         // A Jamo leading consonant or an LV pair combines-forward if it is at the end,
    602         // otherwise it is blocked.
    603         return lastStarterIndex==buffer.length()-1;
    604     }
    605     // no Hangul in fully decomposed mapping
    606     const Norm *starterNorm=&getNormRef(starter);
    607     if(starterNorm->compositions==NULL) {
    608         return FALSE;  // the last starter does not combine forward
    609     }
    610     // Compose as far as possible, and see if further compositions are possible.
    611     uint8_t prevCC=0;
    612     for(int32_t combMarkIndex=lastStarterIndex+1; combMarkIndex<buffer.length();) {
    613         uint8_t cc=buffer.ccAt(combMarkIndex);  // !=0 because after last starter
    614         if(combinesWithCCBetween(*starterNorm, prevCC, cc)) {
    615             return TRUE;
    616         }
    617         if( prevCC<cc &&
    618             (starter=combine(*starterNorm, buffer.charAt(combMarkIndex)))>=0
    619         ) {
    620             buffer.setComposite(starter, combMarkIndex);
    621             starterNorm=&getNormRef(starter);
    622             if(starterNorm->compositions==NULL) {
    623                 return FALSE;  // the composite does not combine further
    624             }
    625         } else {
    626             prevCC=cc;
    627             ++combMarkIndex;
    628         }
    629     }
    630     // TRUE if the final, forward-combining starter is at the end.
    631     return prevCC==0;
    632 }
    633 
    634 // Requires p->hasMapping().
    635 void Normalizer2DataBuilder::writeMapping(UChar32 c, const Norm *p, UnicodeString &dataString) {
    636     UnicodeString &m=*p->mapping;
    637     int32_t length=m.length();
    638     if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) {
    639         fprintf(stderr,
    640                 "gennorm2 error: "
    641                 "mapping for U+%04lX longer than maximum of %d\n",
    642                 (long)c, Normalizer2Impl::MAPPING_LENGTH_MASK);
    643         exit(U_INVALID_FORMAT_ERROR);
    644     }
    645     int32_t leadCC, trailCC;
    646     if(length==0) {
    647         leadCC=trailCC=0;
    648     } else {
    649         leadCC=getCC(m.char32At(0));
    650         trailCC=getCC(m.char32At(length-1));
    651     }
    652     if(c<Normalizer2Impl::MIN_CCC_LCCC_CP && (p->cc!=0 || leadCC!=0)) {
    653         fprintf(stderr,
    654                 "gennorm2 error: "
    655                 "U+%04lX below U+0300 has ccc!=0 or lccc!=0, not supported by ICU\n",
    656                 (long)c);
    657         exit(U_INVALID_FORMAT_ERROR);
    658     }
    659     int32_t firstUnit=length|(trailCC<<8);
    660     int32_t secondUnit=p->cc|(leadCC<<8);
    661     if(secondUnit!=0) {
    662         firstUnit|=Normalizer2Impl::MAPPING_HAS_CCC_LCCC_WORD;
    663     }
    664     if(p->compositions!=NULL) {
    665         firstUnit|=Normalizer2Impl::MAPPING_PLUS_COMPOSITION_LIST;
    666     }
    667     if(p->hasNoCompBoundaryAfter) {
    668         firstUnit|=Normalizer2Impl::MAPPING_NO_COMP_BOUNDARY_AFTER;
    669     }
    670     dataString.append((UChar)firstUnit);
    671     if(secondUnit!=0) {
    672         dataString.append((UChar)secondUnit);
    673     }
    674     dataString.append(m);
    675 }
    676 
    677 // Requires p->compositions!=NULL.
    678 void Normalizer2DataBuilder::writeCompositions(UChar32 c, const Norm *p, UnicodeString &dataString) {
    679     if(p->cc!=0) {
    680         fprintf(stderr,
    681                 "gennorm2 error: "
    682                 "U+%04lX combines-forward and has ccc!=0, not possible in Unicode normalization\n",
    683                 (long)c);
    684         exit(U_INVALID_FORMAT_ERROR);
    685     }
    686     int32_t length;
    687     const CompositionPair *pairs=p->getCompositionPairs(length);
    688     for(int32_t i=0; i<length; ++i) {
    689         const CompositionPair &pair=pairs[i];
    690         // 22 bits for the composite character and whether it combines forward.
    691         UChar32 compositeAndFwd=pair.composite<<1;
    692         if(getNormRef(pair.composite).compositions!=NULL) {
    693             compositeAndFwd|=1;  // The composite character also combines-forward.
    694         }
    695         // Encode most pairs in two units and some in three.
    696         int32_t firstUnit, secondUnit, thirdUnit;
    697         if(pair.trail<Normalizer2Impl::COMP_1_TRAIL_LIMIT) {
    698             if(compositeAndFwd<=0xffff) {
    699                 firstUnit=pair.trail<<1;
    700                 secondUnit=compositeAndFwd;
    701                 thirdUnit=-1;
    702             } else {
    703                 firstUnit=(pair.trail<<1)|Normalizer2Impl::COMP_1_TRIPLE;
    704                 secondUnit=compositeAndFwd>>16;
    705                 thirdUnit=compositeAndFwd;
    706             }
    707         } else {
    708             firstUnit=(Normalizer2Impl::COMP_1_TRAIL_LIMIT+
    709                        (pair.trail>>Normalizer2Impl::COMP_1_TRAIL_SHIFT))|
    710                       Normalizer2Impl::COMP_1_TRIPLE;
    711             secondUnit=(pair.trail<<Normalizer2Impl::COMP_2_TRAIL_SHIFT)|
    712                        (compositeAndFwd>>16);
    713             thirdUnit=compositeAndFwd;
    714         }
    715         // Set the high bit of the first unit if this is the last composition pair.
    716         if(i==(length-1)) {
    717             firstUnit|=Normalizer2Impl::COMP_1_LAST_TUPLE;
    718         }
    719         dataString.append((UChar)firstUnit).append((UChar)secondUnit);
    720         if(thirdUnit>=0) {
    721             dataString.append((UChar)thirdUnit);
    722         }
    723     }
    724 }
    725 
    726 class ExtraDataWriter : public Normalizer2DBEnumerator {
    727 public:
    728     ExtraDataWriter(Normalizer2DataBuilder &b) :
    729         Normalizer2DBEnumerator(b),
    730         yesYesCompositions(1000, (UChar32)0xffff, 2),  // 0=inert, 1=Jamo L, 2=start of compositions
    731         yesNoData(1000, (UChar32)0, 1) {}  // 0=Hangul, 1=start of normal data
    732     virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
    733         if(value!=0) {
    734             if(start!=end) {
    735                 fprintf(stderr,
    736                         "gennorm2 error: unexpected shared data for "
    737                         "multiple code points U+%04lX..U+%04lX\n",
    738                         (long)start, (long)end);
    739                 exit(U_INTERNAL_PROGRAM_ERROR);
    740             }
    741             builder.writeExtraData(start, value, *this);
    742         }
    743         return TRUE;
    744     }
    745     UnicodeString maybeYesCompositions;
    746     UnicodeString yesYesCompositions;
    747     UnicodeString yesNoData;
    748     UnicodeString noNoMappings;
    749     Hashtable previousNoNoMappings;  // If constructed in runtime code, pass in UErrorCode.
    750 };
    751 
    752 void Normalizer2DataBuilder::writeExtraData(UChar32 c, uint32_t value, ExtraDataWriter &writer) {
    753     Norm *p=norms+value;
    754     if(p->combinesBack) {
    755         if(p->hasMapping()) {
    756             fprintf(stderr,
    757                     "gennorm2 error: "
    758                     "U+%04lX combines-back and decomposes, not possible in Unicode normalization\n",
    759                     (long)c);
    760             exit(U_INVALID_FORMAT_ERROR);
    761         }
    762         if(p->compositions!=NULL) {
    763             p->offset=
    764                 (writer.maybeYesCompositions.length()<<Norm::OFFSET_SHIFT)|
    765                 Norm::OFFSET_MAYBE_YES;
    766             writeCompositions(c, p, writer.maybeYesCompositions);
    767         }
    768     } else if(!p->hasMapping()) {
    769         if(p->compositions!=NULL) {
    770             p->offset=
    771                 (writer.yesYesCompositions.length()<<Norm::OFFSET_SHIFT)|
    772                 Norm::OFFSET_YES_YES;
    773             writeCompositions(c, p, writer.yesYesCompositions);
    774         }
    775     } else if(p->mappingType==Norm::ROUND_TRIP) {
    776         p->offset=
    777             (writer.yesNoData.length()<<Norm::OFFSET_SHIFT)|
    778             Norm::OFFSET_YES_NO;
    779         writeMapping(c, p, writer.yesNoData);
    780         if(p->compositions!=NULL) {
    781             writeCompositions(c, p, writer.yesNoData);
    782         }
    783     } else /* one-way */ {
    784         if(p->compositions!=NULL) {
    785             fprintf(stderr,
    786                     "gennorm2 error: "
    787                     "U+%04lX combines-forward and has a one-way mapping, "
    788                     "not possible in Unicode normalization\n",
    789                     (long)c);
    790             exit(U_INVALID_FORMAT_ERROR);
    791         }
    792         if(p->cc==0 && optimization!=OPTIMIZE_FAST) {
    793             // Try a compact, algorithmic encoding.
    794             // Only for ccc=0, because we can't store additional information.
    795             if(p->mappingCP>=0) {
    796                 int32_t delta=p->mappingCP-c;
    797                 if(-Normalizer2Impl::MAX_DELTA<=delta && delta<=Normalizer2Impl::MAX_DELTA) {
    798                     p->offset=(delta<<Norm::OFFSET_SHIFT)|Norm::OFFSET_DELTA;
    799                 }
    800             }
    801         }
    802         if(p->offset==0) {
    803             int32_t oldNoNoLength=writer.noNoMappings.length();
    804             writeMapping(c, p, writer.noNoMappings);
    805             UnicodeString newMapping=writer.noNoMappings.tempSubString(oldNoNoLength);
    806             int32_t previousOffset=writer.previousNoNoMappings.geti(newMapping);
    807             if(previousOffset!=0) {
    808                 // Duplicate, remove the new units and point to the old ones.
    809                 writer.noNoMappings.truncate(oldNoNoLength);
    810                 p->offset=
    811                     ((previousOffset-1)<<Norm::OFFSET_SHIFT)|
    812                     Norm::OFFSET_NO_NO;
    813             } else {
    814                 // Enter this new mapping into the hashtable, avoiding value 0 which is "not found".
    815                 IcuToolErrorCode errorCode("gennorm2/writeExtraData()/Hashtable.puti()");
    816                 writer.previousNoNoMappings.puti(newMapping, oldNoNoLength+1, errorCode);
    817                 p->offset=
    818                     (oldNoNoLength<<Norm::OFFSET_SHIFT)|
    819                     Norm::OFFSET_NO_NO;
    820             }
    821         }
    822     }
    823 }
    824 
    825 class Norm16Writer : public Normalizer2DBEnumerator {
    826 public:
    827     Norm16Writer(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b) {}
    828     virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
    829         builder.writeNorm16(start, end, value);
    830         return TRUE;
    831     }
    832 };
    833 
    834 void Normalizer2DataBuilder::writeNorm16(UChar32 start, UChar32 end, uint32_t value) {
    835     if(value!=0) {
    836         const Norm *p=norms+value;
    837         int32_t offset=p->offset>>Norm::OFFSET_SHIFT;
    838         int32_t norm16=0;
    839         UBool isDecompNo=FALSE;
    840         UBool isCompNoMaybe=FALSE;
    841         switch(p->offset&Norm::OFFSET_MASK) {
    842         case Norm::OFFSET_NONE:
    843             // No mapping, no compositions list.
    844             if(p->combinesBack) {
    845                 norm16=Normalizer2Impl::MIN_NORMAL_MAYBE_YES+p->cc;
    846                 isDecompNo=(UBool)(p->cc!=0);
    847                 isCompNoMaybe=TRUE;
    848             } else if(p->cc!=0) {
    849                 norm16=Normalizer2Impl::MIN_YES_YES_WITH_CC-1+p->cc;
    850                 isDecompNo=isCompNoMaybe=TRUE;
    851             }
    852             break;
    853         case Norm::OFFSET_MAYBE_YES:
    854             norm16=indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]+offset;
    855             isCompNoMaybe=TRUE;
    856             break;
    857         case Norm::OFFSET_YES_YES:
    858             norm16=offset;
    859             break;
    860         case Norm::OFFSET_YES_NO:
    861             norm16=indexes[Normalizer2Impl::IX_MIN_YES_NO]+offset;
    862             isDecompNo=TRUE;
    863             break;
    864         case Norm::OFFSET_NO_NO:
    865             norm16=indexes[Normalizer2Impl::IX_MIN_NO_NO]+offset;
    866             isDecompNo=isCompNoMaybe=TRUE;
    867             break;
    868         case Norm::OFFSET_DELTA:
    869             norm16=getCenterNoNoDelta()+offset;
    870             isDecompNo=isCompNoMaybe=TRUE;
    871             break;
    872         default:  // Should not occur.
    873             exit(U_INTERNAL_PROGRAM_ERROR);
    874         }
    875         IcuToolErrorCode errorCode("gennorm2/writeNorm16()");
    876         utrie2_setRange32(norm16Trie, start, end, (uint32_t)norm16, TRUE, errorCode);
    877         if(isDecompNo && start<indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]) {
    878             indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=start;
    879         }
    880         if(isCompNoMaybe && start<indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]) {
    881             indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=start;
    882         }
    883     }
    884 }
    885 
    886 void Normalizer2DataBuilder::setHangulData() {
    887     HangulIterator hi;
    888     const HangulIterator::Range *range;
    889     // Check that none of the Hangul/Jamo code points have data.
    890     while((range=hi.nextRange())!=NULL) {
    891         for(UChar32 c=range->start; c<range->limit; ++c) {
    892             if(utrie2_get32(norm16Trie, c)!=0) {
    893                 fprintf(stderr,
    894                         "gennorm2 error: "
    895                         "illegal mapping/composition/ccc data for Hangul or Jamo U+%04lX\n",
    896                         (long)c);
    897                 exit(U_INVALID_FORMAT_ERROR);
    898             }
    899         }
    900     }
    901     // Set data for algorithmic runtime handling.
    902     IcuToolErrorCode errorCode("gennorm2/setHangulData()");
    903     hi.reset();
    904     while((range=hi.nextRange())!=NULL) {
    905         uint16_t norm16=range->norm16;
    906         if(norm16==0) {
    907             norm16=(uint16_t)indexes[Normalizer2Impl::IX_MIN_YES_NO];  // Hangul LV/LVT encoded as minYesNo
    908             if(range->start<indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]) {
    909                 indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=range->start;
    910             }
    911         } else {
    912             if(range->start<indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]) {  // Jamo V/T are maybeYes
    913                 indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=range->start;
    914             }
    915         }
    916         utrie2_setRange32(norm16Trie, range->start, range->limit-1, norm16, TRUE, errorCode);
    917         errorCode.assertSuccess();
    918     }
    919 }
    920 
    921 U_CDECL_BEGIN
    922 
    923 static UBool U_CALLCONV
    924 enumRangeMaxValue(const void *context, UChar32 /*start*/, UChar32 /*end*/, uint32_t value) {
    925     uint32_t *pMaxValue=(uint32_t *)context;
    926     if(value>*pMaxValue) {
    927         *pMaxValue=value;
    928     }
    929     return TRUE;
    930 }
    931 
    932 U_CDECL_END
    933 
    934 void Normalizer2DataBuilder::processData() {
    935     IcuToolErrorCode errorCode("gennorm2/processData()");
    936     norm16Trie=utrie2_open(0, 0, errorCode);
    937     errorCode.assertSuccess();
    938 
    939     utrie2_enum(normTrie, NULL, enumRangeHandler, CompositionBuilder(*this).ptr());
    940 
    941     Decomposer decomposer(*this);
    942     do {
    943         decomposer.didDecompose=FALSE;
    944         utrie2_enum(normTrie, NULL, enumRangeHandler, &decomposer);
    945     } while(decomposer.didDecompose);
    946 
    947     BuilderReorderingBuffer buffer;
    948     int32_t normsLength=utm_countItems(normMem);
    949     for(int32_t i=1; i<normsLength; ++i) {
    950         if(norms[i].hasMapping()) {
    951             buffer.reset();
    952             reorder(norms+i, buffer);
    953             norms[i].hasNoCompBoundaryAfter=hasNoCompBoundaryAfter(buffer);
    954         }
    955     }
    956 
    957     indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=0x110000;
    958     indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=0x110000;
    959 
    960     ExtraDataWriter extraDataWriter(*this);
    961     utrie2_enum(normTrie, NULL, enumRangeHandler, &extraDataWriter);
    962 
    963     extraData=extraDataWriter.maybeYesCompositions;
    964     extraData.append(extraDataWriter.yesYesCompositions).
    965               append(extraDataWriter.yesNoData).
    966               append(extraDataWriter.noNoMappings);
    967     // Pad to even length for 4-byte alignment of following data.
    968     if(extraData.length()&1) {
    969         extraData.append((UChar)0);
    970     }
    971 
    972     indexes[Normalizer2Impl::IX_MIN_YES_NO]=
    973         extraDataWriter.yesYesCompositions.length();
    974     indexes[Normalizer2Impl::IX_MIN_NO_NO]=
    975         indexes[Normalizer2Impl::IX_MIN_YES_NO]+
    976         extraDataWriter.yesNoData.length();
    977     indexes[Normalizer2Impl::IX_LIMIT_NO_NO]=
    978         indexes[Normalizer2Impl::IX_MIN_NO_NO]+
    979         extraDataWriter.noNoMappings.length();
    980     indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]=
    981         Normalizer2Impl::MIN_NORMAL_MAYBE_YES-
    982         extraDataWriter.maybeYesCompositions.length();
    983 
    984     int32_t minNoNoDelta=getCenterNoNoDelta()-Normalizer2Impl::MAX_DELTA;
    985     if(indexes[Normalizer2Impl::IX_LIMIT_NO_NO]>minNoNoDelta) {
    986         fprintf(stderr,
    987                 "gennorm2 error: "
    988                 "data structure overflow, too much mapping composition data\n");
    989         exit(U_BUFFER_OVERFLOW_ERROR);
    990     }
    991 
    992     utrie2_enum(normTrie, NULL, enumRangeHandler, Norm16Writer(*this).ptr());
    993 
    994     setHangulData();
    995 
    996     // Look for the "worst" norm16 value of any supplementary code point
    997     // corresponding to a lead surrogate, and set it as that surrogate's value.
    998     // Enables quick check inner loops to look at only code units.
    999     //
   1000     // We could be more sophisticated:
   1001     // We could collect a bit set for whether there are values in the different
   1002     // norm16 ranges (yesNo, maybeYes, yesYesWithCC etc.)
   1003     // and select the best value that only breaks the composition and/or decomposition
   1004     // inner loops if necessary.
   1005     // However, that seems like overkill for an optimization for supplementary characters.
   1006     for(UChar lead=0xd800; lead<0xdc00; ++lead) {
   1007         uint32_t maxValue=utrie2_get32(norm16Trie, lead);
   1008         utrie2_enumForLeadSurrogate(norm16Trie, lead, NULL, enumRangeMaxValue, &maxValue);
   1009         if( maxValue>=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO] &&
   1010             maxValue>(uint32_t)indexes[Normalizer2Impl::IX_MIN_NO_NO]
   1011         ) {
   1012             // Set noNo ("worst" value) if it got into "less-bad" maybeYes or ccc!=0.
   1013             // Otherwise it might end up at something like JAMO_VT which stays in
   1014             // the inner decomposition quick check loop.
   1015             maxValue=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]-1;
   1016         }
   1017         utrie2_set32ForLeadSurrogateCodeUnit(norm16Trie, lead, maxValue, errorCode);
   1018     }
   1019 
   1020     // Adjust supplementary minimum code points to break quick check loops at their lead surrogates.
   1021     // For an empty data file, minCP=0x110000 turns into 0xdc00 (first trail surrogate)
   1022     // which is harmless.
   1023     // As a result, the minimum code points are always BMP code points.
   1024     int32_t minCP=indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP];
   1025     if(minCP>=0x10000) {
   1026         indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=U16_LEAD(minCP);
   1027     }
   1028     minCP=indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP];
   1029     if(minCP>=0x10000) {
   1030         indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=U16_LEAD(minCP);
   1031     }
   1032 }
   1033 
   1034 void Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
   1035     processData();
   1036 
   1037     IcuToolErrorCode errorCode("gennorm2/writeBinaryFile()");
   1038     utrie2_freeze(norm16Trie, UTRIE2_16_VALUE_BITS, errorCode);
   1039     int32_t norm16TrieLength=utrie2_serialize(norm16Trie, NULL, 0, errorCode);
   1040     if(errorCode.get()!=U_BUFFER_OVERFLOW_ERROR) {
   1041         fprintf(stderr, "gennorm2 error: unable to freeze/serialize the normalization trie - %s\n",
   1042                 errorCode.errorName());
   1043         exit(errorCode.reset());
   1044     }
   1045     errorCode.reset();
   1046     LocalArray<uint8_t> norm16TrieBytes(new uint8_t[norm16TrieLength]);
   1047     utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode);
   1048     errorCode.assertSuccess();
   1049 
   1050     int32_t offset=(int32_t)sizeof(indexes);
   1051     indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET]=offset;
   1052     offset+=norm16TrieLength;
   1053     indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET]=offset;
   1054     int32_t totalSize=offset+=extraData.length()*2;
   1055     for(int32_t i=Normalizer2Impl::IX_RESERVED2_OFFSET; i<=Normalizer2Impl::IX_TOTAL_SIZE; ++i) {
   1056         indexes[i]=totalSize;
   1057     }
   1058 
   1059     if(beVerbose) {
   1060         printf("size of normalization trie:         %5ld bytes\n", (long)norm16TrieLength);
   1061         printf("size of 16-bit extra data:          %5ld uint16_t\n", (long)extraData.length());
   1062         printf("size of binary data file contents:  %5ld bytes\n", (long)totalSize);
   1063         printf("minDecompNoCodePoint:              U+%04lX\n", (long)indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]);
   1064         printf("minCompNoMaybeCodePoint:           U+%04lX\n", (long)indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]);
   1065         printf("minYesNo:                          0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_YES_NO]);
   1066         printf("minNoNo:                           0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_NO_NO]);
   1067         printf("limitNoNo:                         0x%04x\n", (int)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]);
   1068         printf("minMaybeYes:                       0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]);
   1069     }
   1070 
   1071     memcpy(dataInfo.dataVersion, unicodeVersion, 4);
   1072     UNewDataMemory *pData=
   1073         udata_create(NULL, NULL, filename, &dataInfo,
   1074                      haveCopyright ? U_COPYRIGHT_STRING : NULL, errorCode);
   1075     if(errorCode.isFailure()) {
   1076         fprintf(stderr, "gennorm2 error: unable to create the output file %s - %s\n",
   1077                 filename, errorCode.errorName());
   1078         exit(errorCode.reset());
   1079     }
   1080     udata_writeBlock(pData, indexes, sizeof(indexes));
   1081     udata_writeBlock(pData, norm16TrieBytes.getAlias(), norm16TrieLength);
   1082     udata_writeUString(pData, extraData.getBuffer(), extraData.length());
   1083 
   1084     int32_t writtenSize=udata_finish(pData, errorCode);
   1085     if(errorCode.isFailure()) {
   1086         fprintf(stderr, "gennorm2: error %s writing the output file\n", errorCode.errorName());
   1087         exit(errorCode.reset());
   1088     }
   1089     if(writtenSize!=totalSize) {
   1090         fprintf(stderr, "gennorm2 error: written size %ld != calculated size %ld\n",
   1091             (long)writtenSize, (long)totalSize);
   1092         exit(U_INTERNAL_PROGRAM_ERROR);
   1093     }
   1094 }
   1095 
   1096 U_NAMESPACE_END
   1097 
   1098 #endif /* #if !UCONFIG_NO_NORMALIZATION */
   1099 
   1100 /*
   1101  * Hey, Emacs, please set the following:
   1102  *
   1103  * Local Variables:
   1104  * indent-tabs-mode: nil
   1105  * End:
   1106  */
   1107