Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 2001-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *   file name:  utrie2.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2008aug16 (starting from a copy of utrie.c)
     14 *   created by: Markus W. Scherer
     15 *
     16 *   This is a common implementation of a Unicode trie.
     17 *   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
     18 *   Unicode code points (0..0x10ffff).
     19 *   This is the second common version of a Unicode trie (hence the name UTrie2).
     20 *   See utrie2.h for a comparison.
     21 *
     22 *   This file contains only the runtime and enumeration code, for read-only access.
     23 *   See utrie2_builder.c for the builder code.
     24 */
     25 #ifdef UTRIE2_DEBUG
     26 #   include <stdio.h>
     27 #endif
     28 
     29 #include "unicode/utypes.h"
     30 #include "cmemory.h"
     31 #include "utrie2.h"
     32 #include "utrie2_impl.h"
     33 
     34 /* Public UTrie2 API implementation ----------------------------------------- */
     35 
     36 static uint32_t
     37 get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) {
     38     int32_t i2, block;
     39 
     40     if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) {
     41         return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY];
     42     }
     43 
     44     if(U_IS_LEAD(c) && fromLSCP) {
     45         i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
     46             (c>>UTRIE2_SHIFT_2);
     47     } else {
     48         i2=trie->index1[c>>UTRIE2_SHIFT_1]+
     49             ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
     50     }
     51     block=trie->index2[i2];
     52     return trie->data[block+(c&UTRIE2_DATA_MASK)];
     53 }
     54 
     55 U_CAPI uint32_t U_EXPORT2
     56 utrie2_get32(const UTrie2 *trie, UChar32 c) {
     57     if(trie->data16!=NULL) {
     58         return UTRIE2_GET16(trie, c);
     59     } else if(trie->data32!=NULL) {
     60         return UTRIE2_GET32(trie, c);
     61     } else if((uint32_t)c>0x10ffff) {
     62         return trie->errorValue;
     63     } else {
     64         return get32(trie->newTrie, c, TRUE);
     65     }
     66 }
     67 
     68 U_CAPI uint32_t U_EXPORT2
     69 utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) {
     70     if(!U_IS_LEAD(c)) {
     71         return trie->errorValue;
     72     }
     73     if(trie->data16!=NULL) {
     74         return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c);
     75     } else if(trie->data32!=NULL) {
     76         return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
     77     } else {
     78         return get32(trie->newTrie, c, FALSE);
     79     }
     80 }
     81 
     82 static U_INLINE int32_t
     83 u8Index(const UTrie2 *trie, UChar32 c, int32_t i) {
     84     int32_t idx=
     85         _UTRIE2_INDEX_FROM_CP(
     86             trie,
     87             trie->data32==NULL ? trie->indexLength : 0,
     88             c);
     89     return (idx<<3)|i;
     90 }
     91 
     92 U_CAPI int32_t U_EXPORT2
     93 utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
     94                            const uint8_t *src, const uint8_t *limit) {
     95     int32_t i, length;
     96     i=0;
     97     /* support 64-bit pointers by avoiding cast of arbitrary difference */
     98     if((limit-src)<=7) {
     99         length=(int32_t)(limit-src);
    100     } else {
    101         length=7;
    102     }
    103     c=utf8_nextCharSafeBody(src, &i, length, c, -1);
    104     return u8Index(trie, c, i);
    105 }
    106 
    107 U_CAPI int32_t U_EXPORT2
    108 utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
    109                            const uint8_t *start, const uint8_t *src) {
    110     int32_t i, length;
    111     /* support 64-bit pointers by avoiding cast of arbitrary difference */
    112     if((src-start)<=7) {
    113         i=length=(int32_t)(src-start);
    114     } else {
    115         i=length=7;
    116         start=src-7;
    117     }
    118     c=utf8_prevCharSafeBody(start, 0, &i, c, -1);
    119     i=length-i;  /* number of bytes read backward from src */
    120     return u8Index(trie, c, i);
    121 }
    122 
    123 U_CAPI UTrie2 * U_EXPORT2
    124 utrie2_openFromSerialized(UTrie2ValueBits valueBits,
    125                           const void *data, int32_t length, int32_t *pActualLength,
    126                           UErrorCode *pErrorCode) {
    127     const UTrie2Header *header;
    128     const uint16_t *p16;
    129     int32_t actualLength;
    130 
    131     UTrie2 tempTrie;
    132     UTrie2 *trie;
    133 
    134     if(U_FAILURE(*pErrorCode)) {
    135         return 0;
    136     }
    137 
    138     if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) ||
    139         valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
    140     ) {
    141         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    142         return 0;
    143     }
    144 
    145     /* enough data for a trie header? */
    146     if(length<(int32_t)sizeof(UTrie2Header)) {
    147         *pErrorCode=U_INVALID_FORMAT_ERROR;
    148         return 0;
    149     }
    150 
    151     /* check the signature */
    152     header=(const UTrie2Header *)data;
    153     if(header->signature!=UTRIE2_SIG) {
    154         *pErrorCode=U_INVALID_FORMAT_ERROR;
    155         return 0;
    156     }
    157 
    158     /* get the options */
    159     if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) {
    160         *pErrorCode=U_INVALID_FORMAT_ERROR;
    161         return 0;
    162     }
    163 
    164     /* get the length values and offsets */
    165     uprv_memset(&tempTrie, 0, sizeof(tempTrie));
    166     tempTrie.indexLength=header->indexLength;
    167     tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT;
    168     tempTrie.index2NullOffset=header->index2NullOffset;
    169     tempTrie.dataNullOffset=header->dataNullOffset;
    170 
    171     tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1;
    172     tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY;
    173     if(valueBits==UTRIE2_16_VALUE_BITS) {
    174         tempTrie.highValueIndex+=tempTrie.indexLength;
    175     }
    176 
    177     /* calculate the actual length */
    178     actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2;
    179     if(valueBits==UTRIE2_16_VALUE_BITS) {
    180         actualLength+=tempTrie.dataLength*2;
    181     } else {
    182         actualLength+=tempTrie.dataLength*4;
    183     }
    184     if(length<actualLength) {
    185         *pErrorCode=U_INVALID_FORMAT_ERROR;  /* not enough bytes */
    186         return 0;
    187     }
    188 
    189     /* allocate the trie */
    190     trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
    191     if(trie==NULL) {
    192         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    193         return 0;
    194     }
    195     uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
    196     trie->memory=(uint32_t *)data;
    197     trie->length=actualLength;
    198     trie->isMemoryOwned=FALSE;
    199 
    200     /* set the pointers to its index and data arrays */
    201     p16=(const uint16_t *)(header+1);
    202     trie->index=p16;
    203     p16+=trie->indexLength;
    204 
    205     /* get the data */
    206     switch(valueBits) {
    207     case UTRIE2_16_VALUE_BITS:
    208         trie->data16=p16;
    209         trie->data32=NULL;
    210         trie->initialValue=trie->index[trie->dataNullOffset];
    211         trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET];
    212         break;
    213     case UTRIE2_32_VALUE_BITS:
    214         trie->data16=NULL;
    215         trie->data32=(const uint32_t *)p16;
    216         trie->initialValue=trie->data32[trie->dataNullOffset];
    217         trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
    218         break;
    219     default:
    220         *pErrorCode=U_INVALID_FORMAT_ERROR;
    221         return 0;
    222     }
    223 
    224     if(pActualLength!=NULL) {
    225         *pActualLength=actualLength;
    226     }
    227     return trie;
    228 }
    229 
    230 U_CAPI UTrie2 * U_EXPORT2
    231 utrie2_openDummy(UTrie2ValueBits valueBits,
    232                  uint32_t initialValue, uint32_t errorValue,
    233                  UErrorCode *pErrorCode) {
    234     UTrie2 *trie;
    235     UTrie2Header *header;
    236     uint32_t *p;
    237     uint16_t *dest16;
    238     int32_t indexLength, dataLength, length, i;
    239     int32_t dataMove;  /* >0 if the data is moved to the end of the index array */
    240 
    241     if(U_FAILURE(*pErrorCode)) {
    242         return 0;
    243     }
    244 
    245     if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) {
    246         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    247         return 0;
    248     }
    249 
    250     /* calculate the total length of the dummy trie data */
    251     indexLength=UTRIE2_INDEX_1_OFFSET;
    252     dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY;
    253     length=(int32_t)sizeof(UTrie2Header)+indexLength*2;
    254     if(valueBits==UTRIE2_16_VALUE_BITS) {
    255         length+=dataLength*2;
    256     } else {
    257         length+=dataLength*4;
    258     }
    259 
    260     /* allocate the trie */
    261     trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
    262     if(trie==NULL) {
    263         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    264         return 0;
    265     }
    266     uprv_memset(trie, 0, sizeof(UTrie2));
    267     trie->memory=uprv_malloc(length);
    268     if(trie->memory==NULL) {
    269         uprv_free(trie);
    270         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    271         return 0;
    272     }
    273     trie->length=length;
    274     trie->isMemoryOwned=TRUE;
    275 
    276     /* set the UTrie2 fields */
    277     if(valueBits==UTRIE2_16_VALUE_BITS) {
    278         dataMove=indexLength;
    279     } else {
    280         dataMove=0;
    281     }
    282 
    283     trie->indexLength=indexLength;
    284     trie->dataLength=dataLength;
    285     trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET;
    286     trie->dataNullOffset=(uint16_t)dataMove;
    287     trie->initialValue=initialValue;
    288     trie->errorValue=errorValue;
    289     trie->highStart=0;
    290     trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET;
    291 
    292     /* set the header fields */
    293     header=(UTrie2Header *)trie->memory;
    294 
    295     header->signature=UTRIE2_SIG; /* "Tri2" */
    296     header->options=(uint16_t)valueBits;
    297 
    298     header->indexLength=(uint16_t)indexLength;
    299     header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT);
    300     header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET;
    301     header->dataNullOffset=(uint16_t)dataMove;
    302     header->shiftedHighStart=0;
    303 
    304     /* fill the index and data arrays */
    305     dest16=(uint16_t *)(header+1);
    306     trie->index=dest16;
    307 
    308     /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */
    309     for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
    310         *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT);  /* null data block */
    311     }
    312 
    313     /* write UTF-8 2-byte index-2 values, not right-shifted */
    314     for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
    315         *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
    316     }
    317     for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
    318         *dest16++=(uint16_t)dataMove;
    319     }
    320 
    321     /* write the 16/32-bit data array */
    322     switch(valueBits) {
    323     case UTRIE2_16_VALUE_BITS:
    324         /* write 16-bit data values */
    325         trie->data16=dest16;
    326         trie->data32=NULL;
    327         for(i=0; i<0x80; ++i) {
    328             *dest16++=(uint16_t)initialValue;
    329         }
    330         for(; i<0xc0; ++i) {
    331             *dest16++=(uint16_t)errorValue;
    332         }
    333         /* highValue and reserved values */
    334         for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
    335             *dest16++=(uint16_t)initialValue;
    336         }
    337         break;
    338     case UTRIE2_32_VALUE_BITS:
    339         /* write 32-bit data values */
    340         p=(uint32_t *)dest16;
    341         trie->data16=NULL;
    342         trie->data32=p;
    343         for(i=0; i<0x80; ++i) {
    344             *p++=initialValue;
    345         }
    346         for(; i<0xc0; ++i) {
    347             *p++=errorValue;
    348         }
    349         /* highValue and reserved values */
    350         for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
    351             *p++=initialValue;
    352         }
    353         break;
    354     default:
    355         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    356         return 0;
    357     }
    358 
    359     return trie;
    360 }
    361 
    362 U_CAPI void U_EXPORT2
    363 utrie2_close(UTrie2 *trie) {
    364     if(trie!=NULL) {
    365         if(trie->isMemoryOwned) {
    366             uprv_free(trie->memory);
    367         }
    368         if(trie->newTrie!=NULL) {
    369             uprv_free(trie->newTrie->data);
    370             uprv_free(trie->newTrie);
    371         }
    372         uprv_free(trie);
    373     }
    374 }
    375 
    376 U_CAPI int32_t U_EXPORT2
    377 utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) {
    378     uint32_t signature;
    379     if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) {
    380         return 0;
    381     }
    382     signature=*(const uint32_t *)data;
    383     if(signature==UTRIE2_SIG) {
    384         return 2;
    385     }
    386     if(anyEndianOk && signature==UTRIE2_OE_SIG) {
    387         return 2;
    388     }
    389     if(signature==UTRIE_SIG) {
    390         return 1;
    391     }
    392     if(anyEndianOk && signature==UTRIE_OE_SIG) {
    393         return 1;
    394     }
    395     return 0;
    396 }
    397 
    398 U_CAPI int32_t U_EXPORT2
    399 utrie2_swap(const UDataSwapper *ds,
    400             const void *inData, int32_t length, void *outData,
    401             UErrorCode *pErrorCode) {
    402     const UTrie2Header *inTrie;
    403     UTrie2Header trie;
    404     int32_t dataLength, size;
    405     UTrie2ValueBits valueBits;
    406 
    407     if(U_FAILURE(*pErrorCode)) {
    408         return 0;
    409     }
    410     if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
    411         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    412         return 0;
    413     }
    414 
    415     /* setup and swapping */
    416     if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
    417         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    418         return 0;
    419     }
    420 
    421     inTrie=(const UTrie2Header *)inData;
    422     trie.signature=ds->readUInt32(inTrie->signature);
    423     trie.options=ds->readUInt16(inTrie->options);
    424     trie.indexLength=ds->readUInt16(inTrie->indexLength);
    425     trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
    426 
    427     valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
    428     dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
    429 
    430     if( trie.signature!=UTRIE2_SIG ||
    431         valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
    432         trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
    433         dataLength<UTRIE2_DATA_START_OFFSET
    434     ) {
    435         *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
    436         return 0;
    437     }
    438 
    439     size=sizeof(UTrie2Header)+trie.indexLength*2;
    440     switch(valueBits) {
    441     case UTRIE2_16_VALUE_BITS:
    442         size+=dataLength*2;
    443         break;
    444     case UTRIE2_32_VALUE_BITS:
    445         size+=dataLength*4;
    446         break;
    447     default:
    448         *pErrorCode=U_INVALID_FORMAT_ERROR;
    449         return 0;
    450     }
    451 
    452     if(length>=0) {
    453         UTrie2Header *outTrie;
    454 
    455         if(length<size) {
    456             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    457             return 0;
    458         }
    459 
    460         outTrie=(UTrie2Header *)outData;
    461 
    462         /* swap the header */
    463         ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
    464         ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
    465 
    466         /* swap the index and the data */
    467         switch(valueBits) {
    468         case UTRIE2_16_VALUE_BITS:
    469             ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
    470             break;
    471         case UTRIE2_32_VALUE_BITS:
    472             ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
    473             ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
    474                                      (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
    475             break;
    476         default:
    477             *pErrorCode=U_INVALID_FORMAT_ERROR;
    478             return 0;
    479         }
    480     }
    481 
    482     return size;
    483 }
    484 
    485 // utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c
    486 // to avoid a dependency from utrie2.cpp on utrie.c.
    487 
    488 /* enumeration -------------------------------------------------------------- */
    489 
    490 #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b))
    491 
    492 /* default UTrie2EnumValue() returns the input value itself */
    493 static uint32_t U_CALLCONV
    494 enumSameValue(const void * /*context*/, uint32_t value) {
    495     return value;
    496 }
    497 
    498 /**
    499  * Enumerate all ranges of code points with the same relevant values.
    500  * The values are transformed from the raw trie entries by the enumValue function.
    501  *
    502  * Currently requires start<limit and both start and limit must be multiples
    503  * of UTRIE2_DATA_BLOCK_LENGTH.
    504  *
    505  * Optimizations:
    506  * - Skip a whole block if we know that it is filled with a single value,
    507  *   and it is the same as we visited just before.
    508  * - Handle the null block specially because we know a priori that it is filled
    509  *   with a single value.
    510  */
    511 static void
    512 enumEitherTrie(const UTrie2 *trie,
    513                UChar32 start, UChar32 limit,
    514                UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
    515     const uint32_t *data32;
    516     const uint16_t *idx;
    517 
    518     uint32_t value, prevValue, initialValue;
    519     UChar32 c, prev, highStart;
    520     int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
    521 
    522     if(enumRange==NULL) {
    523         return;
    524     }
    525     if(enumValue==NULL) {
    526         enumValue=enumSameValue;
    527     }
    528 
    529     if(trie->newTrie==NULL) {
    530         /* frozen trie */
    531         idx=trie->index;
    532         data32=trie->data32;
    533 
    534         index2NullOffset=trie->index2NullOffset;
    535         nullBlock=trie->dataNullOffset;
    536     } else {
    537         /* unfrozen, mutable trie */
    538         idx=NULL;
    539         data32=trie->newTrie->data;
    540 
    541         index2NullOffset=trie->newTrie->index2NullOffset;
    542         nullBlock=trie->newTrie->dataNullOffset;
    543     }
    544 
    545     highStart=trie->highStart;
    546 
    547     /* get the enumeration value that corresponds to an initial-value trie data entry */
    548     initialValue=enumValue(context, trie->initialValue);
    549 
    550     /* set variables for previous range */
    551     prevI2Block=-1;
    552     prevBlock=-1;
    553     prev=start;
    554     prevValue=0;
    555 
    556     /* enumerate index-2 blocks */
    557     for(c=start; c<limit && c<highStart;) {
    558         /* Code point limit for iterating inside this i2Block. */
    559         UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY;
    560         if(limit<tempLimit) {
    561             tempLimit=limit;
    562         }
    563         if(c<=0xffff) {
    564             if(!U_IS_SURROGATE(c)) {
    565                 i2Block=c>>UTRIE2_SHIFT_2;
    566             } else if(U_IS_SURROGATE_LEAD(c)) {
    567                 /*
    568                  * Enumerate values for lead surrogate code points, not code units:
    569                  * This special block has half the normal length.
    570                  */
    571                 i2Block=UTRIE2_LSCP_INDEX_2_OFFSET;
    572                 tempLimit=MIN_VALUE(0xdc00, limit);
    573             } else {
    574                 /*
    575                  * Switch back to the normal part of the index-2 table.
    576                  * Enumerate the second half of the surrogates block.
    577                  */
    578                 i2Block=0xd800>>UTRIE2_SHIFT_2;
    579                 tempLimit=MIN_VALUE(0xe000, limit);
    580             }
    581         } else {
    582             /* supplementary code points */
    583             if(idx!=NULL) {
    584                 i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+
    585                               (c>>UTRIE2_SHIFT_1)];
    586             } else {
    587                 i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1];
    588             }
    589             if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) {
    590                 /*
    591                  * The index-2 block is the same as the previous one, and filled with prevValue.
    592                  * Only possible for supplementary code points because the linear-BMP index-2
    593                  * table creates unique i2Block values.
    594                  */
    595                 c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
    596                 continue;
    597             }
    598         }
    599         prevI2Block=i2Block;
    600         if(i2Block==index2NullOffset) {
    601             /* this is the null index-2 block */
    602             if(prevValue!=initialValue) {
    603                 if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
    604                     return;
    605                 }
    606                 prevBlock=nullBlock;
    607                 prev=c;
    608                 prevValue=initialValue;
    609             }
    610             c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
    611         } else {
    612             /* enumerate data blocks for one index-2 block */
    613             int32_t i2, i2Limit;
    614             i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
    615             if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) {
    616                 i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
    617             } else {
    618                 i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH;
    619             }
    620             for(; i2<i2Limit; ++i2) {
    621                 if(idx!=NULL) {
    622                     block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT;
    623                 } else {
    624                     block=trie->newTrie->index2[i2Block+i2];
    625                 }
    626                 if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) {
    627                     /* the block is the same as the previous one, and filled with prevValue */
    628                     c+=UTRIE2_DATA_BLOCK_LENGTH;
    629                     continue;
    630                 }
    631                 prevBlock=block;
    632                 if(block==nullBlock) {
    633                     /* this is the null data block */
    634                     if(prevValue!=initialValue) {
    635                         if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
    636                             return;
    637                         }
    638                         prev=c;
    639                         prevValue=initialValue;
    640                     }
    641                     c+=UTRIE2_DATA_BLOCK_LENGTH;
    642                 } else {
    643                     for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) {
    644                         value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
    645                         if(value!=prevValue) {
    646                             if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
    647                                 return;
    648                             }
    649                             prev=c;
    650                             prevValue=value;
    651                         }
    652                         ++c;
    653                     }
    654                 }
    655             }
    656         }
    657     }
    658 
    659     if(c>limit) {
    660         c=limit;  /* could be higher if in the index2NullOffset */
    661     } else if(c<limit) {
    662         /* c==highStart<limit */
    663         uint32_t highValue;
    664         if(idx!=NULL) {
    665             highValue=
    666                 data32!=NULL ?
    667                     data32[trie->highValueIndex] :
    668                     idx[trie->highValueIndex];
    669         } else {
    670             highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY];
    671         }
    672         value=enumValue(context, highValue);
    673         if(value!=prevValue) {
    674             if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
    675                 return;
    676             }
    677             prev=c;
    678             prevValue=value;
    679         }
    680         c=limit;
    681     }
    682 
    683     /* deliver last range */
    684     enumRange(context, prev, c-1, prevValue);
    685 }
    686 
    687 U_CAPI void U_EXPORT2
    688 utrie2_enum(const UTrie2 *trie,
    689             UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
    690     enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context);
    691 }
    692 
    693 U_CAPI void U_EXPORT2
    694 utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
    695                             UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
    696                             const void *context) {
    697     if(!U16_IS_LEAD(lead)) {
    698         return;
    699     }
    700     lead=(lead-0xd7c0)<<10;   /* start code point */
    701     enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context);
    702 }
    703 
    704 /* C++ convenience wrappers ------------------------------------------------- */
    705 
    706 U_NAMESPACE_BEGIN
    707 
    708 uint16_t BackwardUTrie2StringIterator::previous16() {
    709     codePointLimit=codePointStart;
    710     if(start>=codePointStart) {
    711         codePoint=U_SENTINEL;
    712         return 0;
    713     }
    714     uint16_t result;
    715     UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result);
    716     return result;
    717 }
    718 
    719 uint16_t ForwardUTrie2StringIterator::next16() {
    720     codePointStart=codePointLimit;
    721     if(codePointLimit==limit) {
    722         codePoint=U_SENTINEL;
    723         return 0;
    724     }
    725     uint16_t result;
    726     UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result);
    727     return result;
    728 }
    729 
    730 UTrie2 *UTrie2Singleton::getInstance(InstantiatorFn *instantiator, const void *context,
    731                                      UErrorCode &errorCode) {
    732     void *duplicate;
    733     UTrie2 *instance=(UTrie2 *)singleton.getInstance(instantiator, context, duplicate, errorCode);
    734     utrie2_close((UTrie2 *)duplicate);
    735     return instance;
    736 }
    737 
    738 U_NAMESPACE_END
    739