Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2011, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *   file name:  unames.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 1999oct04
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 #include "unicode/putil.h"
     19 #include "unicode/uchar.h"
     20 #include "unicode/udata.h"
     21 #include "unicode/utf.h"
     22 #include "unicode/utf16.h"
     23 #include "ustr_imp.h"
     24 #include "umutex.h"
     25 #include "cmemory.h"
     26 #include "cstring.h"
     27 #include "ucln_cmn.h"
     28 #include "udataswp.h"
     29 #include "uprops.h"
     30 
     31 /* prototypes ------------------------------------------------------------- */
     32 
/* Number of elements of a true array (not a pointer), converted to int32_t. */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

/* Name and type passed to udata_openChoice() when loading the names data. */
static const char DATA_NAME[] = "unames";
static const char DATA_TYPE[] = "icu";

/*
 * Names are stored in groups of LINES_PER_GROUP (32) consecutive code points:
 * code>>GROUP_SHIFT selects the group, code&GROUP_MASK the line within it.
 */
#define GROUP_SHIFT 5
#define LINES_PER_GROUP (1L<<GROUP_SHIFT)
#define GROUP_MASK (LINES_PER_GROUP-1)
     41 
     42 /*
     43  * This struct was replaced by explicitly accessing equivalent
     44  * fields from triples of uint16_t.
     45  * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
     46  * which broke the assumption that sizeof(Group)==6 and that the ++ operator
     47  * would advance by 6 bytes (3 uint16_t).
     48  *
     49  * We can't just change the data structure because it's loaded from a data file,
     50  * and we don't want to make it less compact, so we changed the access code.
     51  *
     52  * For details see ICU tickets 6331 and 6008.
     53 typedef struct {
     54     uint16_t groupMSB,
     55              offsetHigh, offsetLow; / * avoid padding * /
     56 } Group;
     57  */
     58 enum {
     59     GROUP_MSB,
     60     GROUP_OFFSET_HIGH,
     61     GROUP_OFFSET_LOW,
     62     GROUP_LENGTH
     63 };
     64 
     65 /*
     66  * Get the 32-bit group offset.
     67  * @param group (const uint16_t *) pointer to a Group triple of uint16_t
     68  * @return group offset (int32_t)
     69  */
     70 #define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW])
     71 
     72 #define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
     73 #define PREV_GROUP(group) ((group)-GROUP_LENGTH)
     74 
     75 typedef struct {
     76     uint32_t start, end;
     77     uint8_t type, variant;
     78     uint16_t size;
     79 } AlgorithmicRange;
     80 
     81 typedef struct {
     82     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
     83 } UCharNames;
     84 
     85 /*
     86  * Get the groups table from a UCharNames struct.
     87  * The groups table consists of one uint16_t groupCount followed by
     88  * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
     89  * and the comment for the old struct Group above.
     90  *
     91  * @param names (const UCharNames *) pointer to the UCharNames indexes
     92  * @return (const uint16_t *) pointer to the groups table
     93  */
     94 #define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset)
     95 
/*
 * Context for the lookup mode (fn==DO_FIND_NAME) of the enumeration helpers:
 * otherName is the name that is searched for, and code receives the
 * code point whose stored name matched it.
 */
typedef struct {
    const char *otherName;
    UChar32 code;
} FindName;

/* Passed instead of a callback function to request a name lookup rather than an enumeration. */
#define DO_FIND_NAME NULL

/* Loaded data item and pointer to its contents; both are set by isDataLoaded(). */
static UDataMemory *uCharNamesData=NULL;
static UCharNames *uCharNames=NULL;
/* Error code of a failed load attempt; isDataLoaded() returns it again on later calls. */
static UErrorCode gLoadErrorCode=U_ZERO_ERROR;

/*
 * Maximum length of character names (regular & 1.0).
 * Reset to 0 by unames_cleanup().
 */
static int32_t gMaxNameLength=0;

/*
 * Set of chars used in character names (regular & 1.0).
 * Chars are platform-dependent (can be EBCDIC).
 * NOTE(review): 8*32 bits, presumably one bit per char value - confirm
 * against the code that fills this set (not in this section).
 */
static uint32_t gNameSet[8]={ 0 };
    117 
    118 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
    119 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
    120 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
    121 
    122 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
    123 
/*
 * Category names used by getExtName() for synthetic names, indexed by the
 * value that getCharCat() returns: the u_charType() result for regular
 * categories, followed by the three extra values U_NONCHARACTER_CODE_POINT,
 * U_LEAD_SURROGATE and U_TRAIL_SURROGATE as the last three entries.
 */
static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
    "unassigned",
    "uppercase letter",
    "lowercase letter",
    "titlecase letter",
    "modifier letter",
    "other letter",
    "non spacing mark",
    "enclosing mark",
    "combining spacing mark",
    "decimal digit number",
    "letter number",
    "other number",
    "space separator",
    "line separator",
    "paragraph separator",
    "control",
    "format",
    "private use area",
    "surrogate",
    "dash punctuation",
    "start punctuation",
    "end punctuation",
    "connector punctuation",
    "other punctuation",
    "math symbol",
    "currency symbol",
    "modifier symbol",
    "other symbol",
    "initial punctuation",
    "final punctuation",
    /* extra categories, see U_NONCHARACTER_CODE_POINT etc. */
    "noncharacter",
    "lead surrogate",
    "trail surrogate"
};
    159 
    160 /* implementation ----------------------------------------------------------- */
    161 
    162 static UBool U_CALLCONV unames_cleanup(void)
    163 {
    164     if(uCharNamesData) {
    165         udata_close(uCharNamesData);
    166         uCharNamesData = NULL;
    167     }
    168     if(uCharNames) {
    169         uCharNames = NULL;
    170     }
    171     gMaxNameLength=0;
    172     return TRUE;
    173 }
    174 
    175 static UBool U_CALLCONV
    176 isAcceptable(void * /*context*/,
    177              const char * /*type*/, const char * /*name*/,
    178              const UDataInfo *pInfo) {
    179     return (UBool)(
    180         pInfo->size>=20 &&
    181         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
    182         pInfo->charsetFamily==U_CHARSET_FAMILY &&
    183         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
    184         pInfo->dataFormat[1]==0x6e &&
    185         pInfo->dataFormat[2]==0x61 &&
    186         pInfo->dataFormat[3]==0x6d &&
    187         pInfo->formatVersion[0]==1);
    188 }
    189 
/*
 * Makes sure that the names data is loaded and uCharNames is set.
 *
 * @param pErrorCode receives the error code if loading fails, or the
 *                   remembered error code of an earlier failed attempt
 * @return TRUE if the data is available, FALSE if loading failed
 */
static UBool
isDataLoaded(UErrorCode *pErrorCode) {
    /* load UCharNames from file if necessary */
    UBool isCached;

    /* do this because double-checked locking is broken */
    /* NOTE(review): UMTX_CHECK presumably evaluates the condition under the mutex - confirm in umutex.h */
    UMTX_CHECK(NULL, (uCharNames!=NULL), isCached);

    if(!isCached) {
        UCharNames *names;
        UDataMemory *data;

        /* check error code from previous attempt */
        if(U_FAILURE(gLoadErrorCode)) {
            *pErrorCode=gLoadErrorCode;
            return FALSE;
        }

        /* open the data outside the mutex block */
        data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            /* remember the failure so that later calls do not try again */
            gLoadErrorCode=*pErrorCode;
            return FALSE;
        }

        names=(UCharNames *)udata_getMemory(data);

        /* in the mutex block, set the data for this process */
        {
            umtx_lock(NULL);
            if(uCharNames==NULL) {
                uCharNamesData=data;
                uCharNames=names;
                /* ownership moved to the globals; NULL marks that nothing is left to close below */
                data=NULL;
                names=NULL;
                ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
            }
            umtx_unlock(NULL);
        }

        /* if a different thread set it first, then close the extra data */
        if(data!=NULL) {
            udata_close(data); /* NULL if it was set correctly */
        }
    }
    return TRUE;
}
    237 
    238 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
    239     if((bufferLength)>0) { \
    240         *(buffer)++=c; \
    241         --(bufferLength); \
    242     } \
    243     ++(bufferPos); \
    244 }
    245 
    246 #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
    247 
    248 /*
    249  * Important: expandName() and compareName() are almost the same -
    250  * apply fixes to both.
    251  *
    252  * UnicodeData.txt uses ';' as a field separator, so no
    253  * field can contain ';' as part of its contents.
    254  * In unames.dat, it is marked as token[';']==-1 only if the
    255  * semicolon is used in the data file - which is iff we
    256  * have Unicode 1.0 names or ISO comments or aliases.
    257  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
    258  * although we know that it will never be part of a name.
    259  */
    260 static uint16_t
    261 expandName(UCharNames *names,
    262            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
    263            char *buffer, uint16_t bufferLength) {
    264     uint16_t *tokens=(uint16_t *)names+8;
    265     uint16_t token, tokenCount=*tokens++, bufferPos=0;
    266     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
    267     uint8_t c;
    268 
    269     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    270         /*
    271          * skip the modern name if it is not requested _and_
    272          * if the semicolon byte value is a character, not a token number
    273          */
    274         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    275             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
    276             do {
    277                 while(nameLength>0) {
    278                     --nameLength;
    279                     if(*name++==';') {
    280                         break;
    281                     }
    282                 }
    283             } while(--fieldIndex>0);
    284         } else {
    285             /*
    286              * the semicolon byte value is a token number, therefore
    287              * only modern names are stored in unames.dat and there is no
    288              * such requested alternate name here
    289              */
    290             nameLength=0;
    291         }
    292     }
    293 
    294     /* write each letter directly, and write a token word per token */
    295     while(nameLength>0) {
    296         --nameLength;
    297         c=*name++;
    298 
    299         if(c>=tokenCount) {
    300             if(c!=';') {
    301                 /* implicit letter */
    302                 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    303             } else {
    304                 /* finished */
    305                 break;
    306             }
    307         } else {
    308             token=tokens[c];
    309             if(token==(uint16_t)(-2)) {
    310                 /* this is a lead byte for a double-byte token */
    311                 token=tokens[c<<8|*name++];
    312                 --nameLength;
    313             }
    314             if(token==(uint16_t)(-1)) {
    315                 if(c!=';') {
    316                     /* explicit letter */
    317                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    318                 } else {
    319                     /* stop, but skip the semicolon if we are seeking
    320                        extended names and there was no 2.0 name but there
    321                        is a 1.0 name. */
    322                     if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
    323                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    324                             continue;
    325                         }
    326                     }
    327                     /* finished */
    328                     break;
    329                 }
    330             } else {
    331                 /* write token word */
    332                 uint8_t *tokenString=tokenStrings+token;
    333                 while((c=*tokenString++)!=0) {
    334                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    335                 }
    336             }
    337         }
    338     }
    339 
    340     /* zero-terminate */
    341     if(bufferLength>0) {
    342         *buffer=0;
    343     }
    344 
    345     return bufferPos;
    346 }
    347 
    348 /*
    349  * compareName() is almost the same as expandName() except that it compares
    350  * the currently expanded name to an input name.
    351  * It returns the match/no match result as soon as possible.
    352  */
    353 static UBool
    354 compareName(UCharNames *names,
    355             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
    356             const char *otherName) {
    357     uint16_t *tokens=(uint16_t *)names+8;
    358     uint16_t token, tokenCount=*tokens++;
    359     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
    360     uint8_t c;
    361     const char *origOtherName = otherName;
    362 
    363     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    364         /*
    365          * skip the modern name if it is not requested _and_
    366          * if the semicolon byte value is a character, not a token number
    367          */
    368         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    369             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
    370             do {
    371                 while(nameLength>0) {
    372                     --nameLength;
    373                     if(*name++==';') {
    374                         break;
    375                     }
    376                 }
    377             } while(--fieldIndex>0);
    378         } else {
    379             /*
    380              * the semicolon byte value is a token number, therefore
    381              * only modern names are stored in unames.dat and there is no
    382              * such requested alternate name here
    383              */
    384             nameLength=0;
    385         }
    386     }
    387 
    388     /* compare each letter directly, and compare a token word per token */
    389     while(nameLength>0) {
    390         --nameLength;
    391         c=*name++;
    392 
    393         if(c>=tokenCount) {
    394             if(c!=';') {
    395                 /* implicit letter */
    396                 if((char)c!=*otherName++) {
    397                     return FALSE;
    398                 }
    399             } else {
    400                 /* finished */
    401                 break;
    402             }
    403         } else {
    404             token=tokens[c];
    405             if(token==(uint16_t)(-2)) {
    406                 /* this is a lead byte for a double-byte token */
    407                 token=tokens[c<<8|*name++];
    408                 --nameLength;
    409             }
    410             if(token==(uint16_t)(-1)) {
    411                 if(c!=';') {
    412                     /* explicit letter */
    413                     if((char)c!=*otherName++) {
    414                         return FALSE;
    415                     }
    416                 } else {
    417                     /* stop, but skip the semicolon if we are seeking
    418                        extended names and there was no 2.0 name but there
    419                        is a 1.0 name. */
    420                     if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
    421                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    422                             continue;
    423                         }
    424                     }
    425                     /* finished */
    426                     break;
    427                 }
    428             } else {
    429                 /* write token word */
    430                 uint8_t *tokenString=tokenStrings+token;
    431                 while((c=*tokenString++)!=0) {
    432                     if((char)c!=*otherName++) {
    433                         return FALSE;
    434                     }
    435                 }
    436             }
    437         }
    438     }
    439 
    440     /* complete match? */
    441     return (UBool)(*otherName==0);
    442 }
    443 
    444 static uint8_t getCharCat(UChar32 cp) {
    445     uint8_t cat;
    446 
    447     if (U_IS_UNICODE_NONCHAR(cp)) {
    448         return U_NONCHARACTER_CODE_POINT;
    449     }
    450 
    451     if ((cat = u_charType(cp)) == U_SURROGATE) {
    452         cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
    453     }
    454 
    455     return cat;
    456 }
    457 
    458 static const char *getCharCatName(UChar32 cp) {
    459     uint8_t cat = getCharCat(cp);
    460 
    461     /* Return unknown if the table of names above is not up to
    462        date. */
    463 
    464     if (cat >= LENGTHOF(charCatNames)) {
    465         return "unknown";
    466     } else {
    467         return charCatNames[cat];
    468     }
    469 }
    470 
    471 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
    472     const char *catname = getCharCatName(code);
    473     uint16_t length = 0;
    474 
    475     UChar32 cp;
    476     int ndigits, i;
    477 
    478     WRITE_CHAR(buffer, bufferLength, length, '<');
    479     while (catname[length - 1]) {
    480         WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
    481     }
    482     WRITE_CHAR(buffer, bufferLength, length, '-');
    483     for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
    484         ;
    485     if (ndigits < 4)
    486         ndigits = 4;
    487     for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
    488         uint8_t v = (uint8_t)(cp & 0xf);
    489         buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
    490     }
    491     buffer += ndigits;
    492     length += ndigits;
    493     WRITE_CHAR(buffer, bufferLength, length, '>');
    494 
    495     return length;
    496 }
    497 
    498 /*
    499  * getGroup() does a binary search for the group that contains the
    500  * Unicode code point "code".
    501  * The return value is always a valid Group* that may contain "code"
    502  * or else is the highest group before "code".
    503  * If the lowest group is after "code", then that one is returned.
    504  */
    505 static const uint16_t *
    506 getGroup(UCharNames *names, uint32_t code) {
    507     const uint16_t *groups=GET_GROUPS(names);
    508     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
    509              start=0,
    510              limit=*groups++,
    511              number;
    512 
    513     /* binary search for the group of names that contains the one for code */
    514     while(start<limit-1) {
    515         number=(uint16_t)((start+limit)/2);
    516         if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
    517             limit=number;
    518         } else {
    519             start=number;
    520         }
    521     }
    522 
    523     /* return this regardless of whether it is an exact match */
    524     return groups+start*GROUP_LENGTH;
    525 }
    526 
/*
 * expandGroupLengths() reads a block of compressed lengths of 32 strings and
 * expands them into offsets and lengths for each string.
 * Lengths are stored with a variable-width encoding in consecutive nibbles:
 * If a nibble<0xc, then it is the length itself (0=empty string).
 * If a nibble>=0xc, then it forms a length value with the following nibble.
 * Calculation see below.
 * The offsets and lengths arrays must be at least 33 (one more) long because
 * there is no check here at the end if the last nibble is still used.
 *
 * @param s       pointer to the compressed lengths at the start of a group
 * @param offsets output: offset of each string, relative to the returned pointer
 * @param lengths output: length of each string
 * @return pointer to the first group string, directly after the lengths
 */
static const uint8_t *
expandGroupLengths(const uint8_t *s,
                   uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
    /* read the lengths of the 32 strings in this group and get each string's offset */
    /* between iterations, length>=12 means that the last nibble started a double-nibble length */
    uint16_t i=0, offset=0, length=0;
    uint8_t lengthByte;

    /* all 32 lengths must be read to get the offset of the first group string */
    while(i<LINES_PER_GROUP) {
        lengthByte=*s++;

        /* read even nibble - MSBs of lengthByte */
        if(length>=12) {
            /* double-nibble length spread across two bytes */
            length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
            lengthByte&=0xf;
        } else if((lengthByte /* &0xf0 */)>=0xc0) {
            /* double-nibble length spread across this one byte */
            /* lengthByte keeps its upper bits, which marks the low nibble as consumed below */
            length=(uint16_t)((lengthByte&0x3f)+12);
        } else {
            /* single-nibble length in MSBs */
            length=(uint16_t)(lengthByte>>4);
            lengthByte&=0xf;
        }

        *offsets++=offset;
        *lengths++=length;

        offset+=length;
        ++i;

        /* read odd nibble - LSBs of lengthByte */
        if((lengthByte&0xf0)==0) {
            /* this nibble was not consumed for a double-nibble length above */
            length=lengthByte;
            if(length<12) {
                /* single-nibble length in LSBs */
                /* this can be a 33rd entry if i was already 32 - hence the larger arrays */
                *offsets++=offset;
                *lengths++=length;

                offset+=length;
                ++i;
            }
            /* else: length>=12 is completed with the next byte's high nibble */
        } else {
            length=0;   /* prevent double-nibble detection in the next iteration */
        }
    }

    /* now, s is at the first group string */
    return s;
}
    588 
    589 static uint16_t
    590 expandGroupName(UCharNames *names, const uint16_t *group,
    591                 uint16_t lineNumber, UCharNameChoice nameChoice,
    592                 char *buffer, uint16_t bufferLength) {
    593     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
    594     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
    595     s=expandGroupLengths(s, offsets, lengths);
    596     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
    597                       buffer, bufferLength);
    598 }
    599 
    600 static uint16_t
    601 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
    602         char *buffer, uint16_t bufferLength) {
    603     const uint16_t *group=getGroup(names, code);
    604     if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
    605         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
    606                                buffer, bufferLength);
    607     } else {
    608         /* group not found */
    609         /* zero-terminate */
    610         if(bufferLength>0) {
    611             *buffer=0;
    612         }
    613         return 0;
    614     }
    615 }
    616 
    617 /*
    618  * enumGroupNames() enumerates all the names in a 32-group
    619  * and either calls the enumerator function or finds a given input name.
    620  */
    621 static UBool
    622 enumGroupNames(UCharNames *names, const uint16_t *group,
    623                UChar32 start, UChar32 end,
    624                UEnumCharNamesFn *fn, void *context,
    625                UCharNameChoice nameChoice) {
    626     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
    627     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
    628 
    629     s=expandGroupLengths(s, offsets, lengths);
    630     if(fn!=DO_FIND_NAME) {
    631         char buffer[200];
    632         uint16_t length;
    633 
    634         while(start<=end) {
    635             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
    636             if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
    637                 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
    638             }
    639             /* here, we assume that the buffer is large enough */
    640             if(length>0) {
    641                 if(!fn(context, start, nameChoice, buffer, length)) {
    642                     return FALSE;
    643                 }
    644             }
    645             ++start;
    646         }
    647     } else {
    648         const char *otherName=((FindName *)context)->otherName;
    649         while(start<=end) {
    650             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
    651                 ((FindName *)context)->code=start;
    652                 return FALSE;
    653             }
    654             ++start;
    655         }
    656     }
    657     return TRUE;
    658 }
    659 
    660 /*
    661  * enumExtNames enumerate extended names.
    662  * It only needs to do it if it is called with a real function and not
    663  * with the dummy DO_FIND_NAME, because u_charFromName() does a check
    664  * for extended names by itself.
    665  */
    666 static UBool
    667 enumExtNames(UChar32 start, UChar32 end,
    668              UEnumCharNamesFn *fn, void *context)
    669 {
    670     if(fn!=DO_FIND_NAME) {
    671         char buffer[200];
    672         uint16_t length;
    673 
    674         while(start<=end) {
    675             buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
    676             /* here, we assume that the buffer is large enough */
    677             if(length>0) {
    678                 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
    679                     return FALSE;
    680                 }
    681             }
    682             ++start;
    683         }
    684     }
    685 
    686     return TRUE;
    687 }
    688 
/*
 * enumNames() enumerates, or searches in (fn==DO_FIND_NAME), the names of
 * the code points start..limit-1.
 * Stored names are handled group by group via enumGroupNames(); for
 * U_EXTENDED_CHAR_NAME, code points that are not covered by any group are
 * passed to enumExtNames().
 *
 * @return FALSE as soon as enumGroupNames() or enumExtNames() returns FALSE,
 *         otherwise TRUE
 */
static UBool
enumNames(UCharNames *names,
          UChar32 start, UChar32 limit,
          UEnumCharNamesFn *fn, void *context,
          UCharNameChoice nameChoice) {
    uint16_t startGroupMSB, endGroupMSB, groupCount;
    const uint16_t *group, *groupLimit;

    /* group numbers of the first and of the last code point of the range */
    startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
    endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);

    /* find the group that contains start, or the highest before it */
    group=getGroup(names, start);

    /* getGroup() returns the lowest group if start is before all groups */
    if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
        /* enumerate synthetic names between start and the group start */
        UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
        if(extLimit>limit) {
            extLimit=limit;
        }
        if(!enumExtNames(start, extLimit-1, fn, context)) {
            return FALSE;
        }
        start=extLimit;
    }

    if(startGroupMSB==endGroupMSB) {
        if(startGroupMSB==group[GROUP_MSB]) {
            /* if start and limit-1 are in the same group, then enumerate only in that one */
            return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
        }
        /* otherwise fall through to the extended-names handling at the end */
    } else {
        const uint16_t *groups=GET_GROUPS(names);
        groupCount=*groups++;
        /* one past the last group triple */
        groupLimit=groups+groupCount*GROUP_LENGTH;

        if(startGroupMSB==group[GROUP_MSB]) {
            /* enumerate characters in the partial start group */
            if((start&GROUP_MASK)!=0) {
                if(!enumGroupNames(names, group,
                                   start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
                                   fn, context, nameChoice)) {
                    return FALSE;
                }
                group=NEXT_GROUP(group); /* continue with the next group */
            }
        } else if(startGroupMSB>group[GROUP_MSB]) {
            /* make sure that we start enumerating with the first group after start */
            const uint16_t *nextGroup=NEXT_GROUP(group);
            if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
                /* synthetic names for the gap between start and the next group */
                UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
                if (end > limit) {
                    end = limit;
                }
                if (!enumExtNames(start, end - 1, fn, context)) {
                    return FALSE;
                }
            }
            group=nextGroup;
        }

        /* enumerate entire groups between the start- and end-groups */
        while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
            const uint16_t *nextGroup;
            start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
            if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
                return FALSE;
            }
            nextGroup=NEXT_GROUP(group);
            if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
                /* synthetic names for the gap between this group and the next one */
                UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
                if (end > limit) {
                    end = limit;
                }
                if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
                    return FALSE;
                }
            }
            group=nextGroup;
        }

        /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
        if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
            return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
        } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
            /* ran past the last group: continue after it with synthetic names below */
            UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
            if (next > start) {
                start = next;
            }
        } else {
            return TRUE;
        }
    }

    /* we have not found a group, which means everything is made of
       extended names. */
    if (nameChoice == U_EXTENDED_CHAR_NAME) {
        if (limit > UCHAR_MAX_VALUE + 1) {
            limit = UCHAR_MAX_VALUE + 1;
        }
        return enumExtNames(start, limit - 1, fn, context);
    }

    return TRUE;
}
    794 
    /*
     * Writes the factorized suffix of an algorithmic name.
     *
     * code is split into one index per factor by repeated modulo/division,
     * with the last factor being the least significant one. s points at the
     * element strings: for every factor in order, factors[k] consecutive
     * NUL-terminated strings. The string selected by each index is appended
     * to buffer through WRITE_CHAR, and a terminating NUL is stored if
     * bufferLength is still greater than zero afterwards.
     *
     * Outputs: indexes[k] receives the index for factor k. If non-NULL,
     * elementBases[k] receives the first string of factor k and elements[k]
     * the string that was selected for it.
     *
     * Returns the character count maintained by WRITE_CHAR.
     */
    static uint16_t
    writeFactorSuffix(const uint16_t *factors, uint16_t count,
                      const char *s, /* suffix elements */
                      uint32_t code,
                      uint16_t indexes[8], /* output fields from here */
                      const char *elementBases[8], const char *elements[8],
                      char *buffer, uint16_t bufferLength) {
        /* position of the final factor; computed once instead of decrementing count */
        const uint16_t last=(uint16_t)(count-1);
        uint16_t pos, skip, written=0;
        char ch;

        /* split code into per-factor indexes, least significant factor first */
        for(pos=last; pos>0; --pos) {
            const uint16_t radix=factors[pos];
            indexes[pos]=(uint16_t)(code%radix);
            code/=radix;
        }
        /*
         * No modulus for the first factor: the caller keeps code inside the
         * range, so what is left over already is the first index.
         */
        indexes[0]=(uint16_t)code;

        /* emit one element string per factor */
        for(pos=0;; ++pos) {
            if(elementBases!=NULL) {
                elementBases[pos]=s;
            }

            /* step over the strings in front of the selected one */
            for(skip=indexes[pos]; skip>0; --skip) {
                while(*s++!=0) {}
            }
            if(elements!=NULL) {
                elements[pos]=s;
            }

            /* copy the selected string; s ends up just past its NUL */
            while((ch=*s++)!=0) {
                WRITE_CHAR(buffer, bufferLength, written, ch);
            }

            /* nothing follows the last factor, so its remaining strings need no skipping */
            if(pos>=last) {
                break;
            }

            /* step over the remaining strings that belong to this factor */
            for(skip=(uint16_t)(factors[pos]-indexes[pos]-1); skip>0; --skip) {
                while(*s++!=0) {}
            }
        }

        /* zero-terminate */
        if(bufferLength>0) {
            *buffer=0;
        }

        return written;
    }
    868 
    869 /*
    870  * Important:
    871  * Parts of findAlgName() are almost the same as some of getAlgName().
    872  * Fixes must be applied to both.
    873  */
    874 static uint16_t
    875 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
    876         char *buffer, uint16_t bufferLength) {
    877     uint16_t bufferPos=0;
    878 
    879     /* Only the normative character name can be algorithmic. */
    880     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    881         /* zero-terminate */
    882         if(bufferLength>0) {
    883             *buffer=0;
    884         }
    885         return 0;
    886     }
    887 
    888     switch(range->type) {
    889     case 0: {
    890         /* name = prefix hex-digits */
    891         const char *s=(const char *)(range+1);
    892         char c;
    893 
    894         uint16_t i, count;
    895 
    896         /* copy prefix */
    897         while((c=*s++)!=0) {
    898             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    899         }
    900 
    901         /* write hexadecimal code point value */
    902         count=range->variant;
    903 
    904         /* zero-terminate */
    905         if(count<bufferLength) {
    906             buffer[count]=0;
    907         }
    908 
    909         for(i=count; i>0;) {
    910             if(--i<bufferLength) {
    911                 c=(char)(code&0xf);
    912                 if(c<10) {
    913                     c+='0';
    914                 } else {
    915                     c+='A'-10;
    916                 }
    917                 buffer[i]=c;
    918             }
    919             code>>=4;
    920         }
    921 
    922         bufferPos+=count;
    923         break;
    924     }
    925     case 1: {
    926         /* name = prefix factorized-elements */
    927         uint16_t indexes[8];
    928         const uint16_t *factors=(const uint16_t *)(range+1);
    929         uint16_t count=range->variant;
    930         const char *s=(const char *)(factors+count);
    931         char c;
    932 
    933         /* copy prefix */
    934         while((c=*s++)!=0) {
    935             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    936         }
    937 
    938         bufferPos+=writeFactorSuffix(factors, count,
    939                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
    940         break;
    941     }
    942     default:
    943         /* undefined type */
    944         /* zero-terminate */
    945         if(bufferLength>0) {
    946             *buffer=0;
    947         }
    948         break;
    949     }
    950 
    951     return bufferPos;
    952 }
    953 
    954 /*
    955  * Important: enumAlgNames() and findAlgName() are almost the same.
    956  * Any fix must be applied to both.
    957  */
    958 static UBool
    959 enumAlgNames(AlgorithmicRange *range,
    960              UChar32 start, UChar32 limit,
    961              UEnumCharNamesFn *fn, void *context,
    962              UCharNameChoice nameChoice) {
    963     char buffer[200];
    964     uint16_t length;
    965 
    966     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    967         return TRUE;
    968     }
    969 
    970     switch(range->type) {
    971     case 0: {
    972         char *s, *end;
    973         char c;
    974 
    975         /* get the full name of the start character */
    976         length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
    977         if(length<=0) {
    978             return TRUE;
    979         }
    980 
    981         /* call the enumerator function with this first character */
    982         if(!fn(context, start, nameChoice, buffer, length)) {
    983             return FALSE;
    984         }
    985 
    986         /* go to the end of the name; all these names have the same length */
    987         end=buffer;
    988         while(*end!=0) {
    989             ++end;
    990         }
    991 
    992         /* enumerate the rest of the names */
    993         while(++start<limit) {
    994             /* increment the hexadecimal number on a character-basis */
    995             s=end;
    996             for (;;) {
    997                 c=*--s;
    998                 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
    999                     *s=(char)(c+1);
   1000                     break;
   1001                 } else if(c=='9') {
   1002                     *s='A';
   1003                     break;
   1004                 } else if(c=='F') {
   1005                     *s='0';
   1006                 }
   1007             }
   1008 
   1009             if(!fn(context, start, nameChoice, buffer, length)) {
   1010                 return FALSE;
   1011             }
   1012         }
   1013         break;
   1014     }
   1015     case 1: {
   1016         uint16_t indexes[8];
   1017         const char *elementBases[8], *elements[8];
   1018         const uint16_t *factors=(const uint16_t *)(range+1);
   1019         uint16_t count=range->variant;
   1020         const char *s=(const char *)(factors+count);
   1021         char *suffix, *t;
   1022         uint16_t prefixLength, i, idx;
   1023 
   1024         char c;
   1025 
   1026         /* name = prefix factorized-elements */
   1027 
   1028         /* copy prefix */
   1029         suffix=buffer;
   1030         prefixLength=0;
   1031         while((c=*s++)!=0) {
   1032             *suffix++=c;
   1033             ++prefixLength;
   1034         }
   1035 
   1036         /* append the suffix of the start character */
   1037         length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
   1038                                               s, (uint32_t)start-range->start,
   1039                                               indexes, elementBases, elements,
   1040                                               suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
   1041 
   1042         /* call the enumerator function with this first character */
   1043         if(!fn(context, start, nameChoice, buffer, length)) {
   1044             return FALSE;
   1045         }
   1046 
   1047         /* enumerate the rest of the names */
   1048         while(++start<limit) {
   1049             /* increment the indexes in lexical order bound by the factors */
   1050             i=count;
   1051             for (;;) {
   1052                 idx=(uint16_t)(indexes[--i]+1);
   1053                 if(idx<factors[i]) {
   1054                     /* skip one index and its element string */
   1055                     indexes[i]=idx;
   1056                     s=elements[i];
   1057                     while(*s++!=0) {
   1058                     }
   1059                     elements[i]=s;
   1060                     break;
   1061                 } else {
   1062                     /* reset this index to 0 and its element string to the first one */
   1063                     indexes[i]=0;
   1064                     elements[i]=elementBases[i];
   1065                 }
   1066             }
   1067 
   1068             /* to make matters a little easier, just append all elements to the suffix */
   1069             t=suffix;
   1070             length=prefixLength;
   1071             for(i=0; i<count; ++i) {
   1072                 s=elements[i];
   1073                 while((c=*s++)!=0) {
   1074                     *t++=c;
   1075                     ++length;
   1076                 }
   1077             }
   1078             /* zero-terminate */
   1079             *t=0;
   1080 
   1081             if(!fn(context, start, nameChoice, buffer, length)) {
   1082                 return FALSE;
   1083             }
   1084         }
   1085         break;
   1086     }
   1087     default:
   1088         /* undefined type */
   1089         break;
   1090     }
   1091 
   1092     return TRUE;
   1093 }
   1094 
   1095 /*
   1096  * findAlgName() is almost the same as enumAlgNames() except that it
   1097  * returns the code point for a name if it fits into the range.
   1098  * It returns 0xffff otherwise.
   1099  */
   1100 static UChar32
   1101 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
   1102     UChar32 code;
   1103 
   1104     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
   1105         return 0xffff;
   1106     }
   1107 
   1108     switch(range->type) {
   1109     case 0: {
   1110         /* name = prefix hex-digits */
   1111         const char *s=(const char *)(range+1);
   1112         char c;
   1113 
   1114         uint16_t i, count;
   1115 
   1116         /* compare prefix */
   1117         while((c=*s++)!=0) {
   1118             if((char)c!=*otherName++) {
   1119                 return 0xffff;
   1120             }
   1121         }
   1122 
   1123         /* read hexadecimal code point value */
   1124         count=range->variant;
   1125         code=0;
   1126         for(i=0; i<count; ++i) {
   1127             c=*otherName++;
   1128             if('0'<=c && c<='9') {
   1129                 code=(code<<4)|(c-'0');
   1130             } else if('A'<=c && c<='F') {
   1131                 code=(code<<4)|(c-'A'+10);
   1132             } else {
   1133                 return 0xffff;
   1134             }
   1135         }
   1136 
   1137         /* does it fit into the range? */
   1138         if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
   1139             return code;
   1140         }
   1141         break;
   1142     }
   1143     case 1: {
   1144         char buffer[64];
   1145         uint16_t indexes[8];
   1146         const char *elementBases[8], *elements[8];
   1147         const uint16_t *factors=(const uint16_t *)(range+1);
   1148         uint16_t count=range->variant;
   1149         const char *s=(const char *)(factors+count), *t;
   1150         UChar32 start, limit;
   1151         uint16_t i, idx;
   1152 
   1153         char c;
   1154 
   1155         /* name = prefix factorized-elements */
   1156 
   1157         /* compare prefix */
   1158         while((c=*s++)!=0) {
   1159             if((char)c!=*otherName++) {
   1160                 return 0xffff;
   1161             }
   1162         }
   1163 
   1164         start=(UChar32)range->start;
   1165         limit=(UChar32)(range->end+1);
   1166 
   1167         /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
   1168         writeFactorSuffix(factors, count, s, 0,
   1169                           indexes, elementBases, elements, buffer, sizeof(buffer));
   1170 
   1171         /* compare the first suffix */
   1172         if(0==uprv_strcmp(otherName, buffer)) {
   1173             return start;
   1174         }
   1175 
   1176         /* enumerate and compare the rest of the suffixes */
   1177         while(++start<limit) {
   1178             /* increment the indexes in lexical order bound by the factors */
   1179             i=count;
   1180             for (;;) {
   1181                 idx=(uint16_t)(indexes[--i]+1);
   1182                 if(idx<factors[i]) {
   1183                     /* skip one index and its element string */
   1184                     indexes[i]=idx;
   1185                     s=elements[i];
   1186                     while(*s++!=0) {}
   1187                     elements[i]=s;
   1188                     break;
   1189                 } else {
   1190                     /* reset this index to 0 and its element string to the first one */
   1191                     indexes[i]=0;
   1192                     elements[i]=elementBases[i];
   1193                 }
   1194             }
   1195 
   1196             /* to make matters a little easier, just compare all elements of the suffix */
   1197             t=otherName;
   1198             for(i=0; i<count; ++i) {
   1199                 s=elements[i];
   1200                 while((c=*s++)!=0) {
   1201                     if(c!=*t++) {
   1202                         s=""; /* does not match */
   1203                         i=99;
   1204                     }
   1205                 }
   1206             }
   1207             if(i<99 && *t==0) {
   1208                 return start;
   1209             }
   1210         }
   1211         break;
   1212     }
   1213     default:
   1214         /* undefined type */
   1215         break;
   1216     }
   1217 
   1218     return 0xffff;
   1219 }
   1220 
   1221 /* sets of name characters, maximum name lengths ---------------------------- */
   1222 
   /*
    * Bit-set helpers for a set of 256 byte values stored in uint32_t[8]:
    * the top 3 bits of the byte select the word, the low 5 bits the bit.
    * The argument c is parenthesized so that expressions such as a+b are
    * reduced to a byte as a whole before shifting and masking.
    * Note: c is evaluated twice, so it must not have side effects.
    */
   #define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
   #define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
   1225 
   /*
    * Adds every character of the NUL-terminated string s to set and
    * returns the length of s (not counting the terminator).
    */
   static int32_t
   calcStringSetLength(uint32_t set[8], const char *s) {
       const char *p;
       char ch;

       for(p=s; (ch=*p)!=0; ++p) {
           SET_ADD(set, ch);
       }
       return (int32_t)(p-s);
   }
   1237 
   1238 static int32_t
   1239 calcAlgNameSetsLengths(int32_t maxNameLength) {
   1240     AlgorithmicRange *range;
   1241     uint32_t *p;
   1242     uint32_t rangeCount;
   1243     int32_t length;
   1244 
   1245     /* enumerate algorithmic ranges */
   1246     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1247     rangeCount=*p;
   1248     range=(AlgorithmicRange *)(p+1);
   1249     while(rangeCount>0) {
   1250         switch(range->type) {
   1251         case 0:
   1252             /* name = prefix + (range->variant times) hex-digits */
   1253             /* prefix */
   1254             length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
   1255             if(length>maxNameLength) {
   1256                 maxNameLength=length;
   1257             }
   1258             break;
   1259         case 1: {
   1260             /* name = prefix factorized-elements */
   1261             const uint16_t *factors=(const uint16_t *)(range+1);
   1262             const char *s;
   1263             int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
   1264 
   1265             /* prefix length */
   1266             s=(const char *)(factors+count);
   1267             length=calcStringSetLength(gNameSet, s);
   1268             s+=length+1; /* start of factor suffixes */
   1269 
   1270             /* get the set and maximum factor suffix length for each factor */
   1271             for(i=0; i<count; ++i) {
   1272                 maxFactorLength=0;
   1273                 for(factor=factors[i]; factor>0; --factor) {
   1274                     factorLength=calcStringSetLength(gNameSet, s);
   1275                     s+=factorLength+1;
   1276                     if(factorLength>maxFactorLength) {
   1277                         maxFactorLength=factorLength;
   1278                     }
   1279                 }
   1280                 length+=maxFactorLength;
   1281             }
   1282 
   1283             if(length>maxNameLength) {
   1284                 maxNameLength=length;
   1285             }
   1286             break;
   1287         }
   1288         default:
   1289             /* unknown type */
   1290             break;
   1291         }
   1292 
   1293         range=(AlgorithmicRange *)((uint8_t *)range+range->size);
   1294         --rangeCount;
   1295     }
   1296     return maxNameLength;
   1297 }
   1298 
   1299 static int32_t
   1300 calcExtNameSetsLengths(int32_t maxNameLength) {
   1301     int32_t i, length;
   1302 
   1303     for(i=0; i<LENGTHOF(charCatNames); ++i) {
   1304         /*
   1305          * for each category, count the length of the category name
   1306          * plus 9=
   1307          * 2 for <>
   1308          * 1 for -
   1309          * 6 for most hex digits per code point
   1310          */
   1311         length=9+calcStringSetLength(gNameSet, charCatNames[i]);
   1312         if(length>maxNameLength) {
   1313             maxNameLength=length;
   1314         }
   1315     }
   1316     return maxNameLength;
   1317 }
   1318 
   /*
    * Measures one name field of a compressed group line.
    *
    * Reads bytes from *pLine until lineLimit or a ';' separator, expands
    * tokens through the tokens table and tokenStrings, adds every resulting
    * character to set, and returns the expanded length. *pLine is advanced
    * past what was read (including the ';' if one was found).
    *
    * tokenLengths may be NULL. Otherwise it caches the expanded length per
    * token, where 0 means "not computed yet".
    */
   static int32_t
   calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
                     uint32_t set[8],
                     const uint8_t **pLine, const uint8_t *lineLimit) {
       const uint8_t *cursor=*pLine;
       int32_t total=0, wordLength;
       uint16_t unit, token;

       for(;;) {
           if(cursor==lineLimit) {
               break;
           }
           unit=*cursor++;
           if(unit==(uint8_t)';') {
               break;
           }

           if(unit>=tokenCount) {
               /* implicit letter */
               SET_ADD(set, unit);
               ++total;
               continue;
           }

           token=tokens[unit];
           if(token==(uint16_t)(-2)) {
               /* lead byte of a double-byte token: combine it with the next byte */
               unit=(uint16_t)(unit<<8|*cursor++);
               token=tokens[unit];
           }

           if(token==(uint16_t)(-1)) {
               /* explicit letter */
               SET_ADD(set, unit);
               ++total;
               continue;
           }

           /* token word */
           if(tokenLengths==NULL) {
               total+=calcStringSetLength(set, (const char *)tokenStrings+token);
               continue;
           }

           wordLength=tokenLengths[unit];
           if(wordLength==0) {
               /* first use of this token: measure it and remember the result */
               wordLength=calcStringSetLength(set, (const char *)tokenStrings+token);
               tokenLengths[unit]=(int8_t)wordLength;
           }
           total+=wordLength;
       }

       *pLine=cursor;
       return total;
   }
   1363 
   1364 static void
   1365 calcGroupNameSetsLengths(int32_t maxNameLength) {
   1366     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
   1367 
   1368     uint16_t *tokens=(uint16_t *)uCharNames+8;
   1369     uint16_t tokenCount=*tokens++;
   1370     uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
   1371 
   1372     int8_t *tokenLengths;
   1373 
   1374     const uint16_t *group;
   1375     const uint8_t *s, *line, *lineLimit;
   1376 
   1377     int32_t groupCount, lineNumber, length;
   1378 
   1379     tokenLengths=(int8_t *)uprv_malloc(tokenCount);
   1380     if(tokenLengths!=NULL) {
   1381         uprv_memset(tokenLengths, 0, tokenCount);
   1382     }
   1383 
   1384     group=GET_GROUPS(uCharNames);
   1385     groupCount=*group++;
   1386 
   1387     /* enumerate all groups */
   1388     while(groupCount>0) {
   1389         s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
   1390         s=expandGroupLengths(s, offsets, lengths);
   1391 
   1392         /* enumerate all lines in each group */
   1393         for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
   1394             line=s+offsets[lineNumber];
   1395             length=lengths[lineNumber];
   1396             if(length==0) {
   1397                 continue;
   1398             }
   1399 
   1400             lineLimit=line+length;
   1401 
   1402             /* read regular name */
   1403             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
   1404             if(length>maxNameLength) {
   1405                 maxNameLength=length;
   1406             }
   1407             if(line==lineLimit) {
   1408                 continue;
   1409             }
   1410 
   1411             /* read Unicode 1.0 name */
   1412             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
   1413             if(length>maxNameLength) {
   1414                 maxNameLength=length;
   1415             }
   1416             if(line==lineLimit) {
   1417                 continue;
   1418             }
   1419 
   1420             /* read ISO comment */
   1421             /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
   1422         }
   1423 
   1424         group=NEXT_GROUP(group);
   1425         --groupCount;
   1426     }
   1427 
   1428     if(tokenLengths!=NULL) {
   1429         uprv_free(tokenLengths);
   1430     }
   1431 
   1432     /* set gMax... - name length last for threading */
   1433     gMaxNameLength=maxNameLength;
   1434 }
   1435 
   1436 static UBool
   1437 calcNameSetsLengths(UErrorCode *pErrorCode) {
   1438     static const char extChars[]="0123456789ABCDEF<>-";
   1439     int32_t i, maxNameLength;
   1440 
   1441     if(gMaxNameLength!=0) {
   1442         return TRUE;
   1443     }
   1444 
   1445     if(!isDataLoaded(pErrorCode)) {
   1446         return FALSE;
   1447     }
   1448 
   1449     /* set hex digits, used in various names, and <>-, used in extended names */
   1450     for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) {
   1451         SET_ADD(gNameSet, extChars[i]);
   1452     }
   1453 
   1454     /* set sets and lengths from algorithmic names */
   1455     maxNameLength=calcAlgNameSetsLengths(0);
   1456 
   1457     /* set sets and lengths from extended names */
   1458     maxNameLength=calcExtNameSetsLengths(maxNameLength);
   1459 
   1460     /* set sets and lengths from group names, set global maximum values */
   1461     calcGroupNameSetsLengths(maxNameLength);
   1462 
   1463     return TRUE;
   1464 }
   1465 
   1466 /* public API --------------------------------------------------------------- */
   1467 
   1468 U_CAPI int32_t U_EXPORT2
   1469 u_charName(UChar32 code, UCharNameChoice nameChoice,
   1470            char *buffer, int32_t bufferLength,
   1471            UErrorCode *pErrorCode) {
   1472     AlgorithmicRange *algRange;
   1473     uint32_t *p;
   1474     uint32_t i;
   1475     int32_t length;
   1476 
   1477     /* check the argument values */
   1478     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1479         return 0;
   1480     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
   1481               bufferLength<0 || (bufferLength>0 && buffer==NULL)
   1482     ) {
   1483         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1484         return 0;
   1485     }
   1486 
   1487     if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
   1488         return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
   1489     }
   1490 
   1491     length=0;
   1492 
   1493     /* try algorithmic names first */
   1494     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1495     i=*p;
   1496     algRange=(AlgorithmicRange *)(p+1);
   1497     while(i>0) {
   1498         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
   1499             length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
   1500             break;
   1501         }
   1502         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
   1503         --i;
   1504     }
   1505 
   1506     if(i==0) {
   1507         if (nameChoice == U_EXTENDED_CHAR_NAME) {
   1508             length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
   1509             if (!length) {
   1510                 /* extended character name */
   1511                 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
   1512             }
   1513         } else {
   1514             /* normal character name */
   1515             length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
   1516         }
   1517     }
   1518 
   1519     return u_terminateChars(buffer, bufferLength, length, pErrorCode);
   1520 }
   1521 
   1522 U_CAPI int32_t U_EXPORT2
   1523 u_getISOComment(UChar32 /*c*/,
   1524                 char *dest, int32_t destCapacity,
   1525                 UErrorCode *pErrorCode) {
   1526     /* check the argument values */
   1527     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1528         return 0;
   1529     } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
   1530         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1531         return 0;
   1532     }
   1533 
   1534     return u_terminateChars(dest, destCapacity, 0, pErrorCode);
   1535 }
   1536 
   1537 U_CAPI UChar32 U_EXPORT2
   1538 u_charFromName(UCharNameChoice nameChoice,
   1539                const char *name,
   1540                UErrorCode *pErrorCode) {
   1541     char upper[120], lower[120];
   1542     FindName findName;
   1543     AlgorithmicRange *algRange;
   1544     uint32_t *p;
   1545     uint32_t i;
   1546     UChar32 cp = 0;
   1547     char c0;
   1548     UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
   1549 
   1550     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1551         return error;
   1552     }
   1553 
   1554     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
   1555         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1556         return error;
   1557     }
   1558 
   1559     if(!isDataLoaded(pErrorCode)) {
   1560         return error;
   1561     }
   1562 
   1563     /* construct the uppercase and lowercase of the name first */
   1564     for(i=0; i<sizeof(upper); ++i) {
   1565         if((c0=*name++)!=0) {
   1566             upper[i]=uprv_toupper(c0);
   1567             lower[i]=uprv_tolower(c0);
   1568         } else {
   1569             upper[i]=lower[i]=0;
   1570             break;
   1571         }
   1572     }
   1573     if(i==sizeof(upper)) {
   1574         /* name too long, there is no such character */
   1575         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1576         return error;
   1577     }
   1578 
   1579     /* try extended names first */
   1580     if (lower[0] == '<') {
   1581         if (nameChoice == U_EXTENDED_CHAR_NAME) {
   1582             if (lower[--i] == '>') {
   1583                 for (--i; lower[i] && lower[i] != '-'; --i) {
   1584                 }
   1585 
   1586                 if (lower[i] == '-') { /* We've got a category. */
   1587                     uint32_t cIdx;
   1588 
   1589                     lower[i] = 0;
   1590 
   1591                     for (++i; lower[i] != '>'; ++i) {
   1592                         if (lower[i] >= '0' && lower[i] <= '9') {
   1593                             cp = (cp << 4) + lower[i] - '0';
   1594                         } else if (lower[i] >= 'a' && lower[i] <= 'f') {
   1595                             cp = (cp << 4) + lower[i] - 'a' + 10;
   1596                         } else {
   1597                             *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1598                             return error;
   1599                         }
   1600                     }
   1601 
   1602                     /* Now validate the category name.
   1603                        We could use a binary search, or a trie, if
   1604                        we really wanted to. */
   1605 
   1606                     for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {
   1607 
   1608                         if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
   1609                             if (getCharCat(cp) == cIdx) {
   1610                                 return cp;
   1611                             }
   1612                             break;
   1613                         }
   1614                     }
   1615                 }
   1616             }
   1617         }
   1618 
   1619         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1620         return error;
   1621     }
   1622 
   1623     /* try algorithmic names now */
   1624     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1625     i=*p;
   1626     algRange=(AlgorithmicRange *)(p+1);
   1627     while(i>0) {
   1628         if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
   1629             return cp;
   1630         }
   1631         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
   1632         --i;
   1633     }
   1634 
   1635     /* normal character name */
   1636     findName.otherName=upper;
   1637     findName.code=error;
   1638     enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
   1639     if (findName.code == error) {
   1640          *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1641     }
   1642     return findName.code;
   1643 }
   1644 
   1645 U_CAPI void U_EXPORT2
   1646 u_enumCharNames(UChar32 start, UChar32 limit,
   1647                 UEnumCharNamesFn *fn,
   1648                 void *context,
   1649                 UCharNameChoice nameChoice,
   1650                 UErrorCode *pErrorCode) {
   1651     AlgorithmicRange *algRange;
   1652     uint32_t *p;
   1653     uint32_t i;
   1654 
   1655     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1656         return;
   1657     }
   1658 
   1659     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
   1660         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1661         return;
   1662     }
   1663 
   1664     if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
   1665         limit = UCHAR_MAX_VALUE + 1;
   1666     }
   1667     if((uint32_t)start>=(uint32_t)limit) {
   1668         return;
   1669     }
   1670 
   1671     if(!isDataLoaded(pErrorCode)) {
   1672         return;
   1673     }
   1674 
   1675     /* interleave the data-driven ones with the algorithmic ones */
   1676     /* iterate over all algorithmic ranges; assume that they are in ascending order */
   1677     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1678     i=*p;
   1679     algRange=(AlgorithmicRange *)(p+1);
   1680     while(i>0) {
   1681         /* enumerate the character names before the current algorithmic range */
   1682         /* here: start<limit */
   1683         if((uint32_t)start<algRange->start) {
   1684             if((uint32_t)limit<=algRange->start) {
   1685                 enumNames(uCharNames, start, limit, fn, context, nameChoice);
   1686                 return;
   1687             }
   1688             if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
   1689                 return;
   1690             }
   1691             start=(UChar32)algRange->start;
   1692         }
   1693         /* enumerate the character names in the current algorithmic range */
   1694         /* here: algRange->start<=start<limit */
   1695         if((uint32_t)start<=algRange->end) {
   1696             if((uint32_t)limit<=(algRange->end+1)) {
   1697                 enumAlgNames(algRange, start, limit, fn, context, nameChoice);
   1698                 return;
   1699             }
   1700             if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
   1701                 return;
   1702             }
   1703             start=(UChar32)algRange->end+1;
   1704         }
   1705         /* continue to the next algorithmic range (here: start<limit) */
   1706         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
   1707         --i;
   1708     }
   1709     /* enumerate the character names after the last algorithmic range */
   1710     enumNames(uCharNames, start, limit, fn, context, nameChoice);
   1711 }
   1712 
   1713 U_CAPI int32_t U_EXPORT2
   1714 uprv_getMaxCharNameLength() {
   1715     UErrorCode errorCode=U_ZERO_ERROR;
   1716     if(calcNameSetsLengths(&errorCode)) {
   1717         return gMaxNameLength;
   1718     } else {
   1719         return 0;
   1720     }
   1721 }
   1722 
   1723 /**
   1724  * Converts the char set cset into a Unicode set uset.
   1725  * @param cset Set of 256 bit flags corresponding to a set of chars.
   1726  * @param uset USet to receive characters. Existing contents are deleted.
   1727  */
   1728 static void
   1729 charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
   1730     UChar us[256];
   1731     char cs[256];
   1732 
   1733     int32_t i, length;
   1734     UErrorCode errorCode;
   1735 
   1736     errorCode=U_ZERO_ERROR;
   1737 
   1738     if(!calcNameSetsLengths(&errorCode)) {
   1739         return;
   1740     }
   1741 
   1742     /* build a char string with all chars that are used in character names */
   1743     length=0;
   1744     for(i=0; i<256; ++i) {
   1745         if(SET_CONTAINS(cset, i)) {
   1746             cs[length++]=(char)i;
   1747         }
   1748     }
   1749 
   1750     /* convert the char string to a UChar string */
   1751     u_charsToUChars(cs, us, length);
   1752 
   1753     /* add each UChar to the USet */
   1754     for(i=0; i<length; ++i) {
   1755         if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
   1756             sa->add(sa->set, us[i]);
   1757         }
   1758     }
   1759 }
   1760 
   1761 /**
   1762  * Fills set with characters that are used in Unicode character names.
   1763  * @param set USet to receive characters.
   1764  */
   1765 U_CAPI void U_EXPORT2
   1766 uprv_getCharNameCharacters(const USetAdder *sa) {
   1767     charSetToUSet(gNameSet, sa);
   1768 }
   1769 
   1770 /* data swapping ------------------------------------------------------------ */
   1771 
   1772 /*
   1773  * The token table contains non-negative entries for token bytes,
   1774  * and -1 for bytes that represent themselves in the data file's charset.
   1775  * -2 entries are used for lead bytes.
   1776  *
   1777  * Direct bytes (-1 entries) must be translated from the input charset family
   1778  * to the output charset family.
   1779  * makeTokenMap() writes a permutation mapping for this.
   1780  * Use it once for single-/lead-byte tokens and once more for all trail byte
   1781  * tokens. (';' is an unused trail byte marked with -1.)
   1782  */
   1783 static void
   1784 makeTokenMap(const UDataSwapper *ds,
   1785              int16_t tokens[], uint16_t tokenCount,
   1786              uint8_t map[256],
   1787              UErrorCode *pErrorCode) {
   1788     UBool usedOutChar[256];
   1789     uint16_t i, j;
   1790     uint8_t c1, c2;
   1791 
   1792     if(U_FAILURE(*pErrorCode)) {
   1793         return;
   1794     }
   1795 
   1796     if(ds->inCharset==ds->outCharset) {
   1797         /* Same charset family: identity permutation */
   1798         for(i=0; i<256; ++i) {
   1799             map[i]=(uint8_t)i;
   1800         }
   1801     } else {
   1802         uprv_memset(map, 0, 256);
   1803         uprv_memset(usedOutChar, 0, 256);
   1804 
   1805         if(tokenCount>256) {
   1806             tokenCount=256;
   1807         }
   1808 
   1809         /* set the direct bytes (byte 0 always maps to itself) */
   1810         for(i=1; i<tokenCount; ++i) {
   1811             if(tokens[i]==-1) {
   1812                 /* convert the direct byte character */
   1813                 c1=(uint8_t)i;
   1814                 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
   1815                 if(U_FAILURE(*pErrorCode)) {
   1816                     udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
   1817                                      i, ds->inCharset);
   1818                     return;
   1819                 }
   1820 
   1821                 /* enter the converted character into the map and mark it used */
   1822                 map[c1]=c2;
   1823                 usedOutChar[c2]=TRUE;
   1824             }
   1825         }
   1826 
   1827         /* set the mappings for the rest of the permutation */
   1828         for(i=j=1; i<tokenCount; ++i) {
   1829             /* set mappings that were not set for direct bytes */
   1830             if(map[i]==0) {
   1831                 /* set an output byte value that was not used as an output byte above */
   1832                 while(usedOutChar[j]) {
   1833                     ++j;
   1834                 }
   1835                 map[i]=(uint8_t)j++;
   1836             }
   1837         }
   1838 
   1839         /*
   1840          * leave mappings at tokenCount and above unset if tokenCount<256
   1841          * because they won't be used
   1842          */
   1843     }
   1844 }
   1845 
/*
 * Swaps a unames.icu data item between byte orders and charset families.
 *
 * After the data header, the item is processed in this order:
 * the four 32-bit offsets, the token table (permuted with the maps from
 * makeTokenMap()), the token strings, the group table, the group strings
 * (only if the charset family changes), and the algorithmic ranges.
 *
 * If length<0 (preflighting), nothing is written; only the algorithmic
 * ranges are walked in order to find the end of the data.
 *
 * Returns the header size plus the offset just past the last algorithmic
 * range, or 0 on failure with *pErrorCode set:
 * U_UNSUPPORTED_ERROR for an unrecognized data format/version or an unknown
 * algorithmic range type, U_INDEX_OUTOFBOUNDS_ERROR if there are too few
 * bytes, U_MEMORY_ALLOCATION_ERROR if the temporary token array cannot be
 * allocated.
 *
 * inData and outData may be the same buffer, which is why values are read
 * from the input before the corresponding bytes are swapped.
 */
U_CAPI int32_t U_EXPORT2
uchar_swapNames(const UDataSwapper *ds,
                const void *inData, int32_t length, void *outData,
                UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
             offset, i, count, stringsCount;

    const AlgorithmicRange *inRange;
    AlgorithmicRange *outRange;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x61 &&
        pInfo->dataFormat[3]==0x6d &&
        pInfo->formatVersion[0]==1
    )) {
        udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;
    if(length<0) {
        /* preflighting: only the offset of the algorithmic ranges is needed */
        algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
    } else {
        /* from here on, length counts only the bytes after the header */
        length-=headerSize;
        if( length<20 ||
            (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
        ) {
            udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    if(length<0) {
        /* preflighting: iterate through algorithmic ranges */
        offset=algNamesOffset;
        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
        offset+=4;

        /* each range record stores its own size; add them up to find the end */
        for(i=0; i<count; ++i) {
            inRange=(const AlgorithmicRange *)(inBytes+offset);
            offset+=ds->readUInt16(inRange->size);
        }
    } else {
        /* swap data */
        const uint16_t *p;
        uint16_t *q, *temp;

        int16_t tokens[512];
        uint16_t tokenCount;

        uint8_t map[256], trailMap[256];

        /* copy the data for inaccessible bytes */
        if(inBytes!=outBytes) {
            uprv_memcpy(outBytes, inBytes, length);
        }

        /* the initial 4 offsets first (read them before they are swapped) */
        tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
        groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
        groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
        ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);

        /*
         * now the tokens table
         * it needs to be permuted along with the compressed name strings
         */
        p=(const uint16_t *)(inBytes+16);
        q=(uint16_t *)(outBytes+16);

        /* read and swap the tokenCount */
        tokenCount=ds->readUInt16(*p);
        ds->swapArray16(ds, p, 2, q, pErrorCode);
        ++p;
        ++q;

        /* read the first 512 tokens and make the token maps */
        if(tokenCount<=512) {
            count=tokenCount;
        } else {
            count=512;
        }
        for(i=0; i<count; ++i) {
            tokens[i]=udata_readInt16(ds, p[i]);
        }
        for(; i<512; ++i) {
            tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
        }
        /* map[] permutes single-/lead-byte tokens, trailMap[] permutes trail-byte tokens */
        makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
        makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            return 0;
        }

        /*
         * swap and permute the tokens
         * go through a temporary array to support in-place swapping
         */
        temp=(uint16_t *)uprv_malloc(tokenCount*2);
        if(temp==NULL) {
            udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
                             tokenCount);
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }

        /* swap and permute single-/lead-byte tokens */
        for(i=0; i<tokenCount && i<256; ++i) {
            ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
        }

        /* swap and permute trail-byte tokens: keep the 256-block, permute the low byte */
        for(; i<tokenCount; ++i) {
            ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
        }

        /* copy the result into the output and free the temporary array */
        uprv_memcpy(q, temp, tokenCount*2);
        uprv_free(temp);

        /*
         * swap the token strings but not a possible padding byte after
         * the terminating NUL of the last string
         */
        udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
                                    outBytes+tokenStringOffset, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "uchar_swapNames(token strings) failed\n");
            return 0;
        }

        /* swap the group table: one 16-bit count followed by count triples of 16-bit values */
        count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
        ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
                           outBytes+groupsOffset, pErrorCode);

        /*
         * swap the group strings
         * swap the string bytes but not the nibble-encoded string lengths
         */
        if(ds->inCharset!=ds->outCharset) {
            uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];

            const uint8_t *inStrings, *nextInStrings;
            uint8_t *outStrings;

            uint8_t c;

            inStrings=inBytes+groupStringOffset;
            outStrings=outBytes+groupStringOffset;

            /* the group strings extend up to the start of the algorithmic ranges */
            stringsCount=algNamesOffset-groupStringOffset;

            /* iterate through string groups until only a few padding bytes are left */
            while(stringsCount>32) {
                nextInStrings=expandGroupLengths(inStrings, offsets, lengths);

                /* move past the length bytes */
                stringsCount-=(uint32_t)(nextInStrings-inStrings);
                outStrings+=nextInStrings-inStrings;
                inStrings=nextInStrings;

                count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
                stringsCount-=count;

                /* swap the string bytes using map[] and trailMap[] */
                while(count>0) {
                    c=*inStrings++;
                    *outStrings++=map[c];
                    if(tokens[c]!=-2) {
                        --count;
                    } else {
                        /* token lead byte: swap the trail byte, too */
                        *outStrings++=trailMap[*inStrings++];
                        count-=2;
                    }
                }
            }
        }

        /* swap the algorithmic ranges */
        offset=algNamesOffset;
        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
        ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
        offset+=4;

        for(i=0; i<count; ++i) {
            if(offset>(uint32_t)length) {
                udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
                                 length, i);
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }

            inRange=(const AlgorithmicRange *)(inBytes+offset);
            outRange=(AlgorithmicRange *)(outBytes+offset);
            /* advance to the next range before this range's size field is swapped */
            offset+=ds->readUInt16(inRange->size);

            /* the first 8 bytes are swapped as 32-bit units, the size field as a 16-bit unit */
            ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
            ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
            switch(inRange->type) {
            case 0:
                /* swap prefix string */
                ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
                                    outRange+1, pErrorCode);
                if(U_FAILURE(*pErrorCode)) {
                    udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
                                     i);
                    return 0;
                }
                break;
            case 1:
                {
                    /* swap factors and the prefix and factor strings */
                    uint32_t factorsCount;

                    /* for this range type, the variant field holds the number of 16-bit factors */
                    factorsCount=inRange->variant;
                    p=(const uint16_t *)(inRange+1);
                    q=(uint16_t *)(outRange+1);
                    ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);

                    /* swap the strings, up to the last terminating NUL */
                    p+=factorsCount;
                    q+=factorsCount;
                    /* bytes from the end of the factors to the end of this range record */
                    stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
                    /* drop trailing bytes after the last NUL so that they are not swapped */
                    while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
                        --stringsCount;
                    }
                    ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
                }
                break;
            default:
                udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
                                 inRange->type, i);
                *pErrorCode=U_UNSUPPORTED_ERROR;
                return 0;
            }
        }
    }

    /* offset now points just past the last algorithmic range */
    return headerSize+(int32_t)offset;
}
   2111 
   2112 /*
   2113  * Hey, Emacs, please set the following:
   2114  *
   2115  * Local Variables:
   2116  * indent-tabs-mode: nil
   2117  * End:
   2118  *
   2119  */
   2120