Home | History | Annotate | Download | only in common
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 1999-2014, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *   file name:  unames.c
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 1999oct04
     16 *   created by: Markus W. Scherer
     17 */
     18 
     19 #include "unicode/utypes.h"
     20 #include "unicode/putil.h"
     21 #include "unicode/uchar.h"
     22 #include "unicode/udata.h"
     23 #include "unicode/utf.h"
     24 #include "unicode/utf16.h"
     25 #include "uassert.h"
     26 #include "ustr_imp.h"
     27 #include "umutex.h"
     28 #include "cmemory.h"
     29 #include "cstring.h"
     30 #include "ucln_cmn.h"
     31 #include "udataswp.h"
     32 #include "uprops.h"
     33 
     34 U_NAMESPACE_BEGIN
     35 
     36 /* prototypes ------------------------------------------------------------- */
     37 
/* Item name and type handed to udata_openChoice() when the names data is loaded. */
static const char DATA_NAME[] = "unames";
static const char DATA_TYPE[] = "icu";

/*
 * Names are stored in groups of LINES_PER_GROUP consecutive code points:
 * code>>GROUP_SHIFT selects the group, code&GROUP_MASK the line within it.
 */
#define GROUP_SHIFT 5
#define LINES_PER_GROUP (1L<<GROUP_SHIFT)
#define GROUP_MASK (LINES_PER_GROUP-1)
     44 
     45 /*
     46  * This struct was replaced by explicitly accessing equivalent
     47  * fields from triples of uint16_t.
     48  * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
     49  * which broke the assumption that sizeof(Group)==6 and that the ++ operator
     50  * would advance by 6 bytes (3 uint16_t).
     51  *
     52  * We can't just change the data structure because it's loaded from a data file,
     53  * and we don't want to make it less compact, so we changed the access code.
     54  *
     55  * For details see ICU tickets 6331 and 6008.
     56 typedef struct {
     57     uint16_t groupMSB,
     58              offsetHigh, offsetLow; / * avoid padding * /
     59 } Group;
     60  */
     61 enum {
     62     GROUP_MSB,
     63     GROUP_OFFSET_HIGH,
     64     GROUP_OFFSET_LOW,
     65     GROUP_LENGTH
     66 };
     67 
     68 /*
     69  * Get the 32-bit group offset.
     70  * @param group (const uint16_t *) pointer to a Group triple of uint16_t
     71  * @return group offset (int32_t)
     72  */
     73 #define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW])
     74 
     75 #define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
     76 #define PREV_GROUP(group) ((group)-GROUP_LENGTH)
     77 
/*
 * Header of one algorithmic-name range.
 * NOTE(review): no code in this part of the file reads these fields; the
 * meanings below are inferred from the field names only - confirm against
 * the code that uses AlgorithmicRange.
 */
typedef struct {
    uint32_t start, end;    /* presumably the first and last code point of the range */
    uint8_t type, variant;  /* presumably select the naming algorithm */
    uint16_t size;          /* presumably the size of this range's data */
} AlgorithmicRange;
     83 
/*
 * Index header at the start of the loaded names data.
 * Each field is a byte offset counted from the start of this struct:
 * - tokenStringOffset: the token strings used by expandName()/compareName()
 * - groupsOffset: the groups table, see GET_GROUPS()
 * - groupStringOffset: the compressed group strings, see expandGroupName()
 * - algNamesOffset: not read in this part of the file; presumably the
 *   algorithmic names data - confirm against its users
 * The token table is read starting right after these four fields
 * ((uint16_t *)names+8).
 */
typedef struct {
    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
} UCharNames;
     87 
     88 /*
     89  * Get the groups table from a UCharNames struct.
     90  * The groups table consists of one uint16_t groupCount followed by
     91  * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
     92  * and the comment for the old struct Group above.
     93  *
     94  * @param names (const UCharNames *) pointer to the UCharNames indexes
     95  * @return (const uint16_t *) pointer to the groups table
     96  */
     97 #define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset)
     98 
/*
 * Context for a name lookup: enumGroupNames() compares every name against
 * otherName and stores the matching code point in code.
 */
typedef struct {
    const char *otherName;  /* the name to look for */
    UChar32 code;           /* output: code point whose name matched */
} FindName;

/*
 * Passed instead of a real UEnumCharNamesFn to request a lookup of
 * ((FindName *)context)->otherName rather than an enumeration.
 */
#define DO_FIND_NAME NULL
    105 
/* Handle of the opened names data; set by loadCharNames(), closed by unames_cleanup(). */
static UDataMemory *uCharNamesData=NULL;
/* Memory of uCharNamesData as returned by udata_getMemory(); NULL while not loaded. */
static UCharNames *uCharNames=NULL;
/* Makes umtx_initOnce() run loadCharNames() only once; reset by unames_cleanup(). */
static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;

/*
 * Maximum length of character names (regular & 1.0).
 * Set back to 0 by unames_cleanup().
 */
static int32_t gMaxNameLength=0;

/*
 * Set of chars used in character names (regular & 1.0).
 * Chars are platform-dependent (can be EBCDIC).
 */
static uint32_t gNameSet[8]={ 0 };
    120 
    121 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
    122 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
    123 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
    124 
    125 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
    126 
/*
 * Category labels used in extended names, indexed by the value returned by
 * getCharCat(). The last three entries belong to the file-local values
 * U_NONCHARACTER_CODE_POINT, U_LEAD_SURROGATE and U_TRAIL_SURROGATE.
 * getCharCatName() falls back to "unknown" for an index outside this table.
 */
static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
    "unassigned",
    "uppercase letter",
    "lowercase letter",
    "titlecase letter",
    "modifier letter",
    "other letter",
    "non spacing mark",
    "enclosing mark",
    "combining spacing mark",
    "decimal digit number",
    "letter number",
    "other number",
    "space separator",
    "line separator",
    "paragraph separator",
    "control",
    "format",
    "private use area",
    "surrogate",
    "dash punctuation",
    "start punctuation",
    "end punctuation",
    "connector punctuation",
    "other punctuation",
    "math symbol",
    "currency symbol",
    "modifier symbol",
    "other symbol",
    "initial punctuation",
    "final punctuation",
    "noncharacter",
    "lead surrogate",
    "trail surrogate"
};
    162 
    163 /* implementation ----------------------------------------------------------- */
    164 
    165 static UBool U_CALLCONV unames_cleanup(void)
    166 {
    167     if(uCharNamesData) {
    168         udata_close(uCharNamesData);
    169         uCharNamesData = NULL;
    170     }
    171     if(uCharNames) {
    172         uCharNames = NULL;
    173     }
    174     gCharNamesInitOnce.reset();
    175     gMaxNameLength=0;
    176     return TRUE;
    177 }
    178 
    179 static UBool U_CALLCONV
    180 isAcceptable(void * /*context*/,
    181              const char * /*type*/, const char * /*name*/,
    182              const UDataInfo *pInfo) {
    183     return (UBool)(
    184         pInfo->size>=20 &&
    185         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
    186         pInfo->charsetFamily==U_CHARSET_FAMILY &&
    187         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
    188         pInfo->dataFormat[1]==0x6e &&
    189         pInfo->dataFormat[2]==0x61 &&
    190         pInfo->dataFormat[3]==0x6d &&
    191         pInfo->formatVersion[0]==1);
    192 }
    193 
    194 static void U_CALLCONV
    195 loadCharNames(UErrorCode &status) {
    196     U_ASSERT(uCharNamesData == NULL);
    197     U_ASSERT(uCharNames == NULL);
    198 
    199     uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);
    200     if(U_FAILURE(status)) {
    201         uCharNamesData = NULL;
    202     } else {
    203         uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);
    204     }
    205     ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
    206 }
    207 
    208 
    209 static UBool
    210 isDataLoaded(UErrorCode *pErrorCode) {
    211     umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);
    212     return U_SUCCESS(*pErrorCode);
    213 }
    214 
    215 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
    216     if((bufferLength)>0) { \
    217         *(buffer)++=c; \
    218         --(bufferLength); \
    219     } \
    220     ++(bufferPos); \
    221 }
    222 
    223 #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
    224 
    225 /*
    226  * Important: expandName() and compareName() are almost the same -
    227  * apply fixes to both.
    228  *
    229  * UnicodeData.txt uses ';' as a field separator, so no
    230  * field can contain ';' as part of its contents.
    231  * In unames.dat, it is marked as token[';']==-1 only if the
    232  * semicolon is used in the data file - which is iff we
    233  * have Unicode 1.0 names or ISO comments or aliases.
    234  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
    235  * although we know that it will never be part of a name.
    236  */
    237 static uint16_t
    238 expandName(UCharNames *names,
    239            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
    240            char *buffer, uint16_t bufferLength) {
    241     uint16_t *tokens=(uint16_t *)names+8;
    242     uint16_t token, tokenCount=*tokens++, bufferPos=0;
    243     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
    244     uint8_t c;
    245 
    246     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    247         /*
    248          * skip the modern name if it is not requested _and_
    249          * if the semicolon byte value is a character, not a token number
    250          */
    251         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    252             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
    253             do {
    254                 while(nameLength>0) {
    255                     --nameLength;
    256                     if(*name++==';') {
    257                         break;
    258                     }
    259                 }
    260             } while(--fieldIndex>0);
    261         } else {
    262             /*
    263              * the semicolon byte value is a token number, therefore
    264              * only modern names are stored in unames.dat and there is no
    265              * such requested alternate name here
    266              */
    267             nameLength=0;
    268         }
    269     }
    270 
    271     /* write each letter directly, and write a token word per token */
    272     while(nameLength>0) {
    273         --nameLength;
    274         c=*name++;
    275 
    276         if(c>=tokenCount) {
    277             if(c!=';') {
    278                 /* implicit letter */
    279                 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    280             } else {
    281                 /* finished */
    282                 break;
    283             }
    284         } else {
    285             token=tokens[c];
    286             if(token==(uint16_t)(-2)) {
    287                 /* this is a lead byte for a double-byte token */
    288                 token=tokens[c<<8|*name++];
    289                 --nameLength;
    290             }
    291             if(token==(uint16_t)(-1)) {
    292                 if(c!=';') {
    293                     /* explicit letter */
    294                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    295                 } else {
    296                     /* stop, but skip the semicolon if we are seeking
    297                        extended names and there was no 2.0 name but there
    298                        is a 1.0 name. */
    299                     if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
    300                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    301                             continue;
    302                         }
    303                     }
    304                     /* finished */
    305                     break;
    306                 }
    307             } else {
    308                 /* write token word */
    309                 uint8_t *tokenString=tokenStrings+token;
    310                 while((c=*tokenString++)!=0) {
    311                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    312                 }
    313             }
    314         }
    315     }
    316 
    317     /* zero-terminate */
    318     if(bufferLength>0) {
    319         *buffer=0;
    320     }
    321 
    322     return bufferPos;
    323 }
    324 
    325 /*
    326  * compareName() is almost the same as expandName() except that it compares
    327  * the currently expanded name to an input name.
    328  * It returns the match/no match result as soon as possible.
    329  */
    330 static UBool
    331 compareName(UCharNames *names,
    332             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
    333             const char *otherName) {
    334     uint16_t *tokens=(uint16_t *)names+8;
    335     uint16_t token, tokenCount=*tokens++;
    336     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
    337     uint8_t c;
    338     const char *origOtherName = otherName;
    339 
    340     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    341         /*
    342          * skip the modern name if it is not requested _and_
    343          * if the semicolon byte value is a character, not a token number
    344          */
    345         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    346             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
    347             do {
    348                 while(nameLength>0) {
    349                     --nameLength;
    350                     if(*name++==';') {
    351                         break;
    352                     }
    353                 }
    354             } while(--fieldIndex>0);
    355         } else {
    356             /*
    357              * the semicolon byte value is a token number, therefore
    358              * only modern names are stored in unames.dat and there is no
    359              * such requested alternate name here
    360              */
    361             nameLength=0;
    362         }
    363     }
    364 
    365     /* compare each letter directly, and compare a token word per token */
    366     while(nameLength>0) {
    367         --nameLength;
    368         c=*name++;
    369 
    370         if(c>=tokenCount) {
    371             if(c!=';') {
    372                 /* implicit letter */
    373                 if((char)c!=*otherName++) {
    374                     return FALSE;
    375                 }
    376             } else {
    377                 /* finished */
    378                 break;
    379             }
    380         } else {
    381             token=tokens[c];
    382             if(token==(uint16_t)(-2)) {
    383                 /* this is a lead byte for a double-byte token */
    384                 token=tokens[c<<8|*name++];
    385                 --nameLength;
    386             }
    387             if(token==(uint16_t)(-1)) {
    388                 if(c!=';') {
    389                     /* explicit letter */
    390                     if((char)c!=*otherName++) {
    391                         return FALSE;
    392                     }
    393                 } else {
    394                     /* stop, but skip the semicolon if we are seeking
    395                        extended names and there was no 2.0 name but there
    396                        is a 1.0 name. */
    397                     if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
    398                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    399                             continue;
    400                         }
    401                     }
    402                     /* finished */
    403                     break;
    404                 }
    405             } else {
    406                 /* write token word */
    407                 uint8_t *tokenString=tokenStrings+token;
    408                 while((c=*tokenString++)!=0) {
    409                     if((char)c!=*otherName++) {
    410                         return FALSE;
    411                     }
    412                 }
    413             }
    414         }
    415     }
    416 
    417     /* complete match? */
    418     return (UBool)(*otherName==0);
    419 }
    420 
    421 static uint8_t getCharCat(UChar32 cp) {
    422     uint8_t cat;
    423 
    424     if (U_IS_UNICODE_NONCHAR(cp)) {
    425         return U_NONCHARACTER_CODE_POINT;
    426     }
    427 
    428     if ((cat = u_charType(cp)) == U_SURROGATE) {
    429         cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
    430     }
    431 
    432     return cat;
    433 }
    434 
    435 static const char *getCharCatName(UChar32 cp) {
    436     uint8_t cat = getCharCat(cp);
    437 
    438     /* Return unknown if the table of names above is not up to
    439        date. */
    440 
    441     if (cat >= UPRV_LENGTHOF(charCatNames)) {
    442         return "unknown";
    443     } else {
    444         return charCatNames[cat];
    445     }
    446 }
    447 
    448 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
    449     const char *catname = getCharCatName(code);
    450     uint16_t length = 0;
    451 
    452     UChar32 cp;
    453     int ndigits, i;
    454 
    455     WRITE_CHAR(buffer, bufferLength, length, '<');
    456     while (catname[length - 1]) {
    457         WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
    458     }
    459     WRITE_CHAR(buffer, bufferLength, length, '-');
    460     for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
    461         ;
    462     if (ndigits < 4)
    463         ndigits = 4;
    464     for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
    465         uint8_t v = (uint8_t)(cp & 0xf);
    466         buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
    467     }
    468     buffer += ndigits;
    469     length += ndigits;
    470     WRITE_CHAR(buffer, bufferLength, length, '>');
    471 
    472     return length;
    473 }
    474 
    475 /*
    476  * getGroup() does a binary search for the group that contains the
    477  * Unicode code point "code".
    478  * The return value is always a valid Group* that may contain "code"
    479  * or else is the highest group before "code".
    480  * If the lowest group is after "code", then that one is returned.
    481  */
    482 static const uint16_t *
    483 getGroup(UCharNames *names, uint32_t code) {
    484     const uint16_t *groups=GET_GROUPS(names);
    485     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
    486              start=0,
    487              limit=*groups++,
    488              number;
    489 
    490     /* binary search for the group of names that contains the one for code */
    491     while(start<limit-1) {
    492         number=(uint16_t)((start+limit)/2);
    493         if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
    494             limit=number;
    495         } else {
    496             start=number;
    497         }
    498     }
    499 
    500     /* return this regardless of whether it is an exact match */
    501     return groups+start*GROUP_LENGTH;
    502 }
    503 
/*
 * expandGroupLengths() reads a block of compressed lengths of 32 strings and
 * expands them into offsets and lengths for each string.
 * Lengths are stored with a variable-width encoding in consecutive nibbles:
 * If a nibble<0xc, then it is the length itself (0=empty string).
 * If a nibble>=0xc, then it forms a length value with the following nibble:
 * length=((first nibble & 3)<<4 | second nibble)+12.
 * The offsets and lengths arrays must be at least 33 (one more) long because
 * there is no check here at the end if the last nibble is still used.
 *
 * @param s start of the compressed lengths of one group
 * @param offsets output: offset of each string, counted from the returned pointer
 * @param lengths output: length of each string
 * @return pointer just past the length bytes, which is the first group string
 */
static const uint8_t *
expandGroupLengths(const uint8_t *s,
                   uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
    /* read the lengths of the 32 strings in this group and get each string's offset */
    /* between iterations, length>=12 means: first nibble of a double-nibble length is pending */
    uint16_t i=0, offset=0, length=0;
    uint8_t lengthByte;

    /* all 32 lengths must be read to get the offset of the first group string */
    while(i<LINES_PER_GROUP) {
        lengthByte=*s++;

        /* read even nibble - MSBs of lengthByte */
        if(length>=12) {
            /* double-nibble length spread across two bytes */
            length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
            lengthByte&=0xf;
        } else if((lengthByte /* &0xf0 */)>=0xc0) {
            /* double-nibble length spread across this one byte */
            /* lengthByte keeps its upper bits, which marks the low nibble as consumed below */
            length=(uint16_t)((lengthByte&0x3f)+12);
        } else {
            /* single-nibble length in MSBs */
            length=(uint16_t)(lengthByte>>4);
            lengthByte&=0xf;
        }

        *offsets++=offset;
        *lengths++=length;

        offset+=length;
        ++i;

        /* read odd nibble - LSBs of lengthByte */
        if((lengthByte&0xf0)==0) {
            /* this nibble was not consumed for a double-nibble length above */
            length=lengthByte;
            if(length<12) {
                /* single-nibble length in LSBs */
                /* this store can be the 33rd one when i was already 32 here */
                *offsets++=offset;
                *lengths++=length;

                offset+=length;
                ++i;
            }
        } else {
            length=0;   /* prevent double-nibble detection in the next iteration */
        }
    }

    /* now, s is at the first group string */
    return s;
}
    565 
    566 static uint16_t
    567 expandGroupName(UCharNames *names, const uint16_t *group,
    568                 uint16_t lineNumber, UCharNameChoice nameChoice,
    569                 char *buffer, uint16_t bufferLength) {
    570     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
    571     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
    572     s=expandGroupLengths(s, offsets, lengths);
    573     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
    574                       buffer, bufferLength);
    575 }
    576 
    577 static uint16_t
    578 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
    579         char *buffer, uint16_t bufferLength) {
    580     const uint16_t *group=getGroup(names, code);
    581     if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
    582         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
    583                                buffer, bufferLength);
    584     } else {
    585         /* group not found */
    586         /* zero-terminate */
    587         if(bufferLength>0) {
    588             *buffer=0;
    589         }
    590         return 0;
    591     }
    592 }
    593 
    594 /*
    595  * enumGroupNames() enumerates all the names in a 32-group
    596  * and either calls the enumerator function or finds a given input name.
    597  */
    598 static UBool
    599 enumGroupNames(UCharNames *names, const uint16_t *group,
    600                UChar32 start, UChar32 end,
    601                UEnumCharNamesFn *fn, void *context,
    602                UCharNameChoice nameChoice) {
    603     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
    604     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
    605 
    606     s=expandGroupLengths(s, offsets, lengths);
    607     if(fn!=DO_FIND_NAME) {
    608         char buffer[200];
    609         uint16_t length;
    610 
    611         while(start<=end) {
    612             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
    613             if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
    614                 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
    615             }
    616             /* here, we assume that the buffer is large enough */
    617             if(length>0) {
    618                 if(!fn(context, start, nameChoice, buffer, length)) {
    619                     return FALSE;
    620                 }
    621             }
    622             ++start;
    623         }
    624     } else {
    625         const char *otherName=((FindName *)context)->otherName;
    626         while(start<=end) {
    627             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
    628                 ((FindName *)context)->code=start;
    629                 return FALSE;
    630             }
    631             ++start;
    632         }
    633     }
    634     return TRUE;
    635 }
    636 
    637 /*
    638  * enumExtNames enumerate extended names.
    639  * It only needs to do it if it is called with a real function and not
    640  * with the dummy DO_FIND_NAME, because u_charFromName() does a check
    641  * for extended names by itself.
    642  */
    643 static UBool
    644 enumExtNames(UChar32 start, UChar32 end,
    645              UEnumCharNamesFn *fn, void *context)
    646 {
    647     if(fn!=DO_FIND_NAME) {
    648         char buffer[200];
    649         uint16_t length;
    650 
    651         while(start<=end) {
    652             buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
    653             /* here, we assume that the buffer is large enough */
    654             if(length>0) {
    655                 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
    656                     return FALSE;
    657                 }
    658             }
    659             ++start;
    660         }
    661     }
    662 
    663     return TRUE;
    664 }
    665 
/*
 * Enumerate the names of the code points start..limit-1.
 * Code points inside a stored group are handled by enumGroupNames();
 * for U_EXTENDED_CHAR_NAME the code points between and around the stored
 * groups are handled by enumExtNames().
 * fn and context are passed through to those two functions, so fn may be
 * DO_FIND_NAME with a FindName context.
 * Returns FALSE as soon as one of those functions returns FALSE, else TRUE.
 */
static UBool
enumNames(UCharNames *names,
          UChar32 start, UChar32 limit,
          UEnumCharNamesFn *fn, void *context,
          UCharNameChoice nameChoice) {
    uint16_t startGroupMSB, endGroupMSB, groupCount;
    const uint16_t *group, *groupLimit;

    /* group numbers of the first and the last code point of the range */
    startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
    endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);

    /* find the group that contains start, or the highest before it */
    group=getGroup(names, start);

    if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
        /* enumerate synthetic names between start and the group start */
        UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
        if(extLimit>limit) {
            extLimit=limit;
        }
        if(!enumExtNames(start, extLimit-1, fn, context)) {
            return FALSE;
        }
        start=extLimit;
    }

    if(startGroupMSB==endGroupMSB) {
        if(startGroupMSB==group[GROUP_MSB]) {
            /* if start and limit-1 are in the same group, then enumerate only in that one */
            return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
        }
        /* otherwise fall through to the extended-names handling at the end */
    } else {
        const uint16_t *groups=GET_GROUPS(names);
        groupCount=*groups++;
        /* one past the last group of the table */
        groupLimit=groups+groupCount*GROUP_LENGTH;

        if(startGroupMSB==group[GROUP_MSB]) {
            /* enumerate characters in the partial start group */
            if((start&GROUP_MASK)!=0) {
                if(!enumGroupNames(names, group,
                                   start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
                                   fn, context, nameChoice)) {
                    return FALSE;
                }
                group=NEXT_GROUP(group); /* continue with the next group */
            }
        } else if(startGroupMSB>group[GROUP_MSB]) {
            /* make sure that we start enumerating with the first group after start */
            const uint16_t *nextGroup=NEXT_GROUP(group);
            if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
                /* extended names from start up to the next stored group (or limit) */
                UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
                if (end > limit) {
                    end = limit;
                }
                if (!enumExtNames(start, end - 1, fn, context)) {
                    return FALSE;
                }
            }
            group=nextGroup;
        }

        /* enumerate entire groups between the start- and end-groups */
        while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
            const uint16_t *nextGroup;
            start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
            if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
                return FALSE;
            }
            nextGroup=NEXT_GROUP(group);
            if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
                /* extended names for the gap between this group and the next stored one */
                UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
                if (end > limit) {
                    end = limit;
                }
                if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
                    return FALSE;
                }
            }
            group=nextGroup;
        }

        /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
        if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
            return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
        } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
            /* ran past the last stored group: continue after it with extended names below */
            UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
            if (next > start) {
                start = next;
            }
        } else {
            return TRUE;
        }
    }

    /* we have not found a group, which means everything is made of
       extended names. */
    if (nameChoice == U_EXTENDED_CHAR_NAME) {
        /* do not enumerate beyond the last code point */
        if (limit > UCHAR_MAX_VALUE + 1) {
            limit = UCHAR_MAX_VALUE + 1;
        }
        return enumExtNames(start, limit - 1, fn, context);
    }

    return TRUE;
}
    771 
    772 static uint16_t
    773 writeFactorSuffix(const uint16_t *factors, uint16_t count,
    774                   const char *s, /* suffix elements */
    775                   uint32_t code,
    776                   uint16_t indexes[8], /* output fields from here */
    777                   const char *elementBases[8], const char *elements[8],
    778                   char *buffer, uint16_t bufferLength) {
    779     uint16_t i, factor, bufferPos=0;
    780     char c;
    781 
    782     /* write elements according to the factors */
    783 
    784     /*
    785      * the factorized elements are determined by modulo arithmetic
    786      * with the factors of this algorithm
    787      *
    788      * note that for fewer operations, count is decremented here
    789      */
    790     --count;
    791     for(i=count; i>0; --i) {
    792         factor=factors[i];
    793         indexes[i]=(uint16_t)(code%factor);
    794         code/=factor;
    795     }
    796     /*
    797      * we don't need to calculate the last modulus because start<=code<=end
    798      * guarantees here that code<=factors[0]
    799      */
    800     indexes[0]=(uint16_t)code;
    801 
    802     /* write each element */
    803     for(;;) {
    804         if(elementBases!=NULL) {
    805             *elementBases++=s;
    806         }
    807 
    808         /* skip indexes[i] strings */
    809         factor=indexes[i];
    810         while(factor>0) {
    811             while(*s++!=0) {}
    812             --factor;
    813         }
    814         if(elements!=NULL) {
    815             *elements++=s;
    816         }
    817 
    818         /* write element */
    819         while((c=*s++)!=0) {
    820             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    821         }
    822 
    823         /* we do not need to perform the rest of this loop for i==count - break here */
    824         if(i>=count) {
    825             break;
    826         }
    827 
    828         /* skip the rest of the strings for this factors[i] */
    829         factor=(uint16_t)(factors[i]-indexes[i]-1);
    830         while(factor>0) {
    831             while(*s++!=0) {}
    832             --factor;
    833         }
    834 
    835         ++i;
    836     }
    837 
    838     /* zero-terminate */
    839     if(bufferLength>0) {
    840         *buffer=0;
    841     }
    842 
    843     return bufferPos;
    844 }
    845 
    846 /*
    847  * Important:
    848  * Parts of findAlgName() are almost the same as some of getAlgName().
    849  * Fixes must be applied to both.
    850  */
    851 static uint16_t
    852 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
    853         char *buffer, uint16_t bufferLength) {
    854     uint16_t bufferPos=0;
    855 
    856     /* Only the normative character name can be algorithmic. */
    857     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    858         /* zero-terminate */
    859         if(bufferLength>0) {
    860             *buffer=0;
    861         }
    862         return 0;
    863     }
    864 
    865     switch(range->type) {
    866     case 0: {
    867         /* name = prefix hex-digits */
    868         const char *s=(const char *)(range+1);
    869         char c;
    870 
    871         uint16_t i, count;
    872 
    873         /* copy prefix */
    874         while((c=*s++)!=0) {
    875             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    876         }
    877 
    878         /* write hexadecimal code point value */
    879         count=range->variant;
    880 
    881         /* zero-terminate */
    882         if(count<bufferLength) {
    883             buffer[count]=0;
    884         }
    885 
    886         for(i=count; i>0;) {
    887             if(--i<bufferLength) {
    888                 c=(char)(code&0xf);
    889                 if(c<10) {
    890                     c+='0';
    891                 } else {
    892                     c+='A'-10;
    893                 }
    894                 buffer[i]=c;
    895             }
    896             code>>=4;
    897         }
    898 
    899         bufferPos+=count;
    900         break;
    901     }
    902     case 1: {
    903         /* name = prefix factorized-elements */
    904         uint16_t indexes[8];
    905         const uint16_t *factors=(const uint16_t *)(range+1);
    906         uint16_t count=range->variant;
    907         const char *s=(const char *)(factors+count);
    908         char c;
    909 
    910         /* copy prefix */
    911         while((c=*s++)!=0) {
    912             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    913         }
    914 
    915         bufferPos+=writeFactorSuffix(factors, count,
    916                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
    917         break;
    918     }
    919     default:
    920         /* undefined type */
    921         /* zero-terminate */
    922         if(bufferLength>0) {
    923             *buffer=0;
    924         }
    925         break;
    926     }
    927 
    928     return bufferPos;
    929 }
    930 
    931 /*
    932  * Important: enumAlgNames() and findAlgName() are almost the same.
    933  * Any fix must be applied to both.
    934  */
    935 static UBool
    936 enumAlgNames(AlgorithmicRange *range,
    937              UChar32 start, UChar32 limit,
    938              UEnumCharNamesFn *fn, void *context,
    939              UCharNameChoice nameChoice) {
    940     char buffer[200];
    941     uint16_t length;
    942 
    943     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    944         return TRUE;
    945     }
    946 
    947     switch(range->type) {
    948     case 0: {
    949         char *s, *end;
    950         char c;
    951 
    952         /* get the full name of the start character */
    953         length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
    954         if(length<=0) {
    955             return TRUE;
    956         }
    957 
    958         /* call the enumerator function with this first character */
    959         if(!fn(context, start, nameChoice, buffer, length)) {
    960             return FALSE;
    961         }
    962 
    963         /* go to the end of the name; all these names have the same length */
    964         end=buffer;
    965         while(*end!=0) {
    966             ++end;
    967         }
    968 
    969         /* enumerate the rest of the names */
    970         while(++start<limit) {
    971             /* increment the hexadecimal number on a character-basis */
    972             s=end;
    973             for (;;) {
    974                 c=*--s;
    975                 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
    976                     *s=(char)(c+1);
    977                     break;
    978                 } else if(c=='9') {
    979                     *s='A';
    980                     break;
    981                 } else if(c=='F') {
    982                     *s='0';
    983                 }
    984             }
    985 
    986             if(!fn(context, start, nameChoice, buffer, length)) {
    987                 return FALSE;
    988             }
    989         }
    990         break;
    991     }
    992     case 1: {
    993         uint16_t indexes[8];
    994         const char *elementBases[8], *elements[8];
    995         const uint16_t *factors=(const uint16_t *)(range+1);
    996         uint16_t count=range->variant;
    997         const char *s=(const char *)(factors+count);
    998         char *suffix, *t;
    999         uint16_t prefixLength, i, idx;
   1000 
   1001         char c;
   1002 
   1003         /* name = prefix factorized-elements */
   1004 
   1005         /* copy prefix */
   1006         suffix=buffer;
   1007         prefixLength=0;
   1008         while((c=*s++)!=0) {
   1009             *suffix++=c;
   1010             ++prefixLength;
   1011         }
   1012 
   1013         /* append the suffix of the start character */
   1014         length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
   1015                                               s, (uint32_t)start-range->start,
   1016                                               indexes, elementBases, elements,
   1017                                               suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
   1018 
   1019         /* call the enumerator function with this first character */
   1020         if(!fn(context, start, nameChoice, buffer, length)) {
   1021             return FALSE;
   1022         }
   1023 
   1024         /* enumerate the rest of the names */
   1025         while(++start<limit) {
   1026             /* increment the indexes in lexical order bound by the factors */
   1027             i=count;
   1028             for (;;) {
   1029                 idx=(uint16_t)(indexes[--i]+1);
   1030                 if(idx<factors[i]) {
   1031                     /* skip one index and its element string */
   1032                     indexes[i]=idx;
   1033                     s=elements[i];
   1034                     while(*s++!=0) {
   1035                     }
   1036                     elements[i]=s;
   1037                     break;
   1038                 } else {
   1039                     /* reset this index to 0 and its element string to the first one */
   1040                     indexes[i]=0;
   1041                     elements[i]=elementBases[i];
   1042                 }
   1043             }
   1044 
   1045             /* to make matters a little easier, just append all elements to the suffix */
   1046             t=suffix;
   1047             length=prefixLength;
   1048             for(i=0; i<count; ++i) {
   1049                 s=elements[i];
   1050                 while((c=*s++)!=0) {
   1051                     *t++=c;
   1052                     ++length;
   1053                 }
   1054             }
   1055             /* zero-terminate */
   1056             *t=0;
   1057 
   1058             if(!fn(context, start, nameChoice, buffer, length)) {
   1059                 return FALSE;
   1060             }
   1061         }
   1062         break;
   1063     }
   1064     default:
   1065         /* undefined type */
   1066         break;
   1067     }
   1068 
   1069     return TRUE;
   1070 }
   1071 
   1072 /*
   1073  * findAlgName() is almost the same as enumAlgNames() except that it
   1074  * returns the code point for a name if it fits into the range.
   1075  * It returns 0xffff otherwise.
   1076  */
   1077 static UChar32
   1078 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
   1079     UChar32 code;
   1080 
   1081     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
   1082         return 0xffff;
   1083     }
   1084 
   1085     switch(range->type) {
   1086     case 0: {
   1087         /* name = prefix hex-digits */
   1088         const char *s=(const char *)(range+1);
   1089         char c;
   1090 
   1091         uint16_t i, count;
   1092 
   1093         /* compare prefix */
   1094         while((c=*s++)!=0) {
   1095             if((char)c!=*otherName++) {
   1096                 return 0xffff;
   1097             }
   1098         }
   1099 
   1100         /* read hexadecimal code point value */
   1101         count=range->variant;
   1102         code=0;
   1103         for(i=0; i<count; ++i) {
   1104             c=*otherName++;
   1105             if('0'<=c && c<='9') {
   1106                 code=(code<<4)|(c-'0');
   1107             } else if('A'<=c && c<='F') {
   1108                 code=(code<<4)|(c-'A'+10);
   1109             } else {
   1110                 return 0xffff;
   1111             }
   1112         }
   1113 
   1114         /* does it fit into the range? */
   1115         if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
   1116             return code;
   1117         }
   1118         break;
   1119     }
   1120     case 1: {
   1121         char buffer[64];
   1122         uint16_t indexes[8];
   1123         const char *elementBases[8], *elements[8];
   1124         const uint16_t *factors=(const uint16_t *)(range+1);
   1125         uint16_t count=range->variant;
   1126         const char *s=(const char *)(factors+count), *t;
   1127         UChar32 start, limit;
   1128         uint16_t i, idx;
   1129 
   1130         char c;
   1131 
   1132         /* name = prefix factorized-elements */
   1133 
   1134         /* compare prefix */
   1135         while((c=*s++)!=0) {
   1136             if((char)c!=*otherName++) {
   1137                 return 0xffff;
   1138             }
   1139         }
   1140 
   1141         start=(UChar32)range->start;
   1142         limit=(UChar32)(range->end+1);
   1143 
   1144         /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
   1145         writeFactorSuffix(factors, count, s, 0,
   1146                           indexes, elementBases, elements, buffer, sizeof(buffer));
   1147 
   1148         /* compare the first suffix */
   1149         if(0==uprv_strcmp(otherName, buffer)) {
   1150             return start;
   1151         }
   1152 
   1153         /* enumerate and compare the rest of the suffixes */
   1154         while(++start<limit) {
   1155             /* increment the indexes in lexical order bound by the factors */
   1156             i=count;
   1157             for (;;) {
   1158                 idx=(uint16_t)(indexes[--i]+1);
   1159                 if(idx<factors[i]) {
   1160                     /* skip one index and its element string */
   1161                     indexes[i]=idx;
   1162                     s=elements[i];
   1163                     while(*s++!=0) {}
   1164                     elements[i]=s;
   1165                     break;
   1166                 } else {
   1167                     /* reset this index to 0 and its element string to the first one */
   1168                     indexes[i]=0;
   1169                     elements[i]=elementBases[i];
   1170                 }
   1171             }
   1172 
   1173             /* to make matters a little easier, just compare all elements of the suffix */
   1174             t=otherName;
   1175             for(i=0; i<count; ++i) {
   1176                 s=elements[i];
   1177                 while((c=*s++)!=0) {
   1178                     if(c!=*t++) {
   1179                         s=""; /* does not match */
   1180                         i=99;
   1181                     }
   1182                 }
   1183             }
   1184             if(i<99 && *t==0) {
   1185                 return start;
   1186             }
   1187         }
   1188         break;
   1189     }
   1190     default:
   1191         /* undefined type */
   1192         break;
   1193     }
   1194 
   1195     return 0xffff;
   1196 }
   1197 
   1198 /* sets of name characters, maximum name lengths ---------------------------- */
   1199 
   /*
    * 256-bit character sets stored as uint32_t[8]:
    * character c, reduced to uint8_t, is bit (c&0x1f) of word (c>>5).
    * The argument c is parenthesized so that an expression argument is cast
    * as a whole (otherwise e.g. 0x40|2 would select a word outside the set).
    * c is evaluated twice: do not pass expressions with side effects.
    */
   #define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
   #define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
   1202 
   1203 static int32_t
   1204 calcStringSetLength(uint32_t set[8], const char *s) {
   1205     int32_t length=0;
   1206     char c;
   1207 
   1208     while((c=*s++)!=0) {
   1209         SET_ADD(set, c);
   1210         ++length;
   1211     }
   1212     return length;
   1213 }
   1214 
   1215 static int32_t
   1216 calcAlgNameSetsLengths(int32_t maxNameLength) {
   1217     AlgorithmicRange *range;
   1218     uint32_t *p;
   1219     uint32_t rangeCount;
   1220     int32_t length;
   1221 
   1222     /* enumerate algorithmic ranges */
   1223     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1224     rangeCount=*p;
   1225     range=(AlgorithmicRange *)(p+1);
   1226     while(rangeCount>0) {
   1227         switch(range->type) {
   1228         case 0:
   1229             /* name = prefix + (range->variant times) hex-digits */
   1230             /* prefix */
   1231             length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
   1232             if(length>maxNameLength) {
   1233                 maxNameLength=length;
   1234             }
   1235             break;
   1236         case 1: {
   1237             /* name = prefix factorized-elements */
   1238             const uint16_t *factors=(const uint16_t *)(range+1);
   1239             const char *s;
   1240             int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
   1241 
   1242             /* prefix length */
   1243             s=(const char *)(factors+count);
   1244             length=calcStringSetLength(gNameSet, s);
   1245             s+=length+1; /* start of factor suffixes */
   1246 
   1247             /* get the set and maximum factor suffix length for each factor */
   1248             for(i=0; i<count; ++i) {
   1249                 maxFactorLength=0;
   1250                 for(factor=factors[i]; factor>0; --factor) {
   1251                     factorLength=calcStringSetLength(gNameSet, s);
   1252                     s+=factorLength+1;
   1253                     if(factorLength>maxFactorLength) {
   1254                         maxFactorLength=factorLength;
   1255                     }
   1256                 }
   1257                 length+=maxFactorLength;
   1258             }
   1259 
   1260             if(length>maxNameLength) {
   1261                 maxNameLength=length;
   1262             }
   1263             break;
   1264         }
   1265         default:
   1266             /* unknown type */
   1267             break;
   1268         }
   1269 
   1270         range=(AlgorithmicRange *)((uint8_t *)range+range->size);
   1271         --rangeCount;
   1272     }
   1273     return maxNameLength;
   1274 }
   1275 
   1276 static int32_t
   1277 calcExtNameSetsLengths(int32_t maxNameLength) {
   1278     int32_t i, length;
   1279 
   1280     for(i=0; i<UPRV_LENGTHOF(charCatNames); ++i) {
   1281         /*
   1282          * for each category, count the length of the category name
   1283          * plus 9=
   1284          * 2 for <>
   1285          * 1 for -
   1286          * 6 for most hex digits per code point
   1287          */
   1288         length=9+calcStringSetLength(gNameSet, charCatNames[i]);
   1289         if(length>maxNameLength) {
   1290             maxNameLength=length;
   1291         }
   1292     }
   1293     return maxNameLength;
   1294 }
   1295 
   1296 static int32_t
   1297 calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
   1298                   uint32_t set[8],
   1299                   const uint8_t **pLine, const uint8_t *lineLimit) {
   1300     const uint8_t *line=*pLine;
   1301     int32_t length=0, tokenLength;
   1302     uint16_t c, token;
   1303 
   1304     while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {
   1305         if(c>=tokenCount) {
   1306             /* implicit letter */
   1307             SET_ADD(set, c);
   1308             ++length;
   1309         } else {
   1310             token=tokens[c];
   1311             if(token==(uint16_t)(-2)) {
   1312                 /* this is a lead byte for a double-byte token */
   1313                 c=c<<8|*line++;
   1314                 token=tokens[c];
   1315             }
   1316             if(token==(uint16_t)(-1)) {
   1317                 /* explicit letter */
   1318                 SET_ADD(set, c);
   1319                 ++length;
   1320             } else {
   1321                 /* count token word */
   1322                 if(tokenLengths!=NULL) {
   1323                     /* use cached token length */
   1324                     tokenLength=tokenLengths[c];
   1325                     if(tokenLength==0) {
   1326                         tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
   1327                         tokenLengths[c]=(int8_t)tokenLength;
   1328                     }
   1329                 } else {
   1330                     tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
   1331                 }
   1332                 length+=tokenLength;
   1333             }
   1334         }
   1335     }
   1336 
   1337     *pLine=line;
   1338     return length;
   1339 }
   1340 
   1341 static void
   1342 calcGroupNameSetsLengths(int32_t maxNameLength) {
   1343     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
   1344 
   1345     uint16_t *tokens=(uint16_t *)uCharNames+8;
   1346     uint16_t tokenCount=*tokens++;
   1347     uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
   1348 
   1349     int8_t *tokenLengths;
   1350 
   1351     const uint16_t *group;
   1352     const uint8_t *s, *line, *lineLimit;
   1353 
   1354     int32_t groupCount, lineNumber, length;
   1355 
   1356     tokenLengths=(int8_t *)uprv_malloc(tokenCount);
   1357     if(tokenLengths!=NULL) {
   1358         uprv_memset(tokenLengths, 0, tokenCount);
   1359     }
   1360 
   1361     group=GET_GROUPS(uCharNames);
   1362     groupCount=*group++;
   1363 
   1364     /* enumerate all groups */
   1365     while(groupCount>0) {
   1366         s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
   1367         s=expandGroupLengths(s, offsets, lengths);
   1368 
   1369         /* enumerate all lines in each group */
   1370         for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
   1371             line=s+offsets[lineNumber];
   1372             length=lengths[lineNumber];
   1373             if(length==0) {
   1374                 continue;
   1375             }
   1376 
   1377             lineLimit=line+length;
   1378 
   1379             /* read regular name */
   1380             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
   1381             if(length>maxNameLength) {
   1382                 maxNameLength=length;
   1383             }
   1384             if(line==lineLimit) {
   1385                 continue;
   1386             }
   1387 
   1388             /* read Unicode 1.0 name */
   1389             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
   1390             if(length>maxNameLength) {
   1391                 maxNameLength=length;
   1392             }
   1393             if(line==lineLimit) {
   1394                 continue;
   1395             }
   1396 
   1397             /* read ISO comment */
   1398             /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
   1399         }
   1400 
   1401         group=NEXT_GROUP(group);
   1402         --groupCount;
   1403     }
   1404 
   1405     if(tokenLengths!=NULL) {
   1406         uprv_free(tokenLengths);
   1407     }
   1408 
   1409     /* set gMax... - name length last for threading */
   1410     gMaxNameLength=maxNameLength;
   1411 }
   1412 
   1413 static UBool
   1414 calcNameSetsLengths(UErrorCode *pErrorCode) {
   1415     static const char extChars[]="0123456789ABCDEF<>-";
   1416     int32_t i, maxNameLength;
   1417 
   1418     if(gMaxNameLength!=0) {
   1419         return TRUE;
   1420     }
   1421 
   1422     if(!isDataLoaded(pErrorCode)) {
   1423         return FALSE;
   1424     }
   1425 
   1426     /* set hex digits, used in various names, and <>-, used in extended names */
   1427     for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) {
   1428         SET_ADD(gNameSet, extChars[i]);
   1429     }
   1430 
   1431     /* set sets and lengths from algorithmic names */
   1432     maxNameLength=calcAlgNameSetsLengths(0);
   1433 
   1434     /* set sets and lengths from extended names */
   1435     maxNameLength=calcExtNameSetsLengths(maxNameLength);
   1436 
   1437     /* set sets and lengths from group names, set global maximum values */
   1438     calcGroupNameSetsLengths(maxNameLength);
   1439 
   1440     return TRUE;
   1441 }
   1442 
   1443 U_NAMESPACE_END
   1444 
   1445 /* public API --------------------------------------------------------------- */
   1446 
   1447 U_NAMESPACE_USE
   1448 
   1449 U_CAPI int32_t U_EXPORT2
   1450 u_charName(UChar32 code, UCharNameChoice nameChoice,
   1451            char *buffer, int32_t bufferLength,
   1452            UErrorCode *pErrorCode) {
   1453      AlgorithmicRange *algRange;
   1454     uint32_t *p;
   1455     uint32_t i;
   1456     int32_t length;
   1457 
   1458     /* check the argument values */
   1459     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1460         return 0;
   1461     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
   1462               bufferLength<0 || (bufferLength>0 && buffer==NULL)
   1463     ) {
   1464         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1465         return 0;
   1466     }
   1467 
   1468     if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
   1469         return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
   1470     }
   1471 
   1472     length=0;
   1473 
   1474     /* try algorithmic names first */
   1475     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1476     i=*p;
   1477     algRange=(AlgorithmicRange *)(p+1);
   1478     while(i>0) {
   1479         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
   1480             length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
   1481             break;
   1482         }
   1483         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
   1484         --i;
   1485     }
   1486 
   1487     if(i==0) {
   1488         if (nameChoice == U_EXTENDED_CHAR_NAME) {
   1489             length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
   1490             if (!length) {
   1491                 /* extended character name */
   1492                 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
   1493             }
   1494         } else {
   1495             /* normal character name */
   1496             length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
   1497         }
   1498     }
   1499 
   1500     return u_terminateChars(buffer, bufferLength, length, pErrorCode);
   1501 }
   1502 
   1503 U_CAPI int32_t U_EXPORT2
   1504 u_getISOComment(UChar32 /*c*/,
   1505                 char *dest, int32_t destCapacity,
   1506                 UErrorCode *pErrorCode) {
   1507     /* check the argument values */
   1508     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1509         return 0;
   1510     } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
   1511         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1512         return 0;
   1513     }
   1514 
   1515     return u_terminateChars(dest, destCapacity, 0, pErrorCode);
   1516 }
   1517 
   1518 U_CAPI UChar32 U_EXPORT2
   1519 u_charFromName(UCharNameChoice nameChoice,
   1520                const char *name,
   1521                UErrorCode *pErrorCode) {
   1522     char upper[120], lower[120];
   1523     FindName findName;
   1524     AlgorithmicRange *algRange;
   1525     uint32_t *p;
   1526     uint32_t i;
   1527     UChar32 cp = 0;
   1528     char c0;
   1529     UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
   1530 
   1531     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1532         return error;
   1533     }
   1534 
   1535     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
   1536         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1537         return error;
   1538     }
   1539 
   1540     if(!isDataLoaded(pErrorCode)) {
   1541         return error;
   1542     }
   1543 
   1544     /* construct the uppercase and lowercase of the name first */
   1545     for(i=0; i<sizeof(upper); ++i) {
   1546         if((c0=*name++)!=0) {
   1547             upper[i]=uprv_toupper(c0);
   1548             lower[i]=uprv_tolower(c0);
   1549         } else {
   1550             upper[i]=lower[i]=0;
   1551             break;
   1552         }
   1553     }
   1554     if(i==sizeof(upper)) {
   1555         /* name too long, there is no such character */
   1556         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1557         return error;
   1558     }
   1559     // i==strlen(name)==strlen(lower)==strlen(upper)
   1560 
   1561     /* try extended names first */
   1562     if (lower[0] == '<') {
   1563         if (nameChoice == U_EXTENDED_CHAR_NAME) {
   1564             // Parse a string like "<category-HHHH>" where HHHH is a hex code point.
   1565             if (lower[--i] == '>' && i >= 3 && lower[--i] != '-') {
   1566                 while (i >= 3 && lower[--i] != '-') {}
   1567 
   1568                 if (i >= 2 && lower[i] == '-') {
   1569                     uint32_t cIdx;
   1570 
   1571                     lower[i] = 0;
   1572 
   1573                     for (++i; lower[i] != '>'; ++i) {
   1574                         if (lower[i] >= '0' && lower[i] <= '9') {
   1575                             cp = (cp << 4) + lower[i] - '0';
   1576                         } else if (lower[i] >= 'a' && lower[i] <= 'f') {
   1577                             cp = (cp << 4) + lower[i] - 'a' + 10;
   1578                         } else {
   1579                             *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1580                             return error;
   1581                         }
   1582                     }
   1583 
   1584                     /* Now validate the category name.
   1585                        We could use a binary search, or a trie, if
   1586                        we really wanted to. */
   1587 
   1588                     for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
   1589 
   1590                         if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
   1591                             if (getCharCat(cp) == cIdx) {
   1592                                 return cp;
   1593                             }
   1594                             break;
   1595                         }
   1596                     }
   1597                 }
   1598             }
   1599         }
   1600 
   1601         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1602         return error;
   1603     }
   1604 
   1605     /* try algorithmic names now */
   1606     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1607     i=*p;
   1608     algRange=(AlgorithmicRange *)(p+1);
   1609     while(i>0) {
   1610         if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
   1611             return cp;
   1612         }
   1613         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
   1614         --i;
   1615     }
   1616 
   1617     /* normal character name */
   1618     findName.otherName=upper;
   1619     findName.code=error;
   1620     enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
   1621     if (findName.code == error) {
   1622          *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1623     }
   1624     return findName.code;
   1625 }
   1626 
   1627 U_CAPI void U_EXPORT2
   1628 u_enumCharNames(UChar32 start, UChar32 limit,
   1629                 UEnumCharNamesFn *fn,
   1630                 void *context,
   1631                 UCharNameChoice nameChoice,
   1632                 UErrorCode *pErrorCode) {
   1633     AlgorithmicRange *algRange;
   1634     uint32_t *p;
   1635     uint32_t i;
   1636 
   1637     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1638         return;
   1639     }
   1640 
   1641     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
   1642         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1643         return;
   1644     }
   1645 
   1646     if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
   1647         limit = UCHAR_MAX_VALUE + 1;
   1648     }
   1649     if((uint32_t)start>=(uint32_t)limit) {
   1650         return;
   1651     }
   1652 
   1653     if(!isDataLoaded(pErrorCode)) {
   1654         return;
   1655     }
   1656 
   1657     /* interleave the data-driven ones with the algorithmic ones */
   1658     /* iterate over all algorithmic ranges; assume that they are in ascending order */
   1659     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1660     i=*p;
   1661     algRange=(AlgorithmicRange *)(p+1);
   1662     while(i>0) {
   1663         /* enumerate the character names before the current algorithmic range */
   1664         /* here: start<limit */
   1665         if((uint32_t)start<algRange->start) {
   1666             if((uint32_t)limit<=algRange->start) {
   1667                 enumNames(uCharNames, start, limit, fn, context, nameChoice);
   1668                 return;
   1669             }
   1670             if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
   1671                 return;
   1672             }
   1673             start=(UChar32)algRange->start;
   1674         }
   1675         /* enumerate the character names in the current algorithmic range */
   1676         /* here: algRange->start<=start<limit */
   1677         if((uint32_t)start<=algRange->end) {
   1678             if((uint32_t)limit<=(algRange->end+1)) {
   1679                 enumAlgNames(algRange, start, limit, fn, context, nameChoice);
   1680                 return;
   1681             }
   1682             if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
   1683                 return;
   1684             }
   1685             start=(UChar32)algRange->end+1;
   1686         }
   1687         /* continue to the next algorithmic range (here: start<limit) */
   1688         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
   1689         --i;
   1690     }
   1691     /* enumerate the character names after the last algorithmic range */
   1692     enumNames(uCharNames, start, limit, fn, context, nameChoice);
   1693 }
   1694 
   1695 U_CAPI int32_t U_EXPORT2
   1696 uprv_getMaxCharNameLength() {
   1697     UErrorCode errorCode=U_ZERO_ERROR;
   1698     if(calcNameSetsLengths(&errorCode)) {
   1699         return gMaxNameLength;
   1700     } else {
   1701         return 0;
   1702     }
   1703 }
   1704 
   1705 /**
   1706  * Converts the char set cset into a Unicode set uset.
   1707  * @param cset Set of 256 bit flags corresponding to a set of chars.
   1708  * @param uset USet to receive characters. Existing contents are deleted.
   1709  */
   1710 static void
   1711 charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
   1712     UChar us[256];
   1713     char cs[256];
   1714 
   1715     int32_t i, length;
   1716     UErrorCode errorCode;
   1717 
   1718     errorCode=U_ZERO_ERROR;
   1719 
   1720     if(!calcNameSetsLengths(&errorCode)) {
   1721         return;
   1722     }
   1723 
   1724     /* build a char string with all chars that are used in character names */
   1725     length=0;
   1726     for(i=0; i<256; ++i) {
   1727         if(SET_CONTAINS(cset, i)) {
   1728             cs[length++]=(char)i;
   1729         }
   1730     }
   1731 
   1732     /* convert the char string to a UChar string */
   1733     u_charsToUChars(cs, us, length);
   1734 
   1735     /* add each UChar to the USet */
   1736     for(i=0; i<length; ++i) {
   1737         if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
   1738             sa->add(sa->set, us[i]);
   1739         }
   1740     }
   1741 }
   1742 
   1743 /**
   1744  * Fills set with characters that are used in Unicode character names.
   1745  * @param set USet to receive characters.
   1746  */
   1747 U_CAPI void U_EXPORT2
   1748 uprv_getCharNameCharacters(const USetAdder *sa) {
   1749     charSetToUSet(gNameSet, sa);
   1750 }
   1751 
   1752 /* data swapping ------------------------------------------------------------ */
   1753 
   1754 /*
   1755  * The token table contains non-negative entries for token bytes,
   1756  * and -1 for bytes that represent themselves in the data file's charset.
   1757  * -2 entries are used for lead bytes.
   1758  *
   1759  * Direct bytes (-1 entries) must be translated from the input charset family
   1760  * to the output charset family.
   1761  * makeTokenMap() writes a permutation mapping for this.
   1762  * Use it once for single-/lead-byte tokens and once more for all trail byte
   1763  * tokens. (';' is an unused trail byte marked with -1.)
   1764  */
   1765 static void
   1766 makeTokenMap(const UDataSwapper *ds,
   1767              int16_t tokens[], uint16_t tokenCount,
   1768              uint8_t map[256],
   1769              UErrorCode *pErrorCode) {
   1770     UBool usedOutChar[256];
   1771     uint16_t i, j;
   1772     uint8_t c1, c2;
   1773 
   1774     if(U_FAILURE(*pErrorCode)) {
   1775         return;
   1776     }
   1777 
   1778     if(ds->inCharset==ds->outCharset) {
   1779         /* Same charset family: identity permutation */
   1780         for(i=0; i<256; ++i) {
   1781             map[i]=(uint8_t)i;
   1782         }
   1783     } else {
   1784         uprv_memset(map, 0, 256);
   1785         uprv_memset(usedOutChar, 0, 256);
   1786 
   1787         if(tokenCount>256) {
   1788             tokenCount=256;
   1789         }
   1790 
   1791         /* set the direct bytes (byte 0 always maps to itself) */
   1792         for(i=1; i<tokenCount; ++i) {
   1793             if(tokens[i]==-1) {
   1794                 /* convert the direct byte character */
   1795                 c1=(uint8_t)i;
   1796                 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
   1797                 if(U_FAILURE(*pErrorCode)) {
   1798                     udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
   1799                                      i, ds->inCharset);
   1800                     return;
   1801                 }
   1802 
   1803                 /* enter the converted character into the map and mark it used */
   1804                 map[c1]=c2;
   1805                 usedOutChar[c2]=TRUE;
   1806             }
   1807         }
   1808 
   1809         /* set the mappings for the rest of the permutation */
   1810         for(i=j=1; i<tokenCount; ++i) {
   1811             /* set mappings that were not set for direct bytes */
   1812             if(map[i]==0) {
   1813                 /* set an output byte value that was not used as an output byte above */
   1814                 while(usedOutChar[j]) {
   1815                     ++j;
   1816                 }
   1817                 map[i]=(uint8_t)j++;
   1818             }
   1819         }
   1820 
   1821         /*
   1822          * leave mappings at tokenCount and above unset if tokenCount<256
   1823          * because they won't be used
   1824          */
   1825     }
   1826 }
   1827 
   1828 U_CAPI int32_t U_EXPORT2
   1829 uchar_swapNames(const UDataSwapper *ds,
   1830                 const void *inData, int32_t length, void *outData,
   1831                 UErrorCode *pErrorCode) {
   1832     const UDataInfo *pInfo;
   1833     int32_t headerSize;
   1834 
   1835     const uint8_t *inBytes;
   1836     uint8_t *outBytes;
   1837 
   1838     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
   1839              offset, i, count, stringsCount;
   1840 
   1841     const AlgorithmicRange *inRange;
   1842     AlgorithmicRange *outRange;
   1843 
   1844     /* udata_swapDataHeader checks the arguments */
   1845     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1846     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1847         return 0;
   1848     }
   1849 
   1850     /* check data format and format version */
   1851     pInfo=(const UDataInfo *)((const char *)inData+4);
   1852     if(!(
   1853         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
   1854         pInfo->dataFormat[1]==0x6e &&
   1855         pInfo->dataFormat[2]==0x61 &&
   1856         pInfo->dataFormat[3]==0x6d &&
   1857         pInfo->formatVersion[0]==1
   1858     )) {
   1859         udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
   1860                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   1861                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   1862                          pInfo->formatVersion[0]);
   1863         *pErrorCode=U_UNSUPPORTED_ERROR;
   1864         return 0;
   1865     }
   1866 
   1867     inBytes=(const uint8_t *)inData+headerSize;
   1868     outBytes=(uint8_t *)outData+headerSize;
   1869     if(length<0) {
   1870         algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
   1871     } else {
   1872         length-=headerSize;
   1873         if( length<20 ||
   1874             (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
   1875         ) {
   1876             udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
   1877                              length);
   1878             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1879             return 0;
   1880         }
   1881     }
   1882 
   1883     if(length<0) {
   1884         /* preflighting: iterate through algorithmic ranges */
   1885         offset=algNamesOffset;
   1886         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
   1887         offset+=4;
   1888 
   1889         for(i=0; i<count; ++i) {
   1890             inRange=(const AlgorithmicRange *)(inBytes+offset);
   1891             offset+=ds->readUInt16(inRange->size);
   1892         }
   1893     } else {
   1894         /* swap data */
   1895         const uint16_t *p;
   1896         uint16_t *q, *temp;
   1897 
   1898         int16_t tokens[512];
   1899         uint16_t tokenCount;
   1900 
   1901         uint8_t map[256], trailMap[256];
   1902 
   1903         /* copy the data for inaccessible bytes */
   1904         if(inBytes!=outBytes) {
   1905             uprv_memcpy(outBytes, inBytes, length);
   1906         }
   1907 
   1908         /* the initial 4 offsets first */
   1909         tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
   1910         groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
   1911         groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
   1912         ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
   1913 
   1914         /*
   1915          * now the tokens table
   1916          * it needs to be permutated along with the compressed name strings
   1917          */
   1918         p=(const uint16_t *)(inBytes+16);
   1919         q=(uint16_t *)(outBytes+16);
   1920 
   1921         /* read and swap the tokenCount */
   1922         tokenCount=ds->readUInt16(*p);
   1923         ds->swapArray16(ds, p, 2, q, pErrorCode);
   1924         ++p;
   1925         ++q;
   1926 
   1927         /* read the first 512 tokens and make the token maps */
   1928         if(tokenCount<=512) {
   1929             count=tokenCount;
   1930         } else {
   1931             count=512;
   1932         }
   1933         for(i=0; i<count; ++i) {
   1934             tokens[i]=udata_readInt16(ds, p[i]);
   1935         }
   1936         for(; i<512; ++i) {
   1937             tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
   1938         }
   1939         makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
   1940         makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
   1941         if(U_FAILURE(*pErrorCode)) {
   1942             return 0;
   1943         }
   1944 
   1945         /*
   1946          * swap and permutate the tokens
   1947          * go through a temporary array to support in-place swapping
   1948          */
   1949         temp=(uint16_t *)uprv_malloc(tokenCount*2);
   1950         if(temp==NULL) {
   1951             udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
   1952                              tokenCount);
   1953             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1954             return 0;
   1955         }
   1956 
   1957         /* swap and permutate single-/lead-byte tokens */
   1958         for(i=0; i<tokenCount && i<256; ++i) {
   1959             ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
   1960         }
   1961 
   1962         /* swap and permutate trail-byte tokens */
   1963         for(; i<tokenCount; ++i) {
   1964             ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
   1965         }
   1966 
   1967         /* copy the result into the output and free the temporary array */
   1968         uprv_memcpy(q, temp, tokenCount*2);
   1969         uprv_free(temp);
   1970 
   1971         /*
   1972          * swap the token strings but not a possible padding byte after
   1973          * the terminating NUL of the last string
   1974          */
   1975         udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
   1976                                     outBytes+tokenStringOffset, pErrorCode);
   1977         if(U_FAILURE(*pErrorCode)) {
   1978             udata_printError(ds, "uchar_swapNames(token strings) failed\n");
   1979             return 0;
   1980         }
   1981 
   1982         /* swap the group table */
   1983         count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
   1984         ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
   1985                            outBytes+groupsOffset, pErrorCode);
   1986 
   1987         /*
   1988          * swap the group strings
   1989          * swap the string bytes but not the nibble-encoded string lengths
   1990          */
   1991         if(ds->inCharset!=ds->outCharset) {
   1992             uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
   1993 
   1994             const uint8_t *inStrings, *nextInStrings;
   1995             uint8_t *outStrings;
   1996 
   1997             uint8_t c;
   1998 
   1999             inStrings=inBytes+groupStringOffset;
   2000             outStrings=outBytes+groupStringOffset;
   2001 
   2002             stringsCount=algNamesOffset-groupStringOffset;
   2003 
   2004             /* iterate through string groups until only a few padding bytes are left */
   2005             while(stringsCount>32) {
   2006                 nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
   2007 
   2008                 /* move past the length bytes */
   2009                 stringsCount-=(uint32_t)(nextInStrings-inStrings);
   2010                 outStrings+=nextInStrings-inStrings;
   2011                 inStrings=nextInStrings;
   2012 
   2013                 count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
   2014                 stringsCount-=count;
   2015 
   2016                 /* swap the string bytes using map[] and trailMap[] */
   2017                 while(count>0) {
   2018                     c=*inStrings++;
   2019                     *outStrings++=map[c];
   2020                     if(tokens[c]!=-2) {
   2021                         --count;
   2022                     } else {
   2023                         /* token lead byte: swap the trail byte, too */
   2024                         *outStrings++=trailMap[*inStrings++];
   2025                         count-=2;
   2026                     }
   2027                 }
   2028             }
   2029         }
   2030 
   2031         /* swap the algorithmic ranges */
   2032         offset=algNamesOffset;
   2033         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
   2034         ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
   2035         offset+=4;
   2036 
   2037         for(i=0; i<count; ++i) {
   2038             if(offset>(uint32_t)length) {
   2039                 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
   2040                                  length, i);
   2041                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   2042                 return 0;
   2043             }
   2044 
   2045             inRange=(const AlgorithmicRange *)(inBytes+offset);
   2046             outRange=(AlgorithmicRange *)(outBytes+offset);
   2047             offset+=ds->readUInt16(inRange->size);
   2048 
   2049             ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
   2050             ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
   2051             switch(inRange->type) {
   2052             case 0:
   2053                 /* swap prefix string */
   2054                 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
   2055                                     outRange+1, pErrorCode);
   2056                 if(U_FAILURE(*pErrorCode)) {
   2057                     udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
   2058                                      i);
   2059                     return 0;
   2060                 }
   2061                 break;
   2062             case 1:
   2063                 {
   2064                     /* swap factors and the prefix and factor strings */
   2065                     uint32_t factorsCount;
   2066 
   2067                     factorsCount=inRange->variant;
   2068                     p=(const uint16_t *)(inRange+1);
   2069                     q=(uint16_t *)(outRange+1);
   2070                     ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
   2071 
   2072                     /* swap the strings, up to the last terminating NUL */
   2073                     p+=factorsCount;
   2074                     q+=factorsCount;
   2075                     stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
   2076                     while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
   2077                         --stringsCount;
   2078                     }
   2079                     ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
   2080                 }
   2081                 break;
   2082             default:
   2083                 udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
   2084                                  inRange->type, i);
   2085                 *pErrorCode=U_UNSUPPORTED_ERROR;
   2086                 return 0;
   2087             }
   2088         }
   2089     }
   2090 
   2091     return headerSize+(int32_t)offset;
   2092 }
   2093 
   2094 /*
   2095  * Hey, Emacs, please set the following:
   2096  *
   2097  * Local Variables:
   2098  * indent-tabs-mode: nil
   2099  * End:
   2100  *
   2101  */
   2102