Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2014, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *   file name:  unames.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 1999oct04
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 #include "unicode/putil.h"
     19 #include "unicode/uchar.h"
     20 #include "unicode/udata.h"
     21 #include "unicode/utf.h"
     22 #include "unicode/utf16.h"
     23 #include "uassert.h"
     24 #include "ustr_imp.h"
     25 #include "umutex.h"
     26 #include "cmemory.h"
     27 #include "cstring.h"
     28 #include "ucln_cmn.h"
     29 #include "udataswp.h"
     30 #include "uprops.h"
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 /* prototypes ------------------------------------------------------------- */
     35 
     /* Type and name of the data item that loadCharNames() passes to udata_openChoice(). */
     static const char DATA_TYPE[] = "icu";
     static const char DATA_NAME[] = "unames";

     /*
      * Names are stored in groups of 2^GROUP_SHIFT consecutive code points ("lines"):
      * code>>GROUP_SHIFT selects the group, code&GROUP_MASK the line inside it.
      */
     #define GROUP_SHIFT 5
     #define LINES_PER_GROUP (1L<<GROUP_SHIFT)
     #define GROUP_MASK (LINES_PER_GROUP-1)
     42 
     43 /*
     44  * This struct was replaced by explicitly accessing equivalent
     45  * fields from triples of uint16_t.
     46  * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
     47  * which broke the assumption that sizeof(Group)==6 and that the ++ operator
     48  * would advance by 6 bytes (3 uint16_t).
     49  *
     50  * We can't just change the data structure because it's loaded from a data file,
     51  * and we don't want to make it less compact, so we changed the access code.
     52  *
     53  * For details see ICU tickets 6331 and 6008.
     54 typedef struct {
     55     uint16_t groupMSB,
     56              offsetHigh, offsetLow; / * avoid padding * /
     57 } Group;
     58  */
     /*
      * Indexes of the three uint16_t fields that make up one group, and the
      * number of uint16_t per group (GROUP_LENGTH).
      * GROUP_MSB holds code>>GROUP_SHIFT for the code points of the group.
      */
     enum {
         GROUP_MSB,
         GROUP_OFFSET_HIGH,
         GROUP_OFFSET_LOW,
         GROUP_LENGTH
     };

     /*
      * Get the 32-bit group offset.
      * The high half is widened to uint32_t before it is shifted so that a
      * value with bit 15 set does not left-shift into the sign bit of a
      * signed integer (undefined behavior); the result is then converted
      * to int32_t as before.
      * @param group (const uint16_t *) pointer to a Group triple of uint16_t
      * @return group offset (int32_t)
      */
     #define GET_GROUP_OFFSET(group) \
         ((int32_t)(((uint32_t)(group)[GROUP_OFFSET_HIGH]<<16)|(group)[GROUP_OFFSET_LOW]))

     /* Step a group pointer forward/backward by one triple. */
     #define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
     #define PREV_GROUP(group) ((group)-GROUP_LENGTH)
     75 
     /*
      * Header of one algorithmic name range.
      * NOTE(review): the code that interprets type/variant/size is outside this
      * part of the file; start/end are presumably the inclusive code point
      * bounds of the range - confirm against getAlgName().
      */
     typedef struct {
         uint32_t start;
         uint32_t end;
         uint8_t type;
         uint8_t variant;
         uint16_t size;
     } AlgorithmicRange;
     81 
     /*
      * Index header at the start of the loaded names data.
      * tokenStringOffset, groupsOffset and groupStringOffset are byte offsets
      * counted from the start of this struct (see expandName(), GET_GROUPS and
      * expandGroupName()).
      * NOTE(review): algNamesOffset is not used in this part of the file;
      * presumably it follows the same convention - confirm.
      */
     typedef struct {
         uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
     } UCharNames;

     /*
      * Get the groups table from a UCharNames struct.
      * The groups table consists of one uint16_t groupCount followed by
      * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
      * and the comment for the old struct Group above.
      *
      * The argument and the whole expansion are parenthesized so that the macro
      * can be used with any pointer expression and can be indexed directly.
      *
      * @param names (const UCharNames *) pointer to the UCharNames indexes
      * @return (const uint16_t *) pointer to the groups table
      */
     #define GET_GROUPS(names) \
         ((const uint16_t *)((const char *)(names)+(names)->groupsOffset))
     96 
     97 typedef struct {
     98     const char *otherName;
     99     UChar32 code;
    100 } FindName;
    101 
    102 #define DO_FIND_NAME NULL
    103 
    104 static UDataMemory *uCharNamesData=NULL;
    105 static UCharNames *uCharNames=NULL;
    106 static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;
    107 
    108 /*
    109  * Maximum length of character names (regular & 1.0).
    110  */
    111 static int32_t gMaxNameLength=0;
    112 
    113 /*
    114  * Set of chars used in character names (regular & 1.0).
    115  * Chars are platform-dependent (can be EBCDIC).
    116  */
    117 static uint32_t gNameSet[8]={ 0 };
    118 
    /*
     * File-local category values appended after the regular general categories;
     * getCharCat() returns them for noncharacters and for lead/trail surrogates.
     * Every expansion is parenthesized so that the macros behave as single
     * values inside larger expressions.
     */
    #define U_NONCHARACTER_CODE_POINT (U_CHAR_CATEGORY_COUNT)
    #define U_LEAD_SURROGATE (U_CHAR_CATEGORY_COUNT + 1)
    #define U_TRAIL_SURROGATE (U_CHAR_CATEGORY_COUNT + 2)

    /* Number of regular plus file-local categories; size of charCatNames[]. */
    #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
    124 
    125 static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
    126     "unassigned",
    127     "uppercase letter",
    128     "lowercase letter",
    129     "titlecase letter",
    130     "modifier letter",
    131     "other letter",
    132     "non spacing mark",
    133     "enclosing mark",
    134     "combining spacing mark",
    135     "decimal digit number",
    136     "letter number",
    137     "other number",
    138     "space separator",
    139     "line separator",
    140     "paragraph separator",
    141     "control",
    142     "format",
    143     "private use area",
    144     "surrogate",
    145     "dash punctuation",
    146     "start punctuation",
    147     "end punctuation",
    148     "connector punctuation",
    149     "other punctuation",
    150     "math symbol",
    151     "currency symbol",
    152     "modifier symbol",
    153     "other symbol",
    154     "initial punctuation",
    155     "final punctuation",
    156     "noncharacter",
    157     "lead surrogate",
    158     "trail surrogate"
    159 };
    160 
    161 /* implementation ----------------------------------------------------------- */
    162 
    163 static UBool U_CALLCONV unames_cleanup(void)
    164 {
    165     if(uCharNamesData) {
    166         udata_close(uCharNamesData);
    167         uCharNamesData = NULL;
    168     }
    169     if(uCharNames) {
    170         uCharNames = NULL;
    171     }
    172     gCharNamesInitOnce.reset();
    173     gMaxNameLength=0;
    174     return TRUE;
    175 }
    176 
    177 static UBool U_CALLCONV
    178 isAcceptable(void * /*context*/,
    179              const char * /*type*/, const char * /*name*/,
    180              const UDataInfo *pInfo) {
    181     return (UBool)(
    182         pInfo->size>=20 &&
    183         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
    184         pInfo->charsetFamily==U_CHARSET_FAMILY &&
    185         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
    186         pInfo->dataFormat[1]==0x6e &&
    187         pInfo->dataFormat[2]==0x61 &&
    188         pInfo->dataFormat[3]==0x6d &&
    189         pInfo->formatVersion[0]==1);
    190 }
    191 
    192 static void U_CALLCONV
    193 loadCharNames(UErrorCode &status) {
    194     U_ASSERT(uCharNamesData == NULL);
    195     U_ASSERT(uCharNames == NULL);
    196 
    197     uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);
    198     if(U_FAILURE(status)) {
    199         uCharNamesData = NULL;
    200     } else {
    201         uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);
    202     }
    203     ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
    204 }
    205 
    206 
    207 static UBool
    208 isDataLoaded(UErrorCode *pErrorCode) {
    209     umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);
    210     return U_SUCCESS(*pErrorCode);
    211 }
    212 
    /*
     * Append one char to an output buffer that may be too short.
     * The char is stored (and buffer/bufferLength are advanced) only while
     * bufferLength>0; bufferPos is incremented in every case, so it counts the
     * chars that a sufficiently large buffer would have received.
     * The store happens before bufferPos is incremented; callers may pass an
     * expression for c that reads bufferPos.
     * All of buffer, bufferLength and bufferPos must be modifiable lvalues.
     */
    #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
        if((bufferLength)>0) { *(buffer)++=c; --(bufferLength); } \
        ++(bufferPos); \
    }

    /* Pseudo name choice one past the regular UCharNameChoice values; selects the ISO comment field. */
    #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
    222 
    223 /*
    224  * Important: expandName() and compareName() are almost the same -
    225  * apply fixes to both.
    226  *
    227  * UnicodeData.txt uses ';' as a field separator, so no
    228  * field can contain ';' as part of its contents.
    229  * In unames.dat, it is marked as token[';']==-1 only if the
    230  * semicolon is used in the data file - which is iff we
    231  * have Unicode 1.0 names or ISO comments or aliases.
    232  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
    233  * although we know that it will never be part of a name.
    234  */
    235 static uint16_t
    236 expandName(UCharNames *names,
    237            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
    238            char *buffer, uint16_t bufferLength) {
    239     uint16_t *tokens=(uint16_t *)names+8;
    240     uint16_t token, tokenCount=*tokens++, bufferPos=0;
    241     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
    242     uint8_t c;
    243 
    244     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    245         /*
    246          * skip the modern name if it is not requested _and_
    247          * if the semicolon byte value is a character, not a token number
    248          */
    249         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    250             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
    251             do {
    252                 while(nameLength>0) {
    253                     --nameLength;
    254                     if(*name++==';') {
    255                         break;
    256                     }
    257                 }
    258             } while(--fieldIndex>0);
    259         } else {
    260             /*
    261              * the semicolon byte value is a token number, therefore
    262              * only modern names are stored in unames.dat and there is no
    263              * such requested alternate name here
    264              */
    265             nameLength=0;
    266         }
    267     }
    268 
    269     /* write each letter directly, and write a token word per token */
    270     while(nameLength>0) {
    271         --nameLength;
    272         c=*name++;
    273 
    274         if(c>=tokenCount) {
    275             if(c!=';') {
    276                 /* implicit letter */
    277                 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    278             } else {
    279                 /* finished */
    280                 break;
    281             }
    282         } else {
    283             token=tokens[c];
    284             if(token==(uint16_t)(-2)) {
    285                 /* this is a lead byte for a double-byte token */
    286                 token=tokens[c<<8|*name++];
    287                 --nameLength;
    288             }
    289             if(token==(uint16_t)(-1)) {
    290                 if(c!=';') {
    291                     /* explicit letter */
    292                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    293                 } else {
    294                     /* stop, but skip the semicolon if we are seeking
    295                        extended names and there was no 2.0 name but there
    296                        is a 1.0 name. */
    297                     if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
    298                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    299                             continue;
    300                         }
    301                     }
    302                     /* finished */
    303                     break;
    304                 }
    305             } else {
    306                 /* write token word */
    307                 uint8_t *tokenString=tokenStrings+token;
    308                 while((c=*tokenString++)!=0) {
    309                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    310                 }
    311             }
    312         }
    313     }
    314 
    315     /* zero-terminate */
    316     if(bufferLength>0) {
    317         *buffer=0;
    318     }
    319 
    320     return bufferPos;
    321 }
    322 
    323 /*
    324  * compareName() is almost the same as expandName() except that it compares
    325  * the currently expanded name to an input name.
    326  * It returns the match/no match result as soon as possible.
    327  */
    328 static UBool
    329 compareName(UCharNames *names,
    330             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
    331             const char *otherName) {
    332     uint16_t *tokens=(uint16_t *)names+8;
    333     uint16_t token, tokenCount=*tokens++;
    334     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
    335     uint8_t c;
    336     const char *origOtherName = otherName;
    337 
    338     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    339         /*
    340          * skip the modern name if it is not requested _and_
    341          * if the semicolon byte value is a character, not a token number
    342          */
    343         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    344             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
    345             do {
    346                 while(nameLength>0) {
    347                     --nameLength;
    348                     if(*name++==';') {
    349                         break;
    350                     }
    351                 }
    352             } while(--fieldIndex>0);
    353         } else {
    354             /*
    355              * the semicolon byte value is a token number, therefore
    356              * only modern names are stored in unames.dat and there is no
    357              * such requested alternate name here
    358              */
    359             nameLength=0;
    360         }
    361     }
    362 
    363     /* compare each letter directly, and compare a token word per token */
    364     while(nameLength>0) {
    365         --nameLength;
    366         c=*name++;
    367 
    368         if(c>=tokenCount) {
    369             if(c!=';') {
    370                 /* implicit letter */
    371                 if((char)c!=*otherName++) {
    372                     return FALSE;
    373                 }
    374             } else {
    375                 /* finished */
    376                 break;
    377             }
    378         } else {
    379             token=tokens[c];
    380             if(token==(uint16_t)(-2)) {
    381                 /* this is a lead byte for a double-byte token */
    382                 token=tokens[c<<8|*name++];
    383                 --nameLength;
    384             }
    385             if(token==(uint16_t)(-1)) {
    386                 if(c!=';') {
    387                     /* explicit letter */
    388                     if((char)c!=*otherName++) {
    389                         return FALSE;
    390                     }
    391                 } else {
    392                     /* stop, but skip the semicolon if we are seeking
    393                        extended names and there was no 2.0 name but there
    394                        is a 1.0 name. */
    395                     if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
    396                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    397                             continue;
    398                         }
    399                     }
    400                     /* finished */
    401                     break;
    402                 }
    403             } else {
    404                 /* write token word */
    405                 uint8_t *tokenString=tokenStrings+token;
    406                 while((c=*tokenString++)!=0) {
    407                     if((char)c!=*otherName++) {
    408                         return FALSE;
    409                     }
    410                 }
    411             }
    412         }
    413     }
    414 
    415     /* complete match? */
    416     return (UBool)(*otherName==0);
    417 }
    418 
    419 static uint8_t getCharCat(UChar32 cp) {
    420     uint8_t cat;
    421 
    422     if (U_IS_UNICODE_NONCHAR(cp)) {
    423         return U_NONCHARACTER_CODE_POINT;
    424     }
    425 
    426     if ((cat = u_charType(cp)) == U_SURROGATE) {
    427         cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
    428     }
    429 
    430     return cat;
    431 }
    432 
    433 static const char *getCharCatName(UChar32 cp) {
    434     uint8_t cat = getCharCat(cp);
    435 
    436     /* Return unknown if the table of names above is not up to
    437        date. */
    438 
    439     if (cat >= UPRV_LENGTHOF(charCatNames)) {
    440         return "unknown";
    441     } else {
    442         return charCatNames[cat];
    443     }
    444 }
    445 
    446 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
    447     const char *catname = getCharCatName(code);
    448     uint16_t length = 0;
    449 
    450     UChar32 cp;
    451     int ndigits, i;
    452 
    453     WRITE_CHAR(buffer, bufferLength, length, '<');
    454     while (catname[length - 1]) {
    455         WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
    456     }
    457     WRITE_CHAR(buffer, bufferLength, length, '-');
    458     for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
    459         ;
    460     if (ndigits < 4)
    461         ndigits = 4;
    462     for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
    463         uint8_t v = (uint8_t)(cp & 0xf);
    464         buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
    465     }
    466     buffer += ndigits;
    467     length += ndigits;
    468     WRITE_CHAR(buffer, bufferLength, length, '>');
    469 
    470     return length;
    471 }
    472 
    473 /*
    474  * getGroup() does a binary search for the group that contains the
    475  * Unicode code point "code".
    476  * The return value is always a valid Group* that may contain "code"
    477  * or else is the highest group before "code".
    478  * If the lowest group is after "code", then that one is returned.
    479  */
    480 static const uint16_t *
    481 getGroup(UCharNames *names, uint32_t code) {
    482     const uint16_t *groups=GET_GROUPS(names);
    483     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
    484              start=0,
    485              limit=*groups++,
    486              number;
    487 
    488     /* binary search for the group of names that contains the one for code */
    489     while(start<limit-1) {
    490         number=(uint16_t)((start+limit)/2);
    491         if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
    492             limit=number;
    493         } else {
    494             start=number;
    495         }
    496     }
    497 
    498     /* return this regardless of whether it is an exact match */
    499     return groups+start*GROUP_LENGTH;
    500 }
    501 
    502 /*
    503  * expandGroupLengths() reads a block of compressed lengths of 32 strings and
    504  * expands them into offsets and lengths for each string.
    505  * Lengths are stored with a variable-width encoding in consecutive nibbles:
    506  * If a nibble<0xc, then it is the length itself (0=empty string).
    507  * If a nibble>=0xc, then it forms a length value with the following nibble.
    508  * Calculation see below.
    509  * The offsets and lengths arrays must be at least 33 (one more) long because
    510  * there is no check here at the end if the last nibble is still used.
    511  */
    512 static const uint8_t *
    513 expandGroupLengths(const uint8_t *s,
    514                    uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
    515     /* read the lengths of the 32 strings in this group and get each string's offset */
    516     uint16_t i=0, offset=0, length=0;
    517     uint8_t lengthByte;
    518 
    519     /* all 32 lengths must be read to get the offset of the first group string */
    520     while(i<LINES_PER_GROUP) {
    521         lengthByte=*s++;
    522 
    523         /* read even nibble - MSBs of lengthByte */
    524         if(length>=12) {
    525             /* double-nibble length spread across two bytes */
    526             length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
    527             lengthByte&=0xf;
    528         } else if((lengthByte /* &0xf0 */)>=0xc0) {
    529             /* double-nibble length spread across this one byte */
    530             length=(uint16_t)((lengthByte&0x3f)+12);
    531         } else {
    532             /* single-nibble length in MSBs */
    533             length=(uint16_t)(lengthByte>>4);
    534             lengthByte&=0xf;
    535         }
    536 
    537         *offsets++=offset;
    538         *lengths++=length;
    539 
    540         offset+=length;
    541         ++i;
    542 
    543         /* read odd nibble - LSBs of lengthByte */
    544         if((lengthByte&0xf0)==0) {
    545             /* this nibble was not consumed for a double-nibble length above */
    546             length=lengthByte;
    547             if(length<12) {
    548                 /* single-nibble length in LSBs */
    549                 *offsets++=offset;
    550                 *lengths++=length;
    551 
    552                 offset+=length;
    553                 ++i;
    554             }
    555         } else {
    556             length=0;   /* prevent double-nibble detection in the next iteration */
    557         }
    558     }
    559 
    560     /* now, s is at the first group string */
    561     return s;
    562 }
    563 
    564 static uint16_t
    565 expandGroupName(UCharNames *names, const uint16_t *group,
    566                 uint16_t lineNumber, UCharNameChoice nameChoice,
    567                 char *buffer, uint16_t bufferLength) {
    568     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
    569     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
    570     s=expandGroupLengths(s, offsets, lengths);
    571     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
    572                       buffer, bufferLength);
    573 }
    574 
    575 static uint16_t
    576 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
    577         char *buffer, uint16_t bufferLength) {
    578     const uint16_t *group=getGroup(names, code);
    579     if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
    580         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
    581                                buffer, bufferLength);
    582     } else {
    583         /* group not found */
    584         /* zero-terminate */
    585         if(bufferLength>0) {
    586             *buffer=0;
    587         }
    588         return 0;
    589     }
    590 }
    591 
    592 /*
    593  * enumGroupNames() enumerates all the names in a 32-group
    594  * and either calls the enumerator function or finds a given input name.
    595  */
    596 static UBool
    597 enumGroupNames(UCharNames *names, const uint16_t *group,
    598                UChar32 start, UChar32 end,
    599                UEnumCharNamesFn *fn, void *context,
    600                UCharNameChoice nameChoice) {
    601     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
    602     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
    603 
    604     s=expandGroupLengths(s, offsets, lengths);
    605     if(fn!=DO_FIND_NAME) {
    606         char buffer[200];
    607         uint16_t length;
    608 
    609         while(start<=end) {
    610             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
    611             if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
    612                 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
    613             }
    614             /* here, we assume that the buffer is large enough */
    615             if(length>0) {
    616                 if(!fn(context, start, nameChoice, buffer, length)) {
    617                     return FALSE;
    618                 }
    619             }
    620             ++start;
    621         }
    622     } else {
    623         const char *otherName=((FindName *)context)->otherName;
    624         while(start<=end) {
    625             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
    626                 ((FindName *)context)->code=start;
    627                 return FALSE;
    628             }
    629             ++start;
    630         }
    631     }
    632     return TRUE;
    633 }
    634 
    635 /*
    636  * enumExtNames enumerate extended names.
    637  * It only needs to do it if it is called with a real function and not
    638  * with the dummy DO_FIND_NAME, because u_charFromName() does a check
    639  * for extended names by itself.
    640  */
    641 static UBool
    642 enumExtNames(UChar32 start, UChar32 end,
    643              UEnumCharNamesFn *fn, void *context)
    644 {
    645     if(fn!=DO_FIND_NAME) {
    646         char buffer[200];
    647         uint16_t length;
    648 
    649         while(start<=end) {
    650             buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
    651             /* here, we assume that the buffer is large enough */
    652             if(length>0) {
    653                 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
    654                     return FALSE;
    655                 }
    656             }
    657             ++start;
    658         }
    659     }
    660 
    661     return TRUE;
    662 }
    663 
    664 static UBool
    665 enumNames(UCharNames *names,
    666           UChar32 start, UChar32 limit,
    667           UEnumCharNamesFn *fn, void *context,
    668           UCharNameChoice nameChoice) {
    669     uint16_t startGroupMSB, endGroupMSB, groupCount;
    670     const uint16_t *group, *groupLimit;
    671 
    672     startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
    673     endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
    674 
    675     /* find the group that contains start, or the highest before it */
    676     group=getGroup(names, start);
    677 
    678     if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
    679         /* enumerate synthetic names between start and the group start */
    680         UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
    681         if(extLimit>limit) {
    682             extLimit=limit;
    683         }
    684         if(!enumExtNames(start, extLimit-1, fn, context)) {
    685             return FALSE;
    686         }
    687         start=extLimit;
    688     }
    689 
    690     if(startGroupMSB==endGroupMSB) {
    691         if(startGroupMSB==group[GROUP_MSB]) {
    692             /* if start and limit-1 are in the same group, then enumerate only in that one */
    693             return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
    694         }
    695     } else {
    696         const uint16_t *groups=GET_GROUPS(names);
    697         groupCount=*groups++;
    698         groupLimit=groups+groupCount*GROUP_LENGTH;
    699 
    700         if(startGroupMSB==group[GROUP_MSB]) {
    701             /* enumerate characters in the partial start group */
    702             if((start&GROUP_MASK)!=0) {
    703                 if(!enumGroupNames(names, group,
    704                                    start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
    705                                    fn, context, nameChoice)) {
    706                     return FALSE;
    707                 }
    708                 group=NEXT_GROUP(group); /* continue with the next group */
    709             }
    710         } else if(startGroupMSB>group[GROUP_MSB]) {
    711             /* make sure that we start enumerating with the first group after start */
    712             const uint16_t *nextGroup=NEXT_GROUP(group);
    713             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
    714                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
    715                 if (end > limit) {
    716                     end = limit;
    717                 }
    718                 if (!enumExtNames(start, end - 1, fn, context)) {
    719                     return FALSE;
    720                 }
    721             }
    722             group=nextGroup;
    723         }
    724 
    725         /* enumerate entire groups between the start- and end-groups */
    726         while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
    727             const uint16_t *nextGroup;
    728             start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
    729             if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
    730                 return FALSE;
    731             }
    732             nextGroup=NEXT_GROUP(group);
    733             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
    734                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
    735                 if (end > limit) {
    736                     end = limit;
    737                 }
    738                 if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
    739                     return FALSE;
    740                 }
    741             }
    742             group=nextGroup;
    743         }
    744 
    745         /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
    746         if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
    747             return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
    748         } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
    749             UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
    750             if (next > start) {
    751                 start = next;
    752             }
    753         } else {
    754             return TRUE;
    755         }
    756     }
    757 
    758     /* we have not found a group, which means everything is made of
    759        extended names. */
    760     if (nameChoice == U_EXTENDED_CHAR_NAME) {
    761         if (limit > UCHAR_MAX_VALUE + 1) {
    762             limit = UCHAR_MAX_VALUE + 1;
    763         }
    764         return enumExtNames(start, limit - 1, fn, context);
    765     }
    766 
    767     return TRUE;
    768 }
    769 
    /*
     * Write the suffix of a factorized algorithmic name.
     *
     * code is decomposed by modulo arithmetic with factors[count-1] down to
     * factors[1] into indexes[]; what remains becomes indexes[0]. s points to
     * consecutive zero-terminated strings: factors[0] of them for position 0,
     * then factors[1] for position 1, and so on. For every position i the
     * string number indexes[i] of its run is appended to the output.
     *
     * If elementBases is not NULL, it receives for each position the start of
     * that position's run of strings; if elements is not NULL, it receives the
     * string that was selected.
     *
     * At most bufferLength chars are stored, and a terminating 0 is added only
     * if there is room for it. The return value is the full length of the
     * suffix, which can exceed bufferLength.
     */
    static uint16_t
    writeFactorSuffix(const uint16_t *factors, uint16_t count,
                      const char *s, /* suffix elements */
                      uint32_t code,
                      uint16_t indexes[8], /* output fields from here */
                      const char *elementBases[8], const char *elements[8],
                      char *buffer, uint16_t bufferLength) {
        /* index of the last factor; the loops below work with this instead of count */
        const uint16_t last=(uint16_t)(count-1);
        uint16_t written=0;
        uint16_t i;

        /*
         * the factorized elements are determined by modulo arithmetic
         * with the factors of this algorithm
         */
        for(i=last; i>0; --i) {
            const uint16_t factor=factors[i];
            indexes[i]=(uint16_t)(code%factor);
            code/=factor;
        }
        /*
         * we don't need to calculate the last modulus because start<=code<=end
         * guarantees here that code<=factors[0]
         */
        indexes[0]=(uint16_t)code;

        /* write each element; i is 0 when the loop above has finished */
        for(;;) {
            uint16_t skip;
            char c;

            if(elementBases!=NULL) {
                *elementBases++=s;
            }

            /* skip indexes[i] strings */
            for(skip=indexes[i]; skip>0; --skip) {
                while(*s++!=0) {}
            }
            if(elements!=NULL) {
                *elements++=s;
            }

            /* write element */
            while((c=*s++)!=0) {
                if(bufferLength>0) {
                    *buffer++=c;
                    --bufferLength;
                }
                ++written;
            }

            /* we do not need to perform the rest of this loop for the last factor - break here */
            if(i>=last) {
                break;
            }

            /* skip the rest of the strings for this factors[i] */
            for(skip=(uint16_t)(factors[i]-indexes[i]-1); skip>0; --skip) {
                while(*s++!=0) {}
            }

            ++i;
        }

        /* zero-terminate */
        if(bufferLength>0) {
            *buffer=0;
        }

        return written;
    }
    843 
    844 /*
    845  * Important:
    846  * Parts of findAlgName() are almost the same as some of getAlgName().
    847  * Fixes must be applied to both.
    848  */
    849 static uint16_t
    850 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
    851         char *buffer, uint16_t bufferLength) {
    852     uint16_t bufferPos=0;
    853 
    854     /* Only the normative character name can be algorithmic. */
    855     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    856         /* zero-terminate */
    857         if(bufferLength>0) {
    858             *buffer=0;
    859         }
    860         return 0;
    861     }
    862 
    863     switch(range->type) {
    864     case 0: {
    865         /* name = prefix hex-digits */
    866         const char *s=(const char *)(range+1);
    867         char c;
    868 
    869         uint16_t i, count;
    870 
    871         /* copy prefix */
    872         while((c=*s++)!=0) {
    873             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    874         }
    875 
    876         /* write hexadecimal code point value */
    877         count=range->variant;
    878 
    879         /* zero-terminate */
    880         if(count<bufferLength) {
    881             buffer[count]=0;
    882         }
    883 
    884         for(i=count; i>0;) {
    885             if(--i<bufferLength) {
    886                 c=(char)(code&0xf);
    887                 if(c<10) {
    888                     c+='0';
    889                 } else {
    890                     c+='A'-10;
    891                 }
    892                 buffer[i]=c;
    893             }
    894             code>>=4;
    895         }
    896 
    897         bufferPos+=count;
    898         break;
    899     }
    900     case 1: {
    901         /* name = prefix factorized-elements */
    902         uint16_t indexes[8];
    903         const uint16_t *factors=(const uint16_t *)(range+1);
    904         uint16_t count=range->variant;
    905         const char *s=(const char *)(factors+count);
    906         char c;
    907 
    908         /* copy prefix */
    909         while((c=*s++)!=0) {
    910             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    911         }
    912 
    913         bufferPos+=writeFactorSuffix(factors, count,
    914                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
    915         break;
    916     }
    917     default:
    918         /* undefined type */
    919         /* zero-terminate */
    920         if(bufferLength>0) {
    921             *buffer=0;
    922         }
    923         break;
    924     }
    925 
    926     return bufferPos;
    927 }
    928 
    929 /*
    930  * Important: enumAlgNames() and findAlgName() are almost the same.
    931  * Any fix must be applied to both.
    932  */
    933 static UBool
    934 enumAlgNames(AlgorithmicRange *range,
    935              UChar32 start, UChar32 limit,
    936              UEnumCharNamesFn *fn, void *context,
    937              UCharNameChoice nameChoice) {
    938     char buffer[200];
    939     uint16_t length;
    940 
    941     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    942         return TRUE;
    943     }
    944 
    945     switch(range->type) {
    946     case 0: {
    947         char *s, *end;
    948         char c;
    949 
    950         /* get the full name of the start character */
    951         length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
    952         if(length<=0) {
    953             return TRUE;
    954         }
    955 
    956         /* call the enumerator function with this first character */
    957         if(!fn(context, start, nameChoice, buffer, length)) {
    958             return FALSE;
    959         }
    960 
    961         /* go to the end of the name; all these names have the same length */
    962         end=buffer;
    963         while(*end!=0) {
    964             ++end;
    965         }
    966 
    967         /* enumerate the rest of the names */
    968         while(++start<limit) {
    969             /* increment the hexadecimal number on a character-basis */
    970             s=end;
    971             for (;;) {
    972                 c=*--s;
    973                 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
    974                     *s=(char)(c+1);
    975                     break;
    976                 } else if(c=='9') {
    977                     *s='A';
    978                     break;
    979                 } else if(c=='F') {
    980                     *s='0';
    981                 }
    982             }
    983 
    984             if(!fn(context, start, nameChoice, buffer, length)) {
    985                 return FALSE;
    986             }
    987         }
    988         break;
    989     }
    990     case 1: {
    991         uint16_t indexes[8];
    992         const char *elementBases[8], *elements[8];
    993         const uint16_t *factors=(const uint16_t *)(range+1);
    994         uint16_t count=range->variant;
    995         const char *s=(const char *)(factors+count);
    996         char *suffix, *t;
    997         uint16_t prefixLength, i, idx;
    998 
    999         char c;
   1000 
   1001         /* name = prefix factorized-elements */
   1002 
   1003         /* copy prefix */
   1004         suffix=buffer;
   1005         prefixLength=0;
   1006         while((c=*s++)!=0) {
   1007             *suffix++=c;
   1008             ++prefixLength;
   1009         }
   1010 
   1011         /* append the suffix of the start character */
   1012         length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
   1013                                               s, (uint32_t)start-range->start,
   1014                                               indexes, elementBases, elements,
   1015                                               suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
   1016 
   1017         /* call the enumerator function with this first character */
   1018         if(!fn(context, start, nameChoice, buffer, length)) {
   1019             return FALSE;
   1020         }
   1021 
   1022         /* enumerate the rest of the names */
   1023         while(++start<limit) {
   1024             /* increment the indexes in lexical order bound by the factors */
   1025             i=count;
   1026             for (;;) {
   1027                 idx=(uint16_t)(indexes[--i]+1);
   1028                 if(idx<factors[i]) {
   1029                     /* skip one index and its element string */
   1030                     indexes[i]=idx;
   1031                     s=elements[i];
   1032                     while(*s++!=0) {
   1033                     }
   1034                     elements[i]=s;
   1035                     break;
   1036                 } else {
   1037                     /* reset this index to 0 and its element string to the first one */
   1038                     indexes[i]=0;
   1039                     elements[i]=elementBases[i];
   1040                 }
   1041             }
   1042 
   1043             /* to make matters a little easier, just append all elements to the suffix */
   1044             t=suffix;
   1045             length=prefixLength;
   1046             for(i=0; i<count; ++i) {
   1047                 s=elements[i];
   1048                 while((c=*s++)!=0) {
   1049                     *t++=c;
   1050                     ++length;
   1051                 }
   1052             }
   1053             /* zero-terminate */
   1054             *t=0;
   1055 
   1056             if(!fn(context, start, nameChoice, buffer, length)) {
   1057                 return FALSE;
   1058             }
   1059         }
   1060         break;
   1061     }
   1062     default:
   1063         /* undefined type */
   1064         break;
   1065     }
   1066 
   1067     return TRUE;
   1068 }
   1069 
   1070 /*
   1071  * findAlgName() is almost the same as enumAlgNames() except that it
   1072  * returns the code point for a name if it fits into the range.
   1073  * It returns 0xffff otherwise.
   1074  */
   1075 static UChar32
   1076 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
   1077     UChar32 code;
   1078 
   1079     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
   1080         return 0xffff;
   1081     }
   1082 
   1083     switch(range->type) {
   1084     case 0: {
   1085         /* name = prefix hex-digits */
   1086         const char *s=(const char *)(range+1);
   1087         char c;
   1088 
   1089         uint16_t i, count;
   1090 
   1091         /* compare prefix */
   1092         while((c=*s++)!=0) {
   1093             if((char)c!=*otherName++) {
   1094                 return 0xffff;
   1095             }
   1096         }
   1097 
   1098         /* read hexadecimal code point value */
   1099         count=range->variant;
   1100         code=0;
   1101         for(i=0; i<count; ++i) {
   1102             c=*otherName++;
   1103             if('0'<=c && c<='9') {
   1104                 code=(code<<4)|(c-'0');
   1105             } else if('A'<=c && c<='F') {
   1106                 code=(code<<4)|(c-'A'+10);
   1107             } else {
   1108                 return 0xffff;
   1109             }
   1110         }
   1111 
   1112         /* does it fit into the range? */
   1113         if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
   1114             return code;
   1115         }
   1116         break;
   1117     }
   1118     case 1: {
   1119         char buffer[64];
   1120         uint16_t indexes[8];
   1121         const char *elementBases[8], *elements[8];
   1122         const uint16_t *factors=(const uint16_t *)(range+1);
   1123         uint16_t count=range->variant;
   1124         const char *s=(const char *)(factors+count), *t;
   1125         UChar32 start, limit;
   1126         uint16_t i, idx;
   1127 
   1128         char c;
   1129 
   1130         /* name = prefix factorized-elements */
   1131 
   1132         /* compare prefix */
   1133         while((c=*s++)!=0) {
   1134             if((char)c!=*otherName++) {
   1135                 return 0xffff;
   1136             }
   1137         }
   1138 
   1139         start=(UChar32)range->start;
   1140         limit=(UChar32)(range->end+1);
   1141 
   1142         /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
   1143         writeFactorSuffix(factors, count, s, 0,
   1144                           indexes, elementBases, elements, buffer, sizeof(buffer));
   1145 
   1146         /* compare the first suffix */
   1147         if(0==uprv_strcmp(otherName, buffer)) {
   1148             return start;
   1149         }
   1150 
   1151         /* enumerate and compare the rest of the suffixes */
   1152         while(++start<limit) {
   1153             /* increment the indexes in lexical order bound by the factors */
   1154             i=count;
   1155             for (;;) {
   1156                 idx=(uint16_t)(indexes[--i]+1);
   1157                 if(idx<factors[i]) {
   1158                     /* skip one index and its element string */
   1159                     indexes[i]=idx;
   1160                     s=elements[i];
   1161                     while(*s++!=0) {}
   1162                     elements[i]=s;
   1163                     break;
   1164                 } else {
   1165                     /* reset this index to 0 and its element string to the first one */
   1166                     indexes[i]=0;
   1167                     elements[i]=elementBases[i];
   1168                 }
   1169             }
   1170 
   1171             /* to make matters a little easier, just compare all elements of the suffix */
   1172             t=otherName;
   1173             for(i=0; i<count; ++i) {
   1174                 s=elements[i];
   1175                 while((c=*s++)!=0) {
   1176                     if(c!=*t++) {
   1177                         s=""; /* does not match */
   1178                         i=99;
   1179                     }
   1180                 }
   1181             }
   1182             if(i<99 && *t==0) {
   1183                 return start;
   1184             }
   1185         }
   1186         break;
   1187     }
   1188     default:
   1189         /* undefined type */
   1190         break;
   1191     }
   1192 
   1193     return 0xffff;
   1194 }
   1195 
   1196 /* sets of name characters, maximum name lengths ---------------------------- */
   1197 
   1198 #define SET_ADD(set, c) ((set)[(uint8_t)c>>5]|=((uint32_t)1<<((uint8_t)c&0x1f)))
   1199 #define SET_CONTAINS(set, c) (((set)[(uint8_t)c>>5]&((uint32_t)1<<((uint8_t)c&0x1f)))!=0)
   1200 
   1201 static int32_t
   1202 calcStringSetLength(uint32_t set[8], const char *s) {
   1203     int32_t length=0;
   1204     char c;
   1205 
   1206     while((c=*s++)!=0) {
   1207         SET_ADD(set, c);
   1208         ++length;
   1209     }
   1210     return length;
   1211 }
   1212 
   1213 static int32_t
   1214 calcAlgNameSetsLengths(int32_t maxNameLength) {
   1215     AlgorithmicRange *range;
   1216     uint32_t *p;
   1217     uint32_t rangeCount;
   1218     int32_t length;
   1219 
   1220     /* enumerate algorithmic ranges */
   1221     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1222     rangeCount=*p;
   1223     range=(AlgorithmicRange *)(p+1);
   1224     while(rangeCount>0) {
   1225         switch(range->type) {
   1226         case 0:
   1227             /* name = prefix + (range->variant times) hex-digits */
   1228             /* prefix */
   1229             length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
   1230             if(length>maxNameLength) {
   1231                 maxNameLength=length;
   1232             }
   1233             break;
   1234         case 1: {
   1235             /* name = prefix factorized-elements */
   1236             const uint16_t *factors=(const uint16_t *)(range+1);
   1237             const char *s;
   1238             int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
   1239 
   1240             /* prefix length */
   1241             s=(const char *)(factors+count);
   1242             length=calcStringSetLength(gNameSet, s);
   1243             s+=length+1; /* start of factor suffixes */
   1244 
   1245             /* get the set and maximum factor suffix length for each factor */
   1246             for(i=0; i<count; ++i) {
   1247                 maxFactorLength=0;
   1248                 for(factor=factors[i]; factor>0; --factor) {
   1249                     factorLength=calcStringSetLength(gNameSet, s);
   1250                     s+=factorLength+1;
   1251                     if(factorLength>maxFactorLength) {
   1252                         maxFactorLength=factorLength;
   1253                     }
   1254                 }
   1255                 length+=maxFactorLength;
   1256             }
   1257 
   1258             if(length>maxNameLength) {
   1259                 maxNameLength=length;
   1260             }
   1261             break;
   1262         }
   1263         default:
   1264             /* unknown type */
   1265             break;
   1266         }
   1267 
   1268         range=(AlgorithmicRange *)((uint8_t *)range+range->size);
   1269         --rangeCount;
   1270     }
   1271     return maxNameLength;
   1272 }
   1273 
   1274 static int32_t
   1275 calcExtNameSetsLengths(int32_t maxNameLength) {
   1276     int32_t i, length;
   1277 
   1278     for(i=0; i<UPRV_LENGTHOF(charCatNames); ++i) {
   1279         /*
   1280          * for each category, count the length of the category name
   1281          * plus 9=
   1282          * 2 for <>
   1283          * 1 for -
   1284          * 6 for most hex digits per code point
   1285          */
   1286         length=9+calcStringSetLength(gNameSet, charCatNames[i]);
   1287         if(length>maxNameLength) {
   1288             maxNameLength=length;
   1289         }
   1290     }
   1291     return maxNameLength;
   1292 }
   1293 
   1294 static int32_t
   1295 calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
   1296                   uint32_t set[8],
   1297                   const uint8_t **pLine, const uint8_t *lineLimit) {
   1298     const uint8_t *line=*pLine;
   1299     int32_t length=0, tokenLength;
   1300     uint16_t c, token;
   1301 
   1302     while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {
   1303         if(c>=tokenCount) {
   1304             /* implicit letter */
   1305             SET_ADD(set, c);
   1306             ++length;
   1307         } else {
   1308             token=tokens[c];
   1309             if(token==(uint16_t)(-2)) {
   1310                 /* this is a lead byte for a double-byte token */
   1311                 c=c<<8|*line++;
   1312                 token=tokens[c];
   1313             }
   1314             if(token==(uint16_t)(-1)) {
   1315                 /* explicit letter */
   1316                 SET_ADD(set, c);
   1317                 ++length;
   1318             } else {
   1319                 /* count token word */
   1320                 if(tokenLengths!=NULL) {
   1321                     /* use cached token length */
   1322                     tokenLength=tokenLengths[c];
   1323                     if(tokenLength==0) {
   1324                         tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
   1325                         tokenLengths[c]=(int8_t)tokenLength;
   1326                     }
   1327                 } else {
   1328                     tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
   1329                 }
   1330                 length+=tokenLength;
   1331             }
   1332         }
   1333     }
   1334 
   1335     *pLine=line;
   1336     return length;
   1337 }
   1338 
   1339 static void
   1340 calcGroupNameSetsLengths(int32_t maxNameLength) {
   1341     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
   1342 
   1343     uint16_t *tokens=(uint16_t *)uCharNames+8;
   1344     uint16_t tokenCount=*tokens++;
   1345     uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
   1346 
   1347     int8_t *tokenLengths;
   1348 
   1349     const uint16_t *group;
   1350     const uint8_t *s, *line, *lineLimit;
   1351 
   1352     int32_t groupCount, lineNumber, length;
   1353 
   1354     tokenLengths=(int8_t *)uprv_malloc(tokenCount);
   1355     if(tokenLengths!=NULL) {
   1356         uprv_memset(tokenLengths, 0, tokenCount);
   1357     }
   1358 
   1359     group=GET_GROUPS(uCharNames);
   1360     groupCount=*group++;
   1361 
   1362     /* enumerate all groups */
   1363     while(groupCount>0) {
   1364         s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
   1365         s=expandGroupLengths(s, offsets, lengths);
   1366 
   1367         /* enumerate all lines in each group */
   1368         for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
   1369             line=s+offsets[lineNumber];
   1370             length=lengths[lineNumber];
   1371             if(length==0) {
   1372                 continue;
   1373             }
   1374 
   1375             lineLimit=line+length;
   1376 
   1377             /* read regular name */
   1378             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
   1379             if(length>maxNameLength) {
   1380                 maxNameLength=length;
   1381             }
   1382             if(line==lineLimit) {
   1383                 continue;
   1384             }
   1385 
   1386             /* read Unicode 1.0 name */
   1387             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
   1388             if(length>maxNameLength) {
   1389                 maxNameLength=length;
   1390             }
   1391             if(line==lineLimit) {
   1392                 continue;
   1393             }
   1394 
   1395             /* read ISO comment */
   1396             /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
   1397         }
   1398 
   1399         group=NEXT_GROUP(group);
   1400         --groupCount;
   1401     }
   1402 
   1403     if(tokenLengths!=NULL) {
   1404         uprv_free(tokenLengths);
   1405     }
   1406 
   1407     /* set gMax... - name length last for threading */
   1408     gMaxNameLength=maxNameLength;
   1409 }
   1410 
   1411 static UBool
   1412 calcNameSetsLengths(UErrorCode *pErrorCode) {
   1413     static const char extChars[]="0123456789ABCDEF<>-";
   1414     int32_t i, maxNameLength;
   1415 
   1416     if(gMaxNameLength!=0) {
   1417         return TRUE;
   1418     }
   1419 
   1420     if(!isDataLoaded(pErrorCode)) {
   1421         return FALSE;
   1422     }
   1423 
   1424     /* set hex digits, used in various names, and <>-, used in extended names */
   1425     for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) {
   1426         SET_ADD(gNameSet, extChars[i]);
   1427     }
   1428 
   1429     /* set sets and lengths from algorithmic names */
   1430     maxNameLength=calcAlgNameSetsLengths(0);
   1431 
   1432     /* set sets and lengths from extended names */
   1433     maxNameLength=calcExtNameSetsLengths(maxNameLength);
   1434 
   1435     /* set sets and lengths from group names, set global maximum values */
   1436     calcGroupNameSetsLengths(maxNameLength);
   1437 
   1438     return TRUE;
   1439 }
   1440 
   1441 U_NAMESPACE_END
   1442 
   1443 /* public API --------------------------------------------------------------- */
   1444 
   1445 U_NAMESPACE_USE
   1446 
   1447 U_CAPI int32_t U_EXPORT2
   1448 u_charName(UChar32 code, UCharNameChoice nameChoice,
   1449            char *buffer, int32_t bufferLength,
   1450            UErrorCode *pErrorCode) {
   1451      AlgorithmicRange *algRange;
   1452     uint32_t *p;
   1453     uint32_t i;
   1454     int32_t length;
   1455 
   1456     /* check the argument values */
   1457     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1458         return 0;
   1459     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
   1460               bufferLength<0 || (bufferLength>0 && buffer==NULL)
   1461     ) {
   1462         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1463         return 0;
   1464     }
   1465 
   1466     if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
   1467         return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
   1468     }
   1469 
   1470     length=0;
   1471 
   1472     /* try algorithmic names first */
   1473     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1474     i=*p;
   1475     algRange=(AlgorithmicRange *)(p+1);
   1476     while(i>0) {
   1477         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
   1478             length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
   1479             break;
   1480         }
   1481         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
   1482         --i;
   1483     }
   1484 
   1485     if(i==0) {
   1486         if (nameChoice == U_EXTENDED_CHAR_NAME) {
   1487             length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
   1488             if (!length) {
   1489                 /* extended character name */
   1490                 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
   1491             }
   1492         } else {
   1493             /* normal character name */
   1494             length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
   1495         }
   1496     }
   1497 
   1498     return u_terminateChars(buffer, bufferLength, length, pErrorCode);
   1499 }
   1500 
   1501 U_CAPI int32_t U_EXPORT2
   1502 u_getISOComment(UChar32 /*c*/,
   1503                 char *dest, int32_t destCapacity,
   1504                 UErrorCode *pErrorCode) {
   1505     /* check the argument values */
   1506     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1507         return 0;
   1508     } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
   1509         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1510         return 0;
   1511     }
   1512 
   1513     return u_terminateChars(dest, destCapacity, 0, pErrorCode);
   1514 }
   1515 
   1516 U_CAPI UChar32 U_EXPORT2
   1517 u_charFromName(UCharNameChoice nameChoice,
   1518                const char *name,
   1519                UErrorCode *pErrorCode) {
   1520     char upper[120], lower[120];
   1521     FindName findName;
   1522     AlgorithmicRange *algRange;
   1523     uint32_t *p;
   1524     uint32_t i;
   1525     UChar32 cp = 0;
   1526     char c0;
   1527     UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
   1528 
   1529     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1530         return error;
   1531     }
   1532 
   1533     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
   1534         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1535         return error;
   1536     }
   1537 
   1538     if(!isDataLoaded(pErrorCode)) {
   1539         return error;
   1540     }
   1541 
   1542     /* construct the uppercase and lowercase of the name first */
   1543     for(i=0; i<sizeof(upper); ++i) {
   1544         if((c0=*name++)!=0) {
   1545             upper[i]=uprv_toupper(c0);
   1546             lower[i]=uprv_tolower(c0);
   1547         } else {
   1548             upper[i]=lower[i]=0;
   1549             break;
   1550         }
   1551     }
   1552     if(i==sizeof(upper)) {
   1553         /* name too long, there is no such character */
   1554         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1555         return error;
   1556     }
   1557     // i==strlen(name)==strlen(lower)==strlen(upper)
   1558 
   1559     /* try extended names first */
   1560     if (lower[0] == '<') {
   1561         if (nameChoice == U_EXTENDED_CHAR_NAME) {
   1562             // Parse a string like "<category-HHHH>" where HHHH is a hex code point.
   1563             if (lower[--i] == '>' && i >= 3 && lower[--i] != '-') {
   1564                 while (i >= 3 && lower[--i] != '-') {}
   1565 
   1566                 if (i >= 2 && lower[i] == '-') {
   1567                     uint32_t cIdx;
   1568 
   1569                     lower[i] = 0;
   1570 
   1571                     for (++i; lower[i] != '>'; ++i) {
   1572                         if (lower[i] >= '0' && lower[i] <= '9') {
   1573                             cp = (cp << 4) + lower[i] - '0';
   1574                         } else if (lower[i] >= 'a' && lower[i] <= 'f') {
   1575                             cp = (cp << 4) + lower[i] - 'a' + 10;
   1576                         } else {
   1577                             *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1578                             return error;
   1579                         }
   1580                     }
   1581 
   1582                     /* Now validate the category name.
   1583                        We could use a binary search, or a trie, if
   1584                        we really wanted to. */
   1585 
   1586                     for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
   1587 
   1588                         if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
   1589                             if (getCharCat(cp) == cIdx) {
   1590                                 return cp;
   1591                             }
   1592                             break;
   1593                         }
   1594                     }
   1595                 }
   1596             }
   1597         }
   1598 
   1599         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1600         return error;
   1601     }
   1602 
   1603     /* try algorithmic names now */
   1604     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1605     i=*p;
   1606     algRange=(AlgorithmicRange *)(p+1);
   1607     while(i>0) {
   1608         if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
   1609             return cp;
   1610         }
   1611         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
   1612         --i;
   1613     }
   1614 
   1615     /* normal character name */
   1616     findName.otherName=upper;
   1617     findName.code=error;
   1618     enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
   1619     if (findName.code == error) {
   1620          *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1621     }
   1622     return findName.code;
   1623 }
   1624 
   1625 U_CAPI void U_EXPORT2
   1626 u_enumCharNames(UChar32 start, UChar32 limit,
   1627                 UEnumCharNamesFn *fn,
   1628                 void *context,
   1629                 UCharNameChoice nameChoice,
   1630                 UErrorCode *pErrorCode) {
   1631     AlgorithmicRange *algRange;
   1632     uint32_t *p;
   1633     uint32_t i;
   1634 
   1635     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1636         return;
   1637     }
   1638 
   1639     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
   1640         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1641         return;
   1642     }
   1643 
   1644     if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
   1645         limit = UCHAR_MAX_VALUE + 1;
   1646     }
   1647     if((uint32_t)start>=(uint32_t)limit) {
   1648         return;
   1649     }
   1650 
   1651     if(!isDataLoaded(pErrorCode)) {
   1652         return;
   1653     }
   1654 
   1655     /* interleave the data-driven ones with the algorithmic ones */
   1656     /* iterate over all algorithmic ranges; assume that they are in ascending order */
   1657     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1658     i=*p;
   1659     algRange=(AlgorithmicRange *)(p+1);
   1660     while(i>0) {
   1661         /* enumerate the character names before the current algorithmic range */
   1662         /* here: start<limit */
   1663         if((uint32_t)start<algRange->start) {
   1664             if((uint32_t)limit<=algRange->start) {
   1665                 enumNames(uCharNames, start, limit, fn, context, nameChoice);
   1666                 return;
   1667             }
   1668             if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
   1669                 return;
   1670             }
   1671             start=(UChar32)algRange->start;
   1672         }
   1673         /* enumerate the character names in the current algorithmic range */
   1674         /* here: algRange->start<=start<limit */
   1675         if((uint32_t)start<=algRange->end) {
   1676             if((uint32_t)limit<=(algRange->end+1)) {
   1677                 enumAlgNames(algRange, start, limit, fn, context, nameChoice);
   1678                 return;
   1679             }
   1680             if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
   1681                 return;
   1682             }
   1683             start=(UChar32)algRange->end+1;
   1684         }
   1685         /* continue to the next algorithmic range (here: start<limit) */
   1686         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
   1687         --i;
   1688     }
   1689     /* enumerate the character names after the last algorithmic range */
   1690     enumNames(uCharNames, start, limit, fn, context, nameChoice);
   1691 }
   1692 
   1693 U_CAPI int32_t U_EXPORT2
   1694 uprv_getMaxCharNameLength() {
   1695     UErrorCode errorCode=U_ZERO_ERROR;
   1696     if(calcNameSetsLengths(&errorCode)) {
   1697         return gMaxNameLength;
   1698     } else {
   1699         return 0;
   1700     }
   1701 }
   1702 
   1703 /**
   1704  * Converts the char set cset into a Unicode set uset.
   1705  * @param cset Set of 256 bit flags corresponding to a set of chars.
   1706  * @param uset USet to receive characters. Existing contents are deleted.
   1707  */
   1708 static void
   1709 charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
   1710     UChar us[256];
   1711     char cs[256];
   1712 
   1713     int32_t i, length;
   1714     UErrorCode errorCode;
   1715 
   1716     errorCode=U_ZERO_ERROR;
   1717 
   1718     if(!calcNameSetsLengths(&errorCode)) {
   1719         return;
   1720     }
   1721 
   1722     /* build a char string with all chars that are used in character names */
   1723     length=0;
   1724     for(i=0; i<256; ++i) {
   1725         if(SET_CONTAINS(cset, i)) {
   1726             cs[length++]=(char)i;
   1727         }
   1728     }
   1729 
   1730     /* convert the char string to a UChar string */
   1731     u_charsToUChars(cs, us, length);
   1732 
   1733     /* add each UChar to the USet */
   1734     for(i=0; i<length; ++i) {
   1735         if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
   1736             sa->add(sa->set, us[i]);
   1737         }
   1738     }
   1739 }
   1740 
   1741 /**
   1742  * Fills set with characters that are used in Unicode character names.
   1743  * @param set USet to receive characters.
   1744  */
   1745 U_CAPI void U_EXPORT2
   1746 uprv_getCharNameCharacters(const USetAdder *sa) {
   1747     charSetToUSet(gNameSet, sa);
   1748 }
   1749 
   1750 /* data swapping ------------------------------------------------------------ */
   1751 
   1752 /*
   1753  * The token table contains non-negative entries for token bytes,
   1754  * and -1 for bytes that represent themselves in the data file's charset.
   1755  * -2 entries are used for lead bytes.
   1756  *
   1757  * Direct bytes (-1 entries) must be translated from the input charset family
   1758  * to the output charset family.
   1759  * makeTokenMap() writes a permutation mapping for this.
   1760  * Use it once for single-/lead-byte tokens and once more for all trail byte
   1761  * tokens. (';' is an unused trail byte marked with -1.)
   1762  */
   1763 static void
   1764 makeTokenMap(const UDataSwapper *ds,
   1765              int16_t tokens[], uint16_t tokenCount,
   1766              uint8_t map[256],
   1767              UErrorCode *pErrorCode) {
   1768     UBool usedOutChar[256];
   1769     uint16_t i, j;
   1770     uint8_t c1, c2;
   1771 
   1772     if(U_FAILURE(*pErrorCode)) {
   1773         return;
   1774     }
   1775 
   1776     if(ds->inCharset==ds->outCharset) {
   1777         /* Same charset family: identity permutation */
   1778         for(i=0; i<256; ++i) {
   1779             map[i]=(uint8_t)i;
   1780         }
   1781     } else {
   1782         uprv_memset(map, 0, 256);
   1783         uprv_memset(usedOutChar, 0, 256);
   1784 
   1785         if(tokenCount>256) {
   1786             tokenCount=256;
   1787         }
   1788 
   1789         /* set the direct bytes (byte 0 always maps to itself) */
   1790         for(i=1; i<tokenCount; ++i) {
   1791             if(tokens[i]==-1) {
   1792                 /* convert the direct byte character */
   1793                 c1=(uint8_t)i;
   1794                 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
   1795                 if(U_FAILURE(*pErrorCode)) {
   1796                     udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
   1797                                      i, ds->inCharset);
   1798                     return;
   1799                 }
   1800 
   1801                 /* enter the converted character into the map and mark it used */
   1802                 map[c1]=c2;
   1803                 usedOutChar[c2]=TRUE;
   1804             }
   1805         }
   1806 
   1807         /* set the mappings for the rest of the permutation */
   1808         for(i=j=1; i<tokenCount; ++i) {
   1809             /* set mappings that were not set for direct bytes */
   1810             if(map[i]==0) {
   1811                 /* set an output byte value that was not used as an output byte above */
   1812                 while(usedOutChar[j]) {
   1813                     ++j;
   1814                 }
   1815                 map[i]=(uint8_t)j++;
   1816             }
   1817         }
   1818 
   1819         /*
   1820          * leave mappings at tokenCount and above unset if tokenCount<256
   1821          * because they won't be used
   1822          */
   1823     }
   1824 }
   1825 
   1826 U_CAPI int32_t U_EXPORT2
   1827 uchar_swapNames(const UDataSwapper *ds,
   1828                 const void *inData, int32_t length, void *outData,
   1829                 UErrorCode *pErrorCode) {
   1830     const UDataInfo *pInfo;
   1831     int32_t headerSize;
   1832 
   1833     const uint8_t *inBytes;
   1834     uint8_t *outBytes;
   1835 
   1836     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
   1837              offset, i, count, stringsCount;
   1838 
   1839     const AlgorithmicRange *inRange;
   1840     AlgorithmicRange *outRange;
   1841 
   1842     /* udata_swapDataHeader checks the arguments */
   1843     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1844     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1845         return 0;
   1846     }
   1847 
   1848     /* check data format and format version */
   1849     pInfo=(const UDataInfo *)((const char *)inData+4);
   1850     if(!(
   1851         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
   1852         pInfo->dataFormat[1]==0x6e &&
   1853         pInfo->dataFormat[2]==0x61 &&
   1854         pInfo->dataFormat[3]==0x6d &&
   1855         pInfo->formatVersion[0]==1
   1856     )) {
   1857         udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
   1858                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   1859                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   1860                          pInfo->formatVersion[0]);
   1861         *pErrorCode=U_UNSUPPORTED_ERROR;
   1862         return 0;
   1863     }
   1864 
   1865     inBytes=(const uint8_t *)inData+headerSize;
   1866     outBytes=(uint8_t *)outData+headerSize;
   1867     if(length<0) {
   1868         algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
   1869     } else {
   1870         length-=headerSize;
   1871         if( length<20 ||
   1872             (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
   1873         ) {
   1874             udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
   1875                              length);
   1876             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1877             return 0;
   1878         }
   1879     }
   1880 
   1881     if(length<0) {
   1882         /* preflighting: iterate through algorithmic ranges */
   1883         offset=algNamesOffset;
   1884         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
   1885         offset+=4;
   1886 
   1887         for(i=0; i<count; ++i) {
   1888             inRange=(const AlgorithmicRange *)(inBytes+offset);
   1889             offset+=ds->readUInt16(inRange->size);
   1890         }
   1891     } else {
   1892         /* swap data */
   1893         const uint16_t *p;
   1894         uint16_t *q, *temp;
   1895 
   1896         int16_t tokens[512];
   1897         uint16_t tokenCount;
   1898 
   1899         uint8_t map[256], trailMap[256];
   1900 
   1901         /* copy the data for inaccessible bytes */
   1902         if(inBytes!=outBytes) {
   1903             uprv_memcpy(outBytes, inBytes, length);
   1904         }
   1905 
   1906         /* the initial 4 offsets first */
   1907         tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
   1908         groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
   1909         groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
   1910         ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
   1911 
   1912         /*
   1913          * now the tokens table
   1914          * it needs to be permutated along with the compressed name strings
   1915          */
   1916         p=(const uint16_t *)(inBytes+16);
   1917         q=(uint16_t *)(outBytes+16);
   1918 
   1919         /* read and swap the tokenCount */
   1920         tokenCount=ds->readUInt16(*p);
   1921         ds->swapArray16(ds, p, 2, q, pErrorCode);
   1922         ++p;
   1923         ++q;
   1924 
   1925         /* read the first 512 tokens and make the token maps */
   1926         if(tokenCount<=512) {
   1927             count=tokenCount;
   1928         } else {
   1929             count=512;
   1930         }
   1931         for(i=0; i<count; ++i) {
   1932             tokens[i]=udata_readInt16(ds, p[i]);
   1933         }
   1934         for(; i<512; ++i) {
   1935             tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
   1936         }
   1937         makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
   1938         makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
   1939         if(U_FAILURE(*pErrorCode)) {
   1940             return 0;
   1941         }
   1942 
   1943         /*
   1944          * swap and permutate the tokens
   1945          * go through a temporary array to support in-place swapping
   1946          */
   1947         temp=(uint16_t *)uprv_malloc(tokenCount*2);
   1948         if(temp==NULL) {
   1949             udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
   1950                              tokenCount);
   1951             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1952             return 0;
   1953         }
   1954 
   1955         /* swap and permutate single-/lead-byte tokens */
   1956         for(i=0; i<tokenCount && i<256; ++i) {
   1957             ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
   1958         }
   1959 
   1960         /* swap and permutate trail-byte tokens */
   1961         for(; i<tokenCount; ++i) {
   1962             ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
   1963         }
   1964 
   1965         /* copy the result into the output and free the temporary array */
   1966         uprv_memcpy(q, temp, tokenCount*2);
   1967         uprv_free(temp);
   1968 
   1969         /*
   1970          * swap the token strings but not a possible padding byte after
   1971          * the terminating NUL of the last string
   1972          */
   1973         udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
   1974                                     outBytes+tokenStringOffset, pErrorCode);
   1975         if(U_FAILURE(*pErrorCode)) {
   1976             udata_printError(ds, "uchar_swapNames(token strings) failed\n");
   1977             return 0;
   1978         }
   1979 
   1980         /* swap the group table */
   1981         count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
   1982         ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
   1983                            outBytes+groupsOffset, pErrorCode);
   1984 
   1985         /*
   1986          * swap the group strings
   1987          * swap the string bytes but not the nibble-encoded string lengths
   1988          */
   1989         if(ds->inCharset!=ds->outCharset) {
   1990             uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
   1991 
   1992             const uint8_t *inStrings, *nextInStrings;
   1993             uint8_t *outStrings;
   1994 
   1995             uint8_t c;
   1996 
   1997             inStrings=inBytes+groupStringOffset;
   1998             outStrings=outBytes+groupStringOffset;
   1999 
   2000             stringsCount=algNamesOffset-groupStringOffset;
   2001 
   2002             /* iterate through string groups until only a few padding bytes are left */
   2003             while(stringsCount>32) {
   2004                 nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
   2005 
   2006                 /* move past the length bytes */
   2007                 stringsCount-=(uint32_t)(nextInStrings-inStrings);
   2008                 outStrings+=nextInStrings-inStrings;
   2009                 inStrings=nextInStrings;
   2010 
   2011                 count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
   2012                 stringsCount-=count;
   2013 
   2014                 /* swap the string bytes using map[] and trailMap[] */
   2015                 while(count>0) {
   2016                     c=*inStrings++;
   2017                     *outStrings++=map[c];
   2018                     if(tokens[c]!=-2) {
   2019                         --count;
   2020                     } else {
   2021                         /* token lead byte: swap the trail byte, too */
   2022                         *outStrings++=trailMap[*inStrings++];
   2023                         count-=2;
   2024                     }
   2025                 }
   2026             }
   2027         }
   2028 
   2029         /* swap the algorithmic ranges */
   2030         offset=algNamesOffset;
   2031         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
   2032         ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
   2033         offset+=4;
   2034 
   2035         for(i=0; i<count; ++i) {
   2036             if(offset>(uint32_t)length) {
   2037                 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
   2038                                  length, i);
   2039                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   2040                 return 0;
   2041             }
   2042 
   2043             inRange=(const AlgorithmicRange *)(inBytes+offset);
   2044             outRange=(AlgorithmicRange *)(outBytes+offset);
   2045             offset+=ds->readUInt16(inRange->size);
   2046 
   2047             ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
   2048             ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
   2049             switch(inRange->type) {
   2050             case 0:
   2051                 /* swap prefix string */
   2052                 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
   2053                                     outRange+1, pErrorCode);
   2054                 if(U_FAILURE(*pErrorCode)) {
   2055                     udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
   2056                                      i);
   2057                     return 0;
   2058                 }
   2059                 break;
   2060             case 1:
   2061                 {
   2062                     /* swap factors and the prefix and factor strings */
   2063                     uint32_t factorsCount;
   2064 
   2065                     factorsCount=inRange->variant;
   2066                     p=(const uint16_t *)(inRange+1);
   2067                     q=(uint16_t *)(outRange+1);
   2068                     ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
   2069 
   2070                     /* swap the strings, up to the last terminating NUL */
   2071                     p+=factorsCount;
   2072                     q+=factorsCount;
   2073                     stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
   2074                     while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
   2075                         --stringsCount;
   2076                     }
   2077                     ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
   2078                 }
   2079                 break;
   2080             default:
   2081                 udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
   2082                                  inRange->type, i);
   2083                 *pErrorCode=U_UNSUPPORTED_ERROR;
   2084                 return 0;
   2085             }
   2086         }
   2087     }
   2088 
   2089     return headerSize+(int32_t)offset;
   2090 }
   2091 
   2092 /*
   2093  * Hey, Emacs, please set the following:
   2094  *
   2095  * Local Variables:
   2096  * indent-tabs-mode: nil
   2097  * End:
   2098  *
   2099  */
   2100