Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2009, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *   file name:  unames.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 1999oct04
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 #include "unicode/putil.h"
     19 #include "unicode/uchar.h"
     20 #include "unicode/udata.h"
     21 #include "ustr_imp.h"
     22 #include "umutex.h"
     23 #include "cmemory.h"
     24 #include "cstring.h"
     25 #include "ucln_cmn.h"
     26 #include "udataswp.h"
     27 #include "uprops.h"
     28 
     29 /* prototypes ------------------------------------------------------------- */
     30 
/* Number of elements in a true array (not valid for pointers). */
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))

/* Name and type of the data item that isDataLoaded() passes to udata_openChoice(). */
static const char DATA_NAME[] = "unames";
static const char DATA_TYPE[] = "icu";

/*
 * Names are stored in groups of LINES_PER_GROUP consecutive code points:
 * code>>GROUP_SHIFT identifies the group, code&GROUP_MASK the line in it.
 */
#define GROUP_SHIFT 5
#define LINES_PER_GROUP (1UL<<GROUP_SHIFT)
#define GROUP_MASK (LINES_PER_GROUP-1)
     39 
     40 /*
     41  * This struct was replaced by explicitly accessing equivalent
     42  * fields from triples of uint16_t.
     43  * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
     44  * which broke the assumption that sizeof(Group)==6 and that the ++ operator
     45  * would advance by 6 bytes (3 uint16_t).
     46  *
     47  * We can't just change the data structure because it's loaded from a data file,
     48  * and we don't want to make it less compact, so we changed the access code.
     49  *
     50  * For details see ICU tickets 6331 and 6008.
     51 typedef struct {
     52     uint16_t groupMSB,
     53              offsetHigh, offsetLow; / * avoid padding * /
     54 } Group;
     55  */
     56 enum {
     57     GROUP_MSB,
     58     GROUP_OFFSET_HIGH,
     59     GROUP_OFFSET_LOW,
     60     GROUP_LENGTH
     61 };
     62 
     63 /*
     64  * Get the 32-bit group offset.
     65  * @param group (const uint16_t *) pointer to a Group triple of uint16_t
     66  * @return group offset (int32_t)
     67  */
     68 #define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW])
     69 
     70 #define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
     71 #define PREV_GROUP(group) ((group)-GROUP_LENGTH)
     72 
/*
 * Descriptor of one range of algorithmically named code points.
 * NOTE(review): the code that reads this struct is outside this part of
 * the file; presumably start/end bound the range and type/variant select
 * the algorithm, with size being the byte size of the record - confirm.
 */
typedef struct {
    uint32_t start, end;
    uint8_t type, variant;
    uint16_t size;
} AlgorithmicRange;
     78 
/*
 * Header at the start of the loaded names data.
 * tokenStringOffset, groupsOffset and groupStringOffset are byte offsets
 * that the code in this file adds to the address of this struct itself.
 * The token table (a uint16_t count followed by the tokens) is read at
 * (uint16_t *)names+8, i.e. directly after these four fields.
 * algNamesOffset is presumably the equivalent offset for the
 * AlgorithmicRange data - its use is outside this part of the file.
 */
typedef struct {
    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
} UCharNames;
     82 
     83 /*
     84  * Get the groups table from a UCharNames struct.
     85  * The groups table consists of one uint16_t groupCount followed by
     86  * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
     87  * and the comment for the old struct Group above.
     88  *
     89  * @param names (const UCharNames *) pointer to the UCharNames indexes
     90  * @return (const uint16_t *) pointer to the groups table
     91  */
     92 #define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset)
     93 
/*
 * Context for a name lookup (see enumGroupNames()):
 * otherName is the name being searched for, and code receives the
 * code point whose stored name matched it.
 */
typedef struct {
    const char *otherName;
    UChar32 code;
} FindName;

/*
 * Passed in place of an enumeration callback to request a name lookup;
 * the context pointer must then point to a FindName.
 */
#define DO_FIND_NAME NULL
    100 
/*
 * The loaded names data and the header inside it.
 * Both are set together in isDataLoaded() while holding the global mutex
 * and reset by unames_cleanup().
 */
static UDataMemory *uCharNamesData=NULL;
static UCharNames *uCharNames=NULL;
/* Error code of a failed load; isDataLoaded() reports it again instead of retrying. */
static UErrorCode gLoadErrorCode=U_ZERO_ERROR;

/*
 * Maximum length of character names (regular & 1.0).
 * Reset to 0 by unames_cleanup().
 */
static int32_t gMaxNameLength=0;

/*
 * Set of chars used in character names (regular & 1.0).
 * Chars are platform-dependent (can be EBCDIC).
 * NOTE(review): 8*32 bits, presumably one bit per char value - the code
 * that fills and tests it is outside this part of the file.
 */
static uint32_t gNameSet[8]={ 0 };
    115 
    116 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
    117 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
    118 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
    119 
    120 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
    121 
/*
 * Category names used in extended names of the form "<category-XXXX>"
 * (see getExtName()). The table is indexed with the value returned by
 * getCharCat(); the last three entries belong to the pseudo-categories
 * U_NONCHARACTER_CODE_POINT, U_LEAD_SURROGATE and U_TRAIL_SURROGATE.
 */
static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
    "unassigned",
    "uppercase letter",
    "lowercase letter",
    "titlecase letter",
    "modifier letter",
    "other letter",
    "non spacing mark",
    "enclosing mark",
    "combining spacing mark",
    "decimal digit number",
    "letter number",
    "other number",
    "space separator",
    "line separator",
    "paragraph separator",
    "control",
    "format",
    "private use area",
    "surrogate",
    "dash punctuation",
    "start punctuation",
    "end punctuation",
    "connector punctuation",
    "other punctuation",
    "math symbol",
    "currency symbol",
    "modifier symbol",
    "other symbol",
    "initial punctuation",
    "final punctuation",
    "noncharacter",
    "lead surrogate",
    "trail surrogate"
};
    157 
    158 /* implementation ----------------------------------------------------------- */
    159 
    160 static UBool U_CALLCONV unames_cleanup(void)
    161 {
    162     if(uCharNamesData) {
    163         udata_close(uCharNamesData);
    164         uCharNamesData = NULL;
    165     }
    166     if(uCharNames) {
    167         uCharNames = NULL;
    168     }
    169     gMaxNameLength=0;
    170     return TRUE;
    171 }
    172 
    173 static UBool U_CALLCONV
    174 isAcceptable(void *context,
    175              const char *type, const char *name,
    176              const UDataInfo *pInfo) {
    177     return (UBool)(
    178         pInfo->size>=20 &&
    179         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
    180         pInfo->charsetFamily==U_CHARSET_FAMILY &&
    181         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
    182         pInfo->dataFormat[1]==0x6e &&
    183         pInfo->dataFormat[2]==0x61 &&
    184         pInfo->dataFormat[3]==0x6d &&
    185         pInfo->formatVersion[0]==1);
    186 }
    187 
    188 static UBool
    189 isDataLoaded(UErrorCode *pErrorCode) {
    190     /* load UCharNames from file if necessary */
    191     UBool isCached;
    192 
    193     /* do this because double-checked locking is broken */
    194     UMTX_CHECK(NULL, (uCharNames!=NULL), isCached);
    195 
    196     if(!isCached) {
    197         UCharNames *names;
    198         UDataMemory *data;
    199 
    200         /* check error code from previous attempt */
    201         if(U_FAILURE(gLoadErrorCode)) {
    202             *pErrorCode=gLoadErrorCode;
    203             return FALSE;
    204         }
    205 
    206         /* open the data outside the mutex block */
    207         data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
    208         if(U_FAILURE(*pErrorCode)) {
    209             gLoadErrorCode=*pErrorCode;
    210             return FALSE;
    211         }
    212 
    213         names=(UCharNames *)udata_getMemory(data);
    214 
    215         /* in the mutex block, set the data for this process */
    216         {
    217             umtx_lock(NULL);
    218             if(uCharNames==NULL) {
    219                 uCharNamesData=data;
    220                 uCharNames=names;
    221                 data=NULL;
    222                 names=NULL;
    223                 ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
    224             }
    225             umtx_unlock(NULL);
    226         }
    227 
    228         /* if a different thread set it first, then close the extra data */
    229         if(data!=NULL) {
    230             udata_close(data); /* NULL if it was set correctly */
    231         }
    232     }
    233     return TRUE;
    234 }
    235 
    236 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
    237     if((bufferLength)>0) { \
    238         *(buffer)++=c; \
    239         --(bufferLength); \
    240     } \
    241     ++(bufferPos); \
    242 }
    243 
/*
 * Internal name choice, numbered directly after the public UCharNameChoice
 * values. expandName() and compareName() map it to field index 2 of a
 * semicolon-separated name line.
 */
#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
    245 
    246 /*
    247  * Important: expandName() and compareName() are almost the same -
    248  * apply fixes to both.
    249  *
    250  * UnicodeData.txt uses ';' as a field separator, so no
    251  * field can contain ';' as part of its contents.
    252  * In unames.dat, it is marked as token[';']==-1 only if the
    253  * semicolon is used in the data file - which is iff we
    254  * have Unicode 1.0 names or ISO comments or aliases.
    255  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
    256  * although we know that it will never be part of a name.
    257  */
    258 static uint16_t
    259 expandName(UCharNames *names,
    260            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
    261            char *buffer, uint16_t bufferLength) {
    262     uint16_t *tokens=(uint16_t *)names+8;
    263     uint16_t token, tokenCount=*tokens++, bufferPos=0;
    264     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
    265     uint8_t c;
    266 
    267     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    268         /*
    269          * skip the modern name if it is not requested _and_
    270          * if the semicolon byte value is a character, not a token number
    271          */
    272         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    273             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
    274             do {
    275                 while(nameLength>0) {
    276                     --nameLength;
    277                     if(*name++==';') {
    278                         break;
    279                     }
    280                 }
    281             } while(--fieldIndex>0);
    282         } else {
    283             /*
    284              * the semicolon byte value is a token number, therefore
    285              * only modern names are stored in unames.dat and there is no
    286              * such requested alternate name here
    287              */
    288             nameLength=0;
    289         }
    290     }
    291 
    292     /* write each letter directly, and write a token word per token */
    293     while(nameLength>0) {
    294         --nameLength;
    295         c=*name++;
    296 
    297         if(c>=tokenCount) {
    298             if(c!=';') {
    299                 /* implicit letter */
    300                 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    301             } else {
    302                 /* finished */
    303                 break;
    304             }
    305         } else {
    306             token=tokens[c];
    307             if(token==(uint16_t)(-2)) {
    308                 /* this is a lead byte for a double-byte token */
    309                 token=tokens[c<<8|*name++];
    310                 --nameLength;
    311             }
    312             if(token==(uint16_t)(-1)) {
    313                 if(c!=';') {
    314                     /* explicit letter */
    315                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    316                 } else {
    317                     /* stop, but skip the semicolon if we are seeking
    318                        extended names and there was no 2.0 name but there
    319                        is a 1.0 name. */
    320                     if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
    321                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    322                             continue;
    323                         }
    324                     }
    325                     /* finished */
    326                     break;
    327                 }
    328             } else {
    329                 /* write token word */
    330                 uint8_t *tokenString=tokenStrings+token;
    331                 while((c=*tokenString++)!=0) {
    332                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    333                 }
    334             }
    335         }
    336     }
    337 
    338     /* zero-terminate */
    339     if(bufferLength>0) {
    340         *buffer=0;
    341     }
    342 
    343     return bufferPos;
    344 }
    345 
    346 /*
    347  * compareName() is almost the same as expandName() except that it compares
    348  * the currently expanded name to an input name.
    349  * It returns the match/no match result as soon as possible.
    350  */
    351 static UBool
    352 compareName(UCharNames *names,
    353             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
    354             const char *otherName) {
    355     uint16_t *tokens=(uint16_t *)names+8;
    356     uint16_t token, tokenCount=*tokens++;
    357     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
    358     uint8_t c;
    359     const char *origOtherName = otherName;
    360 
    361     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    362         /*
    363          * skip the modern name if it is not requested _and_
    364          * if the semicolon byte value is a character, not a token number
    365          */
    366         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    367             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
    368             do {
    369                 while(nameLength>0) {
    370                     --nameLength;
    371                     if(*name++==';') {
    372                         break;
    373                     }
    374                 }
    375             } while(--fieldIndex>0);
    376         } else {
    377             /*
    378              * the semicolon byte value is a token number, therefore
    379              * only modern names are stored in unames.dat and there is no
    380              * such requested alternate name here
    381              */
    382             nameLength=0;
    383         }
    384     }
    385 
    386     /* compare each letter directly, and compare a token word per token */
    387     while(nameLength>0) {
    388         --nameLength;
    389         c=*name++;
    390 
    391         if(c>=tokenCount) {
    392             if(c!=';') {
    393                 /* implicit letter */
    394                 if((char)c!=*otherName++) {
    395                     return FALSE;
    396                 }
    397             } else {
    398                 /* finished */
    399                 break;
    400             }
    401         } else {
    402             token=tokens[c];
    403             if(token==(uint16_t)(-2)) {
    404                 /* this is a lead byte for a double-byte token */
    405                 token=tokens[c<<8|*name++];
    406                 --nameLength;
    407             }
    408             if(token==(uint16_t)(-1)) {
    409                 if(c!=';') {
    410                     /* explicit letter */
    411                     if((char)c!=*otherName++) {
    412                         return FALSE;
    413                     }
    414                 } else {
    415                     /* stop, but skip the semicolon if we are seeking
    416                        extended names and there was no 2.0 name but there
    417                        is a 1.0 name. */
    418                     if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
    419                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
    420                             continue;
    421                         }
    422                     }
    423                     /* finished */
    424                     break;
    425                 }
    426             } else {
    427                 /* write token word */
    428                 uint8_t *tokenString=tokenStrings+token;
    429                 while((c=*tokenString++)!=0) {
    430                     if((char)c!=*otherName++) {
    431                         return FALSE;
    432                     }
    433                 }
    434             }
    435         }
    436     }
    437 
    438     /* complete match? */
    439     return (UBool)(*otherName==0);
    440 }
    441 
    442 static uint8_t getCharCat(UChar32 cp) {
    443     uint8_t cat;
    444 
    445     if (UTF_IS_UNICODE_NONCHAR(cp)) {
    446         return U_NONCHARACTER_CODE_POINT;
    447     }
    448 
    449     if ((cat = u_charType(cp)) == U_SURROGATE) {
    450         cat = UTF_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
    451     }
    452 
    453     return cat;
    454 }
    455 
    456 static const char *getCharCatName(UChar32 cp) {
    457     uint8_t cat = getCharCat(cp);
    458 
    459     /* Return unknown if the table of names above is not up to
    460        date. */
    461 
    462     if (cat >= LENGTHOF(charCatNames)) {
    463         return "unknown";
    464     } else {
    465         return charCatNames[cat];
    466     }
    467 }
    468 
    469 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
    470     const char *catname = getCharCatName(code);
    471     uint16_t length = 0;
    472 
    473     UChar32 cp;
    474     int ndigits, i;
    475 
    476     WRITE_CHAR(buffer, bufferLength, length, '<');
    477     while (catname[length - 1]) {
    478         WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
    479     }
    480     WRITE_CHAR(buffer, bufferLength, length, '-');
    481     for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
    482         ;
    483     if (ndigits < 4)
    484         ndigits = 4;
    485     for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
    486         uint8_t v = (uint8_t)(cp & 0xf);
    487         buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
    488     }
    489     buffer += ndigits;
    490     length += ndigits;
    491     WRITE_CHAR(buffer, bufferLength, length, '>');
    492 
    493     return length;
    494 }
    495 
    496 /*
    497  * getGroup() does a binary search for the group that contains the
    498  * Unicode code point "code".
    499  * The return value is always a valid Group* that may contain "code"
    500  * or else is the highest group before "code".
    501  * If the lowest group is after "code", then that one is returned.
    502  */
    503 static const uint16_t *
    504 getGroup(UCharNames *names, uint32_t code) {
    505     const uint16_t *groups=GET_GROUPS(names);
    506     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
    507              start=0,
    508              limit=*groups++,
    509              number;
    510 
    511     /* binary search for the group of names that contains the one for code */
    512     while(start<limit-1) {
    513         number=(uint16_t)((start+limit)/2);
    514         if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
    515             limit=number;
    516         } else {
    517             start=number;
    518         }
    519     }
    520 
    521     /* return this regardless of whether it is an exact match */
    522     return groups+start*GROUP_LENGTH;
    523 }
    524 
    525 /*
    526  * expandGroupLengths() reads a block of compressed lengths of 32 strings and
    527  * expands them into offsets and lengths for each string.
    528  * Lengths are stored with a variable-width encoding in consecutive nibbles:
    529  * If a nibble<0xc, then it is the length itself (0=empty string).
    530  * If a nibble>=0xc, then it forms a length value with the following nibble.
    531  * Calculation see below.
    532  * The offsets and lengths arrays must be at least 33 (one more) long because
    533  * there is no check here at the end if the last nibble is still used.
    534  */
    535 static const uint8_t *
    536 expandGroupLengths(const uint8_t *s,
    537                    uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
    538     /* read the lengths of the 32 strings in this group and get each string's offset */
    539     uint16_t i=0, offset=0, length=0;
    540     uint8_t lengthByte;
    541 
    542     /* all 32 lengths must be read to get the offset of the first group string */
    543     while(i<LINES_PER_GROUP) {
    544         lengthByte=*s++;
    545 
    546         /* read even nibble - MSBs of lengthByte */
    547         if(length>=12) {
    548             /* double-nibble length spread across two bytes */
    549             length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
    550             lengthByte&=0xf;
    551         } else if((lengthByte /* &0xf0 */)>=0xc0) {
    552             /* double-nibble length spread across this one byte */
    553             length=(uint16_t)((lengthByte&0x3f)+12);
    554         } else {
    555             /* single-nibble length in MSBs */
    556             length=(uint16_t)(lengthByte>>4);
    557             lengthByte&=0xf;
    558         }
    559 
    560         *offsets++=offset;
    561         *lengths++=length;
    562 
    563         offset+=length;
    564         ++i;
    565 
    566         /* read odd nibble - LSBs of lengthByte */
    567         if((lengthByte&0xf0)==0) {
    568             /* this nibble was not consumed for a double-nibble length above */
    569             length=lengthByte;
    570             if(length<12) {
    571                 /* single-nibble length in LSBs */
    572                 *offsets++=offset;
    573                 *lengths++=length;
    574 
    575                 offset+=length;
    576                 ++i;
    577             }
    578         } else {
    579             length=0;   /* prevent double-nibble detection in the next iteration */
    580         }
    581     }
    582 
    583     /* now, s is at the first group string */
    584     return s;
    585 }
    586 
    587 static uint16_t
    588 expandGroupName(UCharNames *names, const uint16_t *group,
    589                 uint16_t lineNumber, UCharNameChoice nameChoice,
    590                 char *buffer, uint16_t bufferLength) {
    591     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
    592     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
    593     s=expandGroupLengths(s, offsets, lengths);
    594     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
    595                       buffer, bufferLength);
    596 }
    597 
    598 static uint16_t
    599 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
    600         char *buffer, uint16_t bufferLength) {
    601     const uint16_t *group=getGroup(names, code);
    602     if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
    603         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
    604                                buffer, bufferLength);
    605     } else {
    606         /* group not found */
    607         /* zero-terminate */
    608         if(bufferLength>0) {
    609             *buffer=0;
    610         }
    611         return 0;
    612     }
    613 }
    614 
    615 /*
    616  * enumGroupNames() enumerates all the names in a 32-group
    617  * and either calls the enumerator function or finds a given input name.
    618  */
    619 static UBool
    620 enumGroupNames(UCharNames *names, const uint16_t *group,
    621                UChar32 start, UChar32 end,
    622                UEnumCharNamesFn *fn, void *context,
    623                UCharNameChoice nameChoice) {
    624     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
    625     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
    626 
    627     s=expandGroupLengths(s, offsets, lengths);
    628     if(fn!=DO_FIND_NAME) {
    629         char buffer[200];
    630         uint16_t length;
    631 
    632         while(start<=end) {
    633             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
    634             if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
    635                 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
    636             }
    637             /* here, we assume that the buffer is large enough */
    638             if(length>0) {
    639                 if(!fn(context, start, nameChoice, buffer, length)) {
    640                     return FALSE;
    641                 }
    642             }
    643             ++start;
    644         }
    645     } else {
    646         const char *otherName=((FindName *)context)->otherName;
    647         while(start<=end) {
    648             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
    649                 ((FindName *)context)->code=start;
    650                 return FALSE;
    651             }
    652             ++start;
    653         }
    654     }
    655     return TRUE;
    656 }
    657 
    658 /*
    659  * enumExtNames enumerate extended names.
    660  * It only needs to do it if it is called with a real function and not
    661  * with the dummy DO_FIND_NAME, because u_charFromName() does a check
    662  * for extended names by itself.
    663  */
    664 static UBool
    665 enumExtNames(UChar32 start, UChar32 end,
    666              UEnumCharNamesFn *fn, void *context)
    667 {
    668     if(fn!=DO_FIND_NAME) {
    669         char buffer[200];
    670         uint16_t length;
    671 
    672         while(start<=end) {
    673             buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
    674             /* here, we assume that the buffer is large enough */
    675             if(length>0) {
    676                 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
    677                     return FALSE;
    678                 }
    679             }
    680             ++start;
    681         }
    682     }
    683 
    684     return TRUE;
    685 }
    686 
/*
 * enumNames() visits the names of the code points in [start, limit),
 * group by group, using enumGroupNames() for code points inside stored
 * groups and - only for U_EXTENDED_CHAR_NAME - enumExtNames() for the
 * stretches between and after the stored groups.
 *
 * fn is either a real enumeration callback or DO_FIND_NAME (in which case
 * context points to a FindName, see enumGroupNames()).
 *
 * @return FALSE as soon as one of the callees returns FALSE (the callback
 *         asked to stop, or the searched name was found), otherwise TRUE
 */
static UBool
enumNames(UCharNames *names,
          UChar32 start, UChar32 limit,
          UEnumCharNamesFn *fn, void *context,
          UCharNameChoice nameChoice) {
    uint16_t startGroupMSB, endGroupMSB, groupCount;
    const uint16_t *group, *groupLimit;

    /* group numbers of the first and of the last (limit-1) code point */
    startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
    endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);

    /* find the group that contains start, or the highest before it */
    group=getGroup(names, start);

    if(startGroupMSB==endGroupMSB) {
        if(startGroupMSB==group[GROUP_MSB]) {
            /* if start and limit-1 are in the same group, then enumerate only in that one */
            return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
        }
        /* else: no stored group for this range, fall through to the extended names below */
    } else {
        const uint16_t *groups=GET_GROUPS(names);
        groupCount=*groups++;
        groupLimit=groups+groupCount*GROUP_LENGTH;

        if(startGroupMSB==group[GROUP_MSB]) {
            /* enumerate characters in the partial start group */
            if((start&GROUP_MASK)!=0) {
                if(!enumGroupNames(names, group,
                                   start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
                                   fn, context, nameChoice)) {
                    return FALSE;
                }
                group=NEXT_GROUP(group); /* continue with the next group */
            }
            /* else: start is the first line of its group; the loop below handles the whole group */
        } else if(startGroupMSB>group[GROUP_MSB]) {
            /* make sure that we start enumerating with the first group after start */
            const uint16_t *nextGroup=NEXT_GROUP(group);
            if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
                /* extended names for the gap from start up to the next stored group (or limit) */
                UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
                if (end > limit) {
                    end = limit;
                }
                if (!enumExtNames(start, end - 1, fn, context)) {
                    return FALSE;
                }
            }
            group=nextGroup;
        }

        /* enumerate entire groups between the start- and end-groups */
        while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
            const uint16_t *nextGroup;
            start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
            if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
                return FALSE;
            }
            nextGroup=NEXT_GROUP(group);
            if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
                /* extended names for the gap between this group and the next stored one (or limit) */
                UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
                if (end > limit) {
                    end = limit;
                }
                if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
                    return FALSE;
                }
            }
            group=nextGroup;
        }

        /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
        if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
            return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
        } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
            /*
             * ran off the end of the groups table: continue below with
             * extended names, starting after the last stored group unless
             * start is already beyond it
             */
            UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
            if (next > start) {
                start = next;
            }
        } else {
            return TRUE;
        }
    }

    /* we have not found a group, which means everything is made of
       extended names. */
    if (nameChoice == U_EXTENDED_CHAR_NAME) {
        /* do not go beyond the last Unicode code point */
        if (limit > UCHAR_MAX_VALUE + 1) {
            limit = UCHAR_MAX_VALUE + 1;
        }
        return enumExtNames(start, limit - 1, fn, context);
    }

    return TRUE;
}
    780 
    781 static uint16_t
    782 writeFactorSuffix(const uint16_t *factors, uint16_t count,
    783                   const char *s, /* suffix elements */
    784                   uint32_t code,
    785                   uint16_t indexes[8], /* output fields from here */
    786                   const char *elementBases[8], const char *elements[8],
    787                   char *buffer, uint16_t bufferLength) {
    788     uint16_t i, factor, bufferPos=0;
    789     char c;
    790 
    791     /* write elements according to the factors */
    792 
    793     /*
    794      * the factorized elements are determined by modulo arithmetic
    795      * with the factors of this algorithm
    796      *
    797      * note that for fewer operations, count is decremented here
    798      */
    799     --count;
    800     for(i=count; i>0; --i) {
    801         factor=factors[i];
    802         indexes[i]=(uint16_t)(code%factor);
    803         code/=factor;
    804     }
    805     /*
    806      * we don't need to calculate the last modulus because start<=code<=end
    807      * guarantees here that code<=factors[0]
    808      */
    809     indexes[0]=(uint16_t)code;
    810 
    811     /* write each element */
    812     for(;;) {
    813         if(elementBases!=NULL) {
    814             *elementBases++=s;
    815         }
    816 
    817         /* skip indexes[i] strings */
    818         factor=indexes[i];
    819         while(factor>0) {
    820             while(*s++!=0) {}
    821             --factor;
    822         }
    823         if(elements!=NULL) {
    824             *elements++=s;
    825         }
    826 
    827         /* write element */
    828         while((c=*s++)!=0) {
    829             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    830         }
    831 
    832         /* we do not need to perform the rest of this loop for i==count - break here */
    833         if(i>=count) {
    834             break;
    835         }
    836 
    837         /* skip the rest of the strings for this factors[i] */
    838         factor=(uint16_t)(factors[i]-indexes[i]-1);
    839         while(factor>0) {
    840             while(*s++!=0) {}
    841             --factor;
    842         }
    843 
    844         ++i;
    845     }
    846 
    847     /* zero-terminate */
    848     if(bufferLength>0) {
    849         *buffer=0;
    850     }
    851 
    852     return bufferPos;
    853 }
    854 
    855 /*
    856  * Important:
    857  * Parts of findAlgName() are almost the same as some of getAlgName().
    858  * Fixes must be applied to both.
    859  */
    860 static uint16_t
    861 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
    862         char *buffer, uint16_t bufferLength) {
    863     uint16_t bufferPos=0;
    864 
    865     /* Only the normative character name can be algorithmic. */
    866     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    867         /* zero-terminate */
    868         if(bufferLength>0) {
    869             *buffer=0;
    870         }
    871         return 0;
    872     }
    873 
    874     switch(range->type) {
    875     case 0: {
    876         /* name = prefix hex-digits */
    877         const char *s=(const char *)(range+1);
    878         char c;
    879 
    880         uint16_t i, count;
    881 
    882         /* copy prefix */
    883         while((c=*s++)!=0) {
    884             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    885         }
    886 
    887         /* write hexadecimal code point value */
    888         count=range->variant;
    889 
    890         /* zero-terminate */
    891         if(count<bufferLength) {
    892             buffer[count]=0;
    893         }
    894 
    895         for(i=count; i>0;) {
    896             if(--i<bufferLength) {
    897                 c=(char)(code&0xf);
    898                 if(c<10) {
    899                     c+='0';
    900                 } else {
    901                     c+='A'-10;
    902                 }
    903                 buffer[i]=c;
    904             }
    905             code>>=4;
    906         }
    907 
    908         bufferPos+=count;
    909         break;
    910     }
    911     case 1: {
    912         /* name = prefix factorized-elements */
    913         uint16_t indexes[8];
    914         const uint16_t *factors=(const uint16_t *)(range+1);
    915         uint16_t count=range->variant;
    916         const char *s=(const char *)(factors+count);
    917         char c;
    918 
    919         /* copy prefix */
    920         while((c=*s++)!=0) {
    921             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
    922         }
    923 
    924         bufferPos+=writeFactorSuffix(factors, count,
    925                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
    926         break;
    927     }
    928     default:
    929         /* undefined type */
    930         /* zero-terminate */
    931         if(bufferLength>0) {
    932             *buffer=0;
    933         }
    934         break;
    935     }
    936 
    937     return bufferPos;
    938 }
    939 
    940 /*
    941  * Important: enumAlgNames() and findAlgName() are almost the same.
    942  * Any fix must be applied to both.
    943  */
    944 static UBool
    945 enumAlgNames(AlgorithmicRange *range,
    946              UChar32 start, UChar32 limit,
    947              UEnumCharNamesFn *fn, void *context,
    948              UCharNameChoice nameChoice) {
    949     char buffer[200];
    950     uint16_t length;
    951 
    952     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    953         return TRUE;
    954     }
    955 
    956     switch(range->type) {
    957     case 0: {
    958         char *s, *end;
    959         char c;
    960 
    961         /* get the full name of the start character */
    962         length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
    963         if(length<=0) {
    964             return TRUE;
    965         }
    966 
    967         /* call the enumerator function with this first character */
    968         if(!fn(context, start, nameChoice, buffer, length)) {
    969             return FALSE;
    970         }
    971 
    972         /* go to the end of the name; all these names have the same length */
    973         end=buffer;
    974         while(*end!=0) {
    975             ++end;
    976         }
    977 
    978         /* enumerate the rest of the names */
    979         while(++start<limit) {
    980             /* increment the hexadecimal number on a character-basis */
    981             s=end;
    982             for (;;) {
    983                 c=*--s;
    984                 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
    985                     *s=(char)(c+1);
    986                     break;
    987                 } else if(c=='9') {
    988                     *s='A';
    989                     break;
    990                 } else if(c=='F') {
    991                     *s='0';
    992                 }
    993             }
    994 
    995             if(!fn(context, start, nameChoice, buffer, length)) {
    996                 return FALSE;
    997             }
    998         }
    999         break;
   1000     }
   1001     case 1: {
   1002         uint16_t indexes[8];
   1003         const char *elementBases[8], *elements[8];
   1004         const uint16_t *factors=(const uint16_t *)(range+1);
   1005         uint16_t count=range->variant;
   1006         const char *s=(const char *)(factors+count);
   1007         char *suffix, *t;
   1008         uint16_t prefixLength, i, idx;
   1009 
   1010         char c;
   1011 
   1012         /* name = prefix factorized-elements */
   1013 
   1014         /* copy prefix */
   1015         suffix=buffer;
   1016         prefixLength=0;
   1017         while((c=*s++)!=0) {
   1018             *suffix++=c;
   1019             ++prefixLength;
   1020         }
   1021 
   1022         /* append the suffix of the start character */
   1023         length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
   1024                                               s, (uint32_t)start-range->start,
   1025                                               indexes, elementBases, elements,
   1026                                               suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
   1027 
   1028         /* call the enumerator function with this first character */
   1029         if(!fn(context, start, nameChoice, buffer, length)) {
   1030             return FALSE;
   1031         }
   1032 
   1033         /* enumerate the rest of the names */
   1034         while(++start<limit) {
   1035             /* increment the indexes in lexical order bound by the factors */
   1036             i=count;
   1037             for (;;) {
   1038                 idx=(uint16_t)(indexes[--i]+1);
   1039                 if(idx<factors[i]) {
   1040                     /* skip one index and its element string */
   1041                     indexes[i]=idx;
   1042                     s=elements[i];
   1043                     while(*s++!=0) {
   1044                     }
   1045                     elements[i]=s;
   1046                     break;
   1047                 } else {
   1048                     /* reset this index to 0 and its element string to the first one */
   1049                     indexes[i]=0;
   1050                     elements[i]=elementBases[i];
   1051                 }
   1052             }
   1053 
   1054             /* to make matters a little easier, just append all elements to the suffix */
   1055             t=suffix;
   1056             length=prefixLength;
   1057             for(i=0; i<count; ++i) {
   1058                 s=elements[i];
   1059                 while((c=*s++)!=0) {
   1060                     *t++=c;
   1061                     ++length;
   1062                 }
   1063             }
   1064             /* zero-terminate */
   1065             *t=0;
   1066 
   1067             if(!fn(context, start, nameChoice, buffer, length)) {
   1068                 return FALSE;
   1069             }
   1070         }
   1071         break;
   1072     }
   1073     default:
   1074         /* undefined type */
   1075         break;
   1076     }
   1077 
   1078     return TRUE;
   1079 }
   1080 
   1081 /*
   1082  * findAlgName() is almost the same as enumAlgNames() except that it
   1083  * returns the code point for a name if it fits into the range.
   1084  * It returns 0xffff otherwise.
   1085  */
   1086 static UChar32
   1087 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
   1088     UChar32 code;
   1089 
   1090     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
   1091         return 0xffff;
   1092     }
   1093 
   1094     switch(range->type) {
   1095     case 0: {
   1096         /* name = prefix hex-digits */
   1097         const char *s=(const char *)(range+1);
   1098         char c;
   1099 
   1100         uint16_t i, count;
   1101 
   1102         /* compare prefix */
   1103         while((c=*s++)!=0) {
   1104             if((char)c!=*otherName++) {
   1105                 return 0xffff;
   1106             }
   1107         }
   1108 
   1109         /* read hexadecimal code point value */
   1110         count=range->variant;
   1111         code=0;
   1112         for(i=0; i<count; ++i) {
   1113             c=*otherName++;
   1114             if('0'<=c && c<='9') {
   1115                 code=(code<<4)|(c-'0');
   1116             } else if('A'<=c && c<='F') {
   1117                 code=(code<<4)|(c-'A'+10);
   1118             } else {
   1119                 return 0xffff;
   1120             }
   1121         }
   1122 
   1123         /* does it fit into the range? */
   1124         if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
   1125             return code;
   1126         }
   1127         break;
   1128     }
   1129     case 1: {
   1130         char buffer[64];
   1131         uint16_t indexes[8];
   1132         const char *elementBases[8], *elements[8];
   1133         const uint16_t *factors=(const uint16_t *)(range+1);
   1134         uint16_t count=range->variant;
   1135         const char *s=(const char *)(factors+count), *t;
   1136         UChar32 start, limit;
   1137         uint16_t i, idx;
   1138 
   1139         char c;
   1140 
   1141         /* name = prefix factorized-elements */
   1142 
   1143         /* compare prefix */
   1144         while((c=*s++)!=0) {
   1145             if((char)c!=*otherName++) {
   1146                 return 0xffff;
   1147             }
   1148         }
   1149 
   1150         start=(UChar32)range->start;
   1151         limit=(UChar32)(range->end+1);
   1152 
   1153         /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
   1154         writeFactorSuffix(factors, count, s, 0,
   1155                           indexes, elementBases, elements, buffer, sizeof(buffer));
   1156 
   1157         /* compare the first suffix */
   1158         if(0==uprv_strcmp(otherName, buffer)) {
   1159             return start;
   1160         }
   1161 
   1162         /* enumerate and compare the rest of the suffixes */
   1163         while(++start<limit) {
   1164             /* increment the indexes in lexical order bound by the factors */
   1165             i=count;
   1166             for (;;) {
   1167                 idx=(uint16_t)(indexes[--i]+1);
   1168                 if(idx<factors[i]) {
   1169                     /* skip one index and its element string */
   1170                     indexes[i]=idx;
   1171                     s=elements[i];
   1172                     while(*s++!=0) {}
   1173                     elements[i]=s;
   1174                     break;
   1175                 } else {
   1176                     /* reset this index to 0 and its element string to the first one */
   1177                     indexes[i]=0;
   1178                     elements[i]=elementBases[i];
   1179                 }
   1180             }
   1181 
   1182             /* to make matters a little easier, just compare all elements of the suffix */
   1183             t=otherName;
   1184             for(i=0; i<count; ++i) {
   1185                 s=elements[i];
   1186                 while((c=*s++)!=0) {
   1187                     if(c!=*t++) {
   1188                         s=""; /* does not match */
   1189                         i=99;
   1190                     }
   1191                 }
   1192             }
   1193             if(i<99 && *t==0) {
   1194                 return start;
   1195             }
   1196         }
   1197         break;
   1198     }
   1199     default:
   1200         /* undefined type */
   1201         break;
   1202     }
   1203 
   1204     return 0xffff;
   1205 }
   1206 
   1207 /* sets of name characters, maximum name lengths ---------------------------- */
   1208 
   /*
    * Bit-set helpers for a set of byte values stored in 32-bit words.
    * c is reduced to its low 8 bits; bits 7..5 select the word and bits 4..0
    * select the bit inside that word, so an 8-word array covers all byte values.
    *
    * The argument c is parenthesized so that the cast applies to the whole
    * argument expression; without that, SET_ADD(set, x+1) would cast only x
    * and could compute a word index of 8, outside an 8-word set.
    * Note that c is still evaluated twice - do not pass expressions with
    * side effects.
    */
   #define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
   #define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
   1211 
   1212 static int32_t
   1213 calcStringSetLength(uint32_t set[8], const char *s) {
   1214     int32_t length=0;
   1215     char c;
   1216 
   1217     while((c=*s++)!=0) {
   1218         SET_ADD(set, c);
   1219         ++length;
   1220     }
   1221     return length;
   1222 }
   1223 
   1224 static int32_t
   1225 calcAlgNameSetsLengths(int32_t maxNameLength) {
   1226     AlgorithmicRange *range;
   1227     uint32_t *p;
   1228     uint32_t rangeCount;
   1229     int32_t length;
   1230 
   1231     /* enumerate algorithmic ranges */
   1232     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1233     rangeCount=*p;
   1234     range=(AlgorithmicRange *)(p+1);
   1235     while(rangeCount>0) {
   1236         switch(range->type) {
   1237         case 0:
   1238             /* name = prefix + (range->variant times) hex-digits */
   1239             /* prefix */
   1240             length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
   1241             if(length>maxNameLength) {
   1242                 maxNameLength=length;
   1243             }
   1244             break;
   1245         case 1: {
   1246             /* name = prefix factorized-elements */
   1247             const uint16_t *factors=(const uint16_t *)(range+1);
   1248             const char *s;
   1249             int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
   1250 
   1251             /* prefix length */
   1252             s=(const char *)(factors+count);
   1253             length=calcStringSetLength(gNameSet, s);
   1254             s+=length+1; /* start of factor suffixes */
   1255 
   1256             /* get the set and maximum factor suffix length for each factor */
   1257             for(i=0; i<count; ++i) {
   1258                 maxFactorLength=0;
   1259                 for(factor=factors[i]; factor>0; --factor) {
   1260                     factorLength=calcStringSetLength(gNameSet, s);
   1261                     s+=factorLength+1;
   1262                     if(factorLength>maxFactorLength) {
   1263                         maxFactorLength=factorLength;
   1264                     }
   1265                 }
   1266                 length+=maxFactorLength;
   1267             }
   1268 
   1269             if(length>maxNameLength) {
   1270                 maxNameLength=length;
   1271             }
   1272             break;
   1273         }
   1274         default:
   1275             /* unknown type */
   1276             break;
   1277         }
   1278 
   1279         range=(AlgorithmicRange *)((uint8_t *)range+range->size);
   1280         --rangeCount;
   1281     }
   1282     return maxNameLength;
   1283 }
   1284 
   1285 static int32_t
   1286 calcExtNameSetsLengths(int32_t maxNameLength) {
   1287     int32_t i, length;
   1288 
   1289     for(i=0; i<LENGTHOF(charCatNames); ++i) {
   1290         /*
   1291          * for each category, count the length of the category name
   1292          * plus 9=
   1293          * 2 for <>
   1294          * 1 for -
   1295          * 6 for most hex digits per code point
   1296          */
   1297         length=9+calcStringSetLength(gNameSet, charCatNames[i]);
   1298         if(length>maxNameLength) {
   1299             maxNameLength=length;
   1300         }
   1301     }
   1302     return maxNameLength;
   1303 }
   1304 
   1305 static int32_t
   1306 calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
   1307                   uint32_t set[8],
   1308                   const uint8_t **pLine, const uint8_t *lineLimit) {
   1309     const uint8_t *line=*pLine;
   1310     int32_t length=0, tokenLength;
   1311     uint16_t c, token;
   1312 
   1313     while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {
   1314         if(c>=tokenCount) {
   1315             /* implicit letter */
   1316             SET_ADD(set, c);
   1317             ++length;
   1318         } else {
   1319             token=tokens[c];
   1320             if(token==(uint16_t)(-2)) {
   1321                 /* this is a lead byte for a double-byte token */
   1322                 c=c<<8|*line++;
   1323                 token=tokens[c];
   1324             }
   1325             if(token==(uint16_t)(-1)) {
   1326                 /* explicit letter */
   1327                 SET_ADD(set, c);
   1328                 ++length;
   1329             } else {
   1330                 /* count token word */
   1331                 if(tokenLengths!=NULL) {
   1332                     /* use cached token length */
   1333                     tokenLength=tokenLengths[c];
   1334                     if(tokenLength==0) {
   1335                         tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
   1336                         tokenLengths[c]=(int8_t)tokenLength;
   1337                     }
   1338                 } else {
   1339                     tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
   1340                 }
   1341                 length+=tokenLength;
   1342             }
   1343         }
   1344     }
   1345 
   1346     *pLine=line;
   1347     return length;
   1348 }
   1349 
   1350 static void
   1351 calcGroupNameSetsLengths(int32_t maxNameLength) {
   1352     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
   1353 
   1354     uint16_t *tokens=(uint16_t *)uCharNames+8;
   1355     uint16_t tokenCount=*tokens++;
   1356     uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
   1357 
   1358     int8_t *tokenLengths;
   1359 
   1360     const uint16_t *group;
   1361     const uint8_t *s, *line, *lineLimit;
   1362 
   1363     int32_t groupCount, lineNumber, length;
   1364 
   1365     tokenLengths=(int8_t *)uprv_malloc(tokenCount);
   1366     if(tokenLengths!=NULL) {
   1367         uprv_memset(tokenLengths, 0, tokenCount);
   1368     }
   1369 
   1370     group=GET_GROUPS(uCharNames);
   1371     groupCount=*group++;
   1372 
   1373     /* enumerate all groups */
   1374     while(groupCount>0) {
   1375         s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
   1376         s=expandGroupLengths(s, offsets, lengths);
   1377 
   1378         /* enumerate all lines in each group */
   1379         for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
   1380             line=s+offsets[lineNumber];
   1381             length=lengths[lineNumber];
   1382             if(length==0) {
   1383                 continue;
   1384             }
   1385 
   1386             lineLimit=line+length;
   1387 
   1388             /* read regular name */
   1389             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
   1390             if(length>maxNameLength) {
   1391                 maxNameLength=length;
   1392             }
   1393             if(line==lineLimit) {
   1394                 continue;
   1395             }
   1396 
   1397             /* read Unicode 1.0 name */
   1398             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
   1399             if(length>maxNameLength) {
   1400                 maxNameLength=length;
   1401             }
   1402             if(line==lineLimit) {
   1403                 continue;
   1404             }
   1405 
   1406             /* read ISO comment */
   1407             /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
   1408         }
   1409 
   1410         group=NEXT_GROUP(group);
   1411         --groupCount;
   1412     }
   1413 
   1414     if(tokenLengths!=NULL) {
   1415         uprv_free(tokenLengths);
   1416     }
   1417 
   1418     /* set gMax... - name length last for threading */
   1419     gMaxNameLength=maxNameLength;
   1420 }
   1421 
   1422 static UBool
   1423 calcNameSetsLengths(UErrorCode *pErrorCode) {
   1424     static const char extChars[]="0123456789ABCDEF<>-";
   1425     int32_t i, maxNameLength;
   1426 
   1427     if(gMaxNameLength!=0) {
   1428         return TRUE;
   1429     }
   1430 
   1431     if(!isDataLoaded(pErrorCode)) {
   1432         return FALSE;
   1433     }
   1434 
   1435     /* set hex digits, used in various names, and <>-, used in extended names */
   1436     for(i=0; i<sizeof(extChars)-1; ++i) {
   1437         SET_ADD(gNameSet, extChars[i]);
   1438     }
   1439 
   1440     /* set sets and lengths from algorithmic names */
   1441     maxNameLength=calcAlgNameSetsLengths(0);
   1442 
   1443     /* set sets and lengths from extended names */
   1444     maxNameLength=calcExtNameSetsLengths(maxNameLength);
   1445 
   1446     /* set sets and lengths from group names, set global maximum values */
   1447     calcGroupNameSetsLengths(maxNameLength);
   1448 
   1449     return TRUE;
   1450 }
   1451 
   1452 /* public API --------------------------------------------------------------- */
   1453 
   1454 U_CAPI int32_t U_EXPORT2
   1455 u_charName(UChar32 code, UCharNameChoice nameChoice,
   1456            char *buffer, int32_t bufferLength,
   1457            UErrorCode *pErrorCode) {
   1458     AlgorithmicRange *algRange;
   1459     uint32_t *p;
   1460     uint32_t i;
   1461     int32_t length;
   1462 
   1463     /* check the argument values */
   1464     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1465         return 0;
   1466     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
   1467               bufferLength<0 || (bufferLength>0 && buffer==NULL)
   1468     ) {
   1469         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1470         return 0;
   1471     }
   1472 
   1473     if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
   1474         return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
   1475     }
   1476 
   1477     length=0;
   1478 
   1479     /* try algorithmic names first */
   1480     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1481     i=*p;
   1482     algRange=(AlgorithmicRange *)(p+1);
   1483     while(i>0) {
   1484         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
   1485             length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
   1486             break;
   1487         }
   1488         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
   1489         --i;
   1490     }
   1491 
   1492     if(i==0) {
   1493         if (nameChoice == U_EXTENDED_CHAR_NAME) {
   1494             length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
   1495             if (!length) {
   1496                 /* extended character name */
   1497                 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
   1498             }
   1499         } else {
   1500             /* normal character name */
   1501             length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
   1502         }
   1503     }
   1504 
   1505     return u_terminateChars(buffer, bufferLength, length, pErrorCode);
   1506 }
   1507 
   1508 U_CAPI int32_t U_EXPORT2
   1509 u_getISOComment(UChar32 c,
   1510                 char *dest, int32_t destCapacity,
   1511                 UErrorCode *pErrorCode) {
   1512     int32_t length;
   1513 
   1514     /* check the argument values */
   1515     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1516         return 0;
   1517     } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
   1518         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1519         return 0;
   1520     }
   1521 
   1522     if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
   1523         return u_terminateChars(dest, destCapacity, 0, pErrorCode);
   1524     }
   1525 
   1526     /* the ISO comment is stored like a normal character name */
   1527     length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)destCapacity);
   1528     return u_terminateChars(dest, destCapacity, length, pErrorCode);
   1529 }
   1530 
   1531 U_CAPI UChar32 U_EXPORT2
   1532 u_charFromName(UCharNameChoice nameChoice,
   1533                const char *name,
   1534                UErrorCode *pErrorCode) {
   1535     char upper[120], lower[120];
   1536     FindName findName;
   1537     AlgorithmicRange *algRange;
   1538     uint32_t *p;
   1539     uint32_t i;
   1540     UChar32 cp = 0;
   1541     char c0;
   1542     UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
   1543 
   1544     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1545         return error;
   1546     }
   1547 
   1548     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
   1549         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1550         return error;
   1551     }
   1552 
   1553     if(!isDataLoaded(pErrorCode)) {
   1554         return error;
   1555     }
   1556 
   1557     /* construct the uppercase and lowercase of the name first */
   1558     for(i=0; i<sizeof(upper); ++i) {
   1559         if((c0=*name++)!=0) {
   1560             upper[i]=uprv_toupper(c0);
   1561             lower[i]=uprv_tolower(c0);
   1562         } else {
   1563             upper[i]=lower[i]=0;
   1564             break;
   1565         }
   1566     }
   1567     if(i==sizeof(upper)) {
   1568         /* name too long, there is no such character */
   1569         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1570         return error;
   1571     }
   1572 
   1573     /* try extended names first */
   1574     if (lower[0] == '<') {
   1575         if (nameChoice == U_EXTENDED_CHAR_NAME) {
   1576             if (lower[--i] == '>') {
   1577                 for (--i; lower[i] && lower[i] != '-'; --i) {
   1578                 }
   1579 
   1580                 if (lower[i] == '-') { /* We've got a category. */
   1581                     uint32_t cIdx;
   1582 
   1583                     lower[i] = 0;
   1584 
   1585                     for (++i; lower[i] != '>'; ++i) {
   1586                         if (lower[i] >= '0' && lower[i] <= '9') {
   1587                             cp = (cp << 4) + lower[i] - '0';
   1588                         } else if (lower[i] >= 'a' && lower[i] <= 'f') {
   1589                             cp = (cp << 4) + lower[i] - 'a' + 10;
   1590                         } else {
   1591                             *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1592                             return error;
   1593                         }
   1594                     }
   1595 
   1596                     /* Now validate the category name.
   1597                        We could use a binary search, or a trie, if
   1598                        we really wanted to. */
   1599 
   1600                     for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {
   1601 
   1602                         if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
   1603                             if (getCharCat(cp) == cIdx) {
   1604                                 return cp;
   1605                             }
   1606                             break;
   1607                         }
   1608                     }
   1609                 }
   1610             }
   1611         }
   1612 
   1613         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1614         return error;
   1615     }
   1616 
   1617     /* try algorithmic names now */
   1618     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1619     i=*p;
   1620     algRange=(AlgorithmicRange *)(p+1);
   1621     while(i>0) {
   1622         if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
   1623             return cp;
   1624         }
   1625         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
   1626         --i;
   1627     }
   1628 
   1629     /* normal character name */
   1630     findName.otherName=upper;
   1631     findName.code=error;
   1632     enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
   1633     if (findName.code == error) {
   1634          *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   1635     }
   1636     return findName.code;
   1637 }
   1638 
   1639 /**
   1640  * Enumerates the character names for code points in [start, limit) by calling
   1641  * fn through enumNames() (data-driven names) and enumAlgNames() (algorithmic
   1642  * names). The algorithmic ranges are assumed to be in ascending order.
   1643  * Enumeration ends early when one of the helpers returns a false value.
   1644  * Sets U_ILLEGAL_ARGUMENT_ERROR for a NULL fn or an out-of-range nameChoice.
   1645  */
   1646 U_CAPI void U_EXPORT2
   1647 u_enumCharNames(UChar32 start, UChar32 limit,
   1648                 UEnumCharNamesFn *fn,
   1649                 void *context,
   1650                 UCharNameChoice nameChoice,
   1651                 UErrorCode *pErrorCode) {
   1652     AlgorithmicRange *range;
   1653     uint32_t *rangeCount;
   1654     uint32_t remaining;
   1655 
   1656     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1657         return;
   1658     }
   1659     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
   1660         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1661         return;
   1662     }
   1663     /* clamp limit; an empty range returns before the data is loaded */
   1664     if((uint32_t)limit > UCHAR_MAX_VALUE + 1) {
   1665         limit = UCHAR_MAX_VALUE + 1;
   1666     }
   1667     if((uint32_t)start>=(uint32_t)limit || !isDataLoaded(pErrorCode)) {
   1668         return;
   1669     }
   1670 
   1671     /* the algorithmic section is a uint32_t range count followed by the ranges */
   1672     rangeCount=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
   1673     range=(AlgorithmicRange *)(rangeCount+1);
   1674     for(remaining=*rangeCount; remaining>0; --remaining) {
   1675         /* data-driven names before this algorithmic range (here: start<limit) */
   1676         if((uint32_t)start<range->start) {
   1677             if((uint32_t)limit>range->start) {
   1678                 if(!enumNames(uCharNames, start, (UChar32)range->start, fn, context, nameChoice)) {
   1679                     return;
   1680                 }
   1681                 start=(UChar32)range->start;
   1682             } else {
   1683                 enumNames(uCharNames, start, limit, fn, context, nameChoice);
   1684                 return;
   1685             }
   1686         }
   1687         /* names inside this algorithmic range (here: range->start<=start<limit) */
   1688         if((uint32_t)start<=range->end) {
   1689             if((uint32_t)limit>(range->end+1)) {
   1690                 if(!enumAlgNames(range, start, (UChar32)range->end+1, fn, context, nameChoice)) {
   1691                     return;
   1692                 }
   1693                 start=(UChar32)range->end+1;
   1694             } else {
   1695                 enumAlgNames(range, start, limit, fn, context, nameChoice);
   1696                 return;
   1697             }
   1698         }
   1699         /* step to the next range using the size stored in this one (start<limit) */
   1700         range=(AlgorithmicRange *)((uint8_t *)range+range->size);
   1701     }
   1702 
   1703     /* data-driven names after the last algorithmic range */
   1704     enumNames(uCharNames, start, limit, fn, context, nameChoice);
   1705 }
   1706 
   1707 /* Returns gMaxNameLength if calcNameSetsLengths() succeeds, otherwise 0. */
   1708 U_CAPI int32_t U_EXPORT2
   1709 uprv_getMaxCharNameLength() {
   1710     /* local status: this function has no error-code parameter to report through */
   1711     UErrorCode status=U_ZERO_ERROR;
   1712 
   1713     /* gMaxNameLength is read only when the call reports success */
   1714     return calcNameSetsLengths(&status) ? gMaxNameLength : 0;
   1715 }
   1716 
   1717 /**
   1718  * Adds the characters flagged in the char set cset to a Unicode set.
   1719  * Does nothing if calcNameSetsLengths() fails.
   1720  * @param cset Set of 256 bit flags corresponding to a set of chars.
   1721  * @param sa   USetAdder; sa->add(sa->set, c) is called for each character.
   1722  */
   1723 static void
   1724 charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
   1725     char nameChars[256];
   1726     UChar nameUChars[256];
   1727     int32_t k, charCount=0;
   1728     UErrorCode status=U_ZERO_ERROR;
   1729 
   1730     if(!calcNameSetsLengths(&status)) {
   1731         return;
   1732     }
   1733 
   1734     /* collect, in ascending order, every char value whose flag is set in cset */
   1735     for(k=0; k<256; ++k) {
   1736         if(SET_CONTAINS(cset, k)) {
   1737             nameChars[charCount]=(char)k;
   1738             ++charCount;
   1739         }
   1740     }
   1741 
   1742     /* convert the collected char string to a UChar string */
   1743     u_charsToUChars(nameChars, nameUChars, charCount);
   1744 
   1745     /* hand each converted UChar to the set adder */
   1746     for(k=0; k<charCount; ++k) {
   1747         /* non-invariant chars become (UChar)0: skip a 0 that did not come from char 0 */
   1748         if(nameUChars[k]==0 && nameChars[k]!=0) {
   1749             continue;
   1750         }
   1751         sa->add(sa->set, nameUChars[k]);
   1752     }
   1753 }
   1754 
   1755 /**
   1756  * Fills a set with characters that are used in Unicode character names.
   1757  * @param sa USetAdder that receives the characters; passed with gNameSet to charSetToUSet().
   1758  */
   1759 U_CAPI void U_EXPORT2
   1760 uprv_getCharNameCharacters(const USetAdder *sa) {
   1761     charSetToUSet(gNameSet, sa);
   1762 }
   1763 
   1764 /* data swapping ------------------------------------------------------------ */
   1765 
   1766 /*
   1767  * The token table contains non-negative entries for token bytes,
   1768  * and -1 for bytes that represent themselves in the data file's charset.
   1769  * -2 entries are used for lead bytes.
   1770  *
   1771  * Direct bytes (-1 entries) must be translated from the input charset family
   1772  * to the output charset family; makeTokenMap() writes a byte permutation for
   1773  * this into map[]. It is used once for single-/lead-byte tokens and once
   1774  * more for all trail byte tokens. (';' is an unused trail byte marked with -1.)
   1775  *
   1776  * At most the first 256 entries of tokens[] are examined. Nothing is done if
   1777  * *pErrorCode already indicates a failure on entry.
   1778  */
   1779 static void
   1780 makeTokenMap(const UDataSwapper *ds,
   1781              int16_t tokens[], uint16_t tokenCount,
   1782              uint8_t map[256],
   1783              UErrorCode *pErrorCode) {
   1784     UBool outByteTaken[256];
   1785     uint16_t idx, nextFree;
   1786     uint8_t inByte, outByte;
   1787 
   1788     if(U_FAILURE(*pErrorCode)) {
   1789         return;
   1790     }
   1791 
   1792     if(ds->inCharset==ds->outCharset) {
   1793         /* Same charset family: identity permutation */
   1794         for(idx=0; idx<256; ++idx) {
   1795             map[idx]=(uint8_t)idx;
   1796         }
   1797         return;
   1798     }
   1799 
   1800     uprv_memset(map, 0, 256);
   1801     uprv_memset(outByteTaken, 0, 256);
   1802     /* a single map covers at most 256 byte values */
   1803     if(tokenCount>256) {
   1804         tokenCount=256;
   1805     }
   1806 
   1807     /* pass 1: convert the direct bytes (byte 0 always maps to itself) */
   1808     for(idx=1; idx<tokenCount; ++idx) {
   1809         if(tokens[idx]!=-1) {
   1810             continue;
   1811         }
   1812         inByte=(uint8_t)idx;
   1813         ds->swapInvChars(ds, &inByte, 1, &outByte, pErrorCode);
   1814         if(U_FAILURE(*pErrorCode)) {
   1815             udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
   1816                              idx, ds->inCharset);
   1817             return;
   1818         }
   1819         /* record the converted byte and mark its output value as used */
   1820         map[inByte]=outByte;
   1821         outByteTaken[outByte]=TRUE;
   1822     }
   1823 
   1824     /* pass 2: give each still-unmapped byte the next output value not used so far */
   1825     nextFree=1;
   1826     for(idx=1; idx<tokenCount; ++idx) {
   1827         if(map[idx]!=0) {
   1828             continue;
   1829         }
   1830         while(outByteTaken[nextFree]) {
   1831             ++nextFree;
   1832         }
   1833         map[idx]=(uint8_t)nextFree;
   1834         ++nextFree;
   1835     }
   1836 
   1837     /* mappings at tokenCount and above stay unset if tokenCount<256: they won't be used */
   1838 }
   1839 
   1840 U_CAPI int32_t U_EXPORT2 /* swaps unames.icu data via ds; returns header size plus data size, or 0 on error */
   1841 uchar_swapNames(const UDataSwapper *ds,
   1842                 const void *inData, int32_t length, void *outData, /* length<0 requests preflighting */
   1843                 UErrorCode *pErrorCode) {
   1844     const UDataInfo *pInfo;
   1845     int32_t headerSize;
   1846     /* inBytes/outBytes are set to inData/outData advanced by headerSize */
   1847     const uint8_t *inBytes;
   1848     uint8_t *outBytes;
   1849     /* the four named offsets are read from the four leading uint32_t at inBytes */
   1850     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
   1851              offset, i, count, stringsCount;
   1852 
   1853     const AlgorithmicRange *inRange;
   1854     AlgorithmicRange *outRange;
   1855 
   1856     /* udata_swapDataHeader checks the arguments */
   1857     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1858     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1859         return 0;
   1860     }
   1861 
   1862     /* check data format and format version */
   1863     pInfo=(const UDataInfo *)((const char *)inData+4);
   1864     if(!(
   1865         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
   1866         pInfo->dataFormat[1]==0x6e &&
   1867         pInfo->dataFormat[2]==0x61 &&
   1868         pInfo->dataFormat[3]==0x6d &&
   1869         pInfo->formatVersion[0]==1
   1870     )) {
   1871         udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
   1872                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   1873                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   1874                          pInfo->formatVersion[0]);
   1875         *pErrorCode=U_UNSUPPORTED_ERROR;
   1876         return 0;
   1877     }
   1878 
   1879     inBytes=(const uint8_t *)inData+headerSize;
   1880     outBytes=(uint8_t *)outData+headerSize;
   1881     if(length<0) { /* preflighting: only algNamesOffset is needed below */
   1882         algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
   1883     } else {
   1884         length-=headerSize; /* from here on, length counts the bytes after the header */
   1885         if( length<20 ||
   1886             (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
   1887         ) {
   1888             udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
   1889                              length);
   1890             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1891             return 0;
   1892         }
   1893     }
   1894 
   1895     if(length<0) {
   1896         /* preflighting: iterate through algorithmic ranges */
   1897         offset=algNamesOffset;
   1898         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
   1899         offset+=4;
   1900 
   1901         for(i=0; i<count; ++i) {
   1902             inRange=(const AlgorithmicRange *)(inBytes+offset);
   1903             offset+=ds->readUInt16(inRange->size); /* each range stores its own byte size */
   1904         }
   1905     } else {
   1906         /* swap data */
   1907         const uint16_t *p;
   1908         uint16_t *q, *temp;
   1909         /* tokens[] holds up to the first 512 token table entries read as int16_t */
   1910         int16_t tokens[512];
   1911         uint16_t tokenCount;
   1912         /* permutations from makeTokenMap(): map from tokens[0..], trailMap from tokens[256..] */
   1913         uint8_t map[256], trailMap[256];
   1914 
   1915         /* copy the data for inaccessible bytes */
   1916         if(inBytes!=outBytes) {
   1917             uprv_memcpy(outBytes, inBytes, length);
   1918         }
   1919 
   1920         /* the initial 4 offsets first */
   1921         tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
   1922         groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
   1923         groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
   1924         ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
   1925 
   1926         /*
   1927          * now the tokens table
   1928          * it needs to be permutated along with the compressed name strings
   1929          */
   1930         p=(const uint16_t *)(inBytes+16);
   1931         q=(uint16_t *)(outBytes+16);
   1932 
   1933         /* read and swap the tokenCount */
   1934         tokenCount=ds->readUInt16(*p);
   1935         ds->swapArray16(ds, p, 2, q, pErrorCode);
   1936         ++p;
   1937         ++q;
   1938 
   1939         /* read the first 512 tokens and make the token maps */
   1940         if(tokenCount<=512) {
   1941             count=tokenCount;
   1942         } else {
   1943             count=512;
   1944         }
   1945         for(i=0; i<count; ++i) {
   1946             tokens[i]=udata_readInt16(ds, p[i]);
   1947         }
   1948         for(; i<512; ++i) {
   1949             tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
   1950         }
   1951         makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
   1952         makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
   1953         if(U_FAILURE(*pErrorCode)) {
   1954             return 0;
   1955         }
   1956 
   1957         /*
   1958          * swap and permutate the tokens
   1959          * go through a temporary array to support in-place swapping
   1960          */
   1961         temp=(uint16_t *)uprv_malloc(tokenCount*2); /* 2 bytes per token */
   1962         if(temp==NULL) {
   1963             udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
   1964                              tokenCount);
   1965             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1966             return 0;
   1967         }
   1968 
   1969         /* swap and permutate single-/lead-byte tokens */
   1970         for(i=0; i<tokenCount && i<256; ++i) {
   1971             ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
   1972         }
   1973 
   1974         /* swap and permutate trail-byte tokens: the high index bits are kept, the low byte goes through trailMap */
   1975         for(; i<tokenCount; ++i) {
   1976             ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
   1977         }
   1978 
   1979         /* copy the result into the output and free the temporary array */
   1980         uprv_memcpy(q, temp, tokenCount*2);
   1981         uprv_free(temp);
   1982 
   1983         /*
   1984          * swap the token strings but not a possible padding byte after
   1985          * the terminating NUL of the last string
   1986          */
   1987         udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
   1988                                     outBytes+tokenStringOffset, pErrorCode);
   1989         if(U_FAILURE(*pErrorCode)) {
   1990             udata_printError(ds, "uchar_swapNames(token strings) failed\n");
   1991             return 0;
   1992         }
   1993 
   1994         /* swap the group table: a uint16_t count followed by count*3 more uint16_t */
   1995         count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
   1996         ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
   1997                            outBytes+groupsOffset, pErrorCode);
   1998 
   1999         /*
   2000          * swap the group strings
   2001          * swap the string bytes but not the nibble-encoded string lengths
   2002          */
   2003         if(ds->inCharset!=ds->outCharset) { /* string bytes are only rewritten across charset families */
   2004             uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
   2005 
   2006             const uint8_t *inStrings, *nextInStrings;
   2007             uint8_t *outStrings;
   2008 
   2009             uint8_t c;
   2010 
   2011             inStrings=inBytes+groupStringOffset;
   2012             outStrings=outBytes+groupStringOffset;
   2013             /* the group strings occupy the bytes from groupStringOffset up to algNamesOffset */
   2014             stringsCount=algNamesOffset-groupStringOffset;
   2015 
   2016             /* iterate through string groups until only a few padding bytes are left */
   2017             while(stringsCount>32) {
   2018                 nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
   2019 
   2020                 /* move past the length bytes */
   2021                 stringsCount-=(uint32_t)(nextInStrings-inStrings);
   2022                 outStrings+=nextInStrings-inStrings;
   2023                 inStrings=nextInStrings;
   2024 
   2025                 count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
   2026                 stringsCount-=count;
   2027 
   2028                 /* swap the string bytes using map[] and trailMap[] */
   2029                 while(count>0) {
   2030                     c=*inStrings++;
   2031                     *outStrings++=map[c];
   2032                     if(tokens[c]!=-2) {
   2033                         --count;
   2034                     } else {
   2035                         /* token lead byte: swap the trail byte, too */
   2036                         *outStrings++=trailMap[*inStrings++];
   2037                         count-=2;
   2038                     }
   2039                 }
   2040             }
   2041         }
   2042 
   2043         /* swap the algorithmic ranges */
   2044         offset=algNamesOffset;
   2045         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
   2046         ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
   2047         offset+=4;
   2048 
   2049         for(i=0; i<count; ++i) {
   2050             if(offset>(uint32_t)length) {
   2051                 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
   2052                                  length, i);
   2053                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   2054                 return 0;
   2055             }
   2056 
   2057             inRange=(const AlgorithmicRange *)(inBytes+offset);
   2058             outRange=(AlgorithmicRange *)(outBytes+offset);
   2059             offset+=ds->readUInt16(inRange->size); /* offset now marks the end of this range */
   2060 
   2061             ds->swapArray32(ds, inRange, 8, outRange, pErrorCode); /* first 8 bytes as 32-bit units (presumably start and end) */
   2062             ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
   2063             switch(inRange->type) {
   2064             case 0:
   2065                 /* swap prefix string */
   2066                 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
   2067                                     outRange+1, pErrorCode);
   2068                 if(U_FAILURE(*pErrorCode)) {
   2069                     udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
   2070                                      i);
   2071                     return 0;
   2072                 }
   2073                 break;
   2074             case 1:
   2075                 {
   2076                     /* swap factors and the prefix and factor strings */
   2077                     uint32_t factorsCount;
   2078 
   2079                     factorsCount=inRange->variant; /* for type 1, variant is used as the number of 16-bit factors */
   2080                     p=(const uint16_t *)(inRange+1);
   2081                     q=(uint16_t *)(outRange+1);
   2082                     ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
   2083 
   2084                     /* swap the strings, up to the last terminating NUL */
   2085                     p+=factorsCount;
   2086                     q+=factorsCount;
   2087                     stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
   2088                     while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
   2089                         --stringsCount;
   2090                     }
   2091                     ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
   2092                 }
   2093                 break;
   2094             default:
   2095                 udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
   2096                                  inRange->type, i);
   2097                 *pErrorCode=U_UNSUPPORTED_ERROR;
   2098                 return 0;
   2099             }
   2100         }
   2101     }
   2102     /* in both branches offset ends up just past the last algorithmic range */
   2103     return headerSize+(int32_t)offset;
   2104 }
   2105 
   2106 /*
   2107  * Hey, Emacs, please set the following:
   2108  *
   2109  * Local Variables:
   2110  * indent-tabs-mode: nil
   2111  * End:
   2112  *
   2113  */
   2114