Home | History | Annotate | Download | only in impl
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 1996-2014, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  *******************************************************************************
      8  */
      9 
     10 package com.ibm.icu.impl;
     11 
     12 import java.io.IOException;
     13 import java.nio.ByteBuffer;
     14 import java.util.Locale;
     15 import java.util.MissingResourceException;
     16 
     17 import com.ibm.icu.lang.UCharacter;
     18 import com.ibm.icu.lang.UCharacterCategory;
     19 import com.ibm.icu.text.UTF16;
     20 import com.ibm.icu.text.UnicodeSet;
     21 
     22 /**
     23 * Internal class to manage character names.
     24 * Since the data for names is stored
     25 * in an array of char, by default indexes used in this class refer to
     26 * a 2 byte count, unless otherwise stated. In cases where the index refers
     27 * to a byte count, the index is halved and depending on whether the index is
     28 * even or odd, the MSB or LSB of the result char at the halved index is
     29 * returned. For indexes to an array of int, the index is multiplied by 2, and the
     30 * result char at the multiplied index and its following char are returned as an
     31 * int.
     32 * <a href=../lang/UCharacter.html>UCharacter</a> acts as a public facade for this class
     33 * Note : 0 - 0x1F are control characters without names in Unicode 3.0
     34 * @author Syn Wee Quek
     35 * @since nov0700
     36 */
     37 
     38 public final class UCharacterName
     39 {
     40     // public data members ----------------------------------------------
     41 
     /*
      * Public singleton instance, created once when this class is initialized.
      */
     public static final UCharacterName INSTANCE;

     static {
         try {
             INSTANCE = new UCharacterName();
         } catch (IOException e) {
             ///CLOVER:OFF
             // Keep the IOException as the cause so the underlying I/O failure
             // is not lost when class initialization is reported as failed.
             MissingResourceException missing = new MissingResourceException(
                     "Could not construct UCharacterName. Missing unames.icu", "", "");
             missing.initCause(e);
             throw missing;
             ///CLOVER:ON
         }
     }
     56 
     57     /**
     58     * Number of lines per group,
     59     * documented as 1 << GROUP_SHIFT_; the declaration uses the literal 1 << 5 = 32
     60     */
     61     public static final int LINES_PER_GROUP_ = 1 << 5;
     62     /**
     63      * Number of groups; exclusive upper bound for the group indexes used by getGroup and getGroupMSB
     64      */
     65     public int m_groupcount_ = 0;
     66 
     67     // public methods ---------------------------------------------------
     68 
     69     /**
     70     * Retrieve the name of a Unicode code point.
     71     * Depending on <code>choice</code>, the character name written into the
     72     * buffer is the "modern" name or the name that was defined in Unicode
     73     * version 1.0.
     74     * The name contains only "invariant" characters
     75     * like A-Z, 0-9, space, and '-'.
     76     *
     77     * @param ch the code point for which to get the name.
     78     * @param choice Selector for which name to get.
     79     * @return if code point is above 0x1fff, null is returned
     80     */
     81     public String getName(int ch, int choice)
     82     {
     83         if (ch < UCharacter.MIN_VALUE || ch > UCharacter.MAX_VALUE ||
     84             choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) {
     85             return null;
     86         }
     87 
     88         String result = null;
     89 
     90         result = getAlgName(ch, choice);
     91 
     92         // getting normal character name
     93         if (result == null || result.length() == 0) {
     94             if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
     95                 result = getExtendedName(ch);
     96             } else {
     97                 result = getGroupName(ch, choice);
     98             }
     99         }
    100 
    101         return result;
    102     }
    103 
    104     /**
    105     * Find a character by its name and return its code point value
    106     * @param choice selector to indicate if argument name is a Unicode 1.0
    107     *        or the most current version
    108     * @param name the name to search for
    109     * @return code point
    110     */
    111     public int getCharFromName(int choice, String name)
    112     {
    113         // checks for illegal arguments
    114         if (choice >= UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT ||
    115             name == null || name.length() == 0) {
    116             return -1;
    117         }
    118 
    119         // try extended names first
    120         int result = getExtendedChar(name.toLowerCase(Locale.ENGLISH), choice);
    121         if (result >= -1) {
    122             return result;
    123         }
    124 
    125         String upperCaseName = name.toUpperCase(Locale.ENGLISH);
    126         // try algorithmic names first, if fails then try group names
    127         // int result = getAlgorithmChar(choice, uppercasename);
    128 
    129         if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME ||
    130             choice == UCharacterNameChoice.EXTENDED_CHAR_NAME
    131         ) {
    132             int count = 0;
    133             if (m_algorithm_ != null) {
    134                 count = m_algorithm_.length;
    135             }
    136             for (count --; count >= 0; count --) {
    137                 result = m_algorithm_[count].getChar(upperCaseName);
    138                 if (result >= 0) {
    139                     return result;
    140                 }
    141             }
    142         }
    143 
    144         if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
    145             result = getGroupChar(upperCaseName,
    146                                   UCharacterNameChoice.UNICODE_CHAR_NAME);
    147             if (result == -1) {
    148                 result = getGroupChar(upperCaseName,
    149                                       UCharacterNameChoice.CHAR_NAME_ALIAS);
    150             }
    151         }
    152         else {
    153             result = getGroupChar(upperCaseName, choice);
    154         }
    155         return result;
    156     }
    157 
    158     // these are all UCharacterNameIterator use methods -------------------
    159 
    160     /**
    161     * Reads a block of compressed lengths of 32 strings and expands them into
    162     * offsets and lengths for each string. Lengths are stored with a
    163     * variable-width encoding in consecutive nibbles:
    164     * If a nibble<0xc, then it is the length itself (0 = empty string).
    165     * If a nibble>=0xc, then it forms a length value with the following
    166     * nibble.
    167     * The offsets and lengths arrays must be at least 33 (one more) long
    168     * because there is no check here at the end if the last nibble is still
    169     * used.
    170     * @param index of group string object in array
    171     * @param offsets array to store the value of the string offsets
    172     * @param lengths array to store the value of the string length
    173     * @return next index of the data string immediately after the lengths
    174     *         in terms of byte address
    175     */
    176     public int getGroupLengths(int index, char offsets[], char lengths[])
    177     {
    178         char length = 0xffff;
    179         byte b = 0,
    180             n = 0;
    181         int shift;
    182         index = index * m_groupsize_; // byte count offsets of group strings
    183         int stringoffset = UCharacterUtility.toInt(
    184                                  m_groupinfo_[index + OFFSET_HIGH_OFFSET_],
    185                                  m_groupinfo_[index + OFFSET_LOW_OFFSET_]);
    186 
    187         offsets[0] = 0;
    188 
    189         // all 32 lengths must be read to get the offset of the first group
    190         // string
    191         for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++) {
    192             b = m_groupstring_[stringoffset];
    193             shift = 4;
    194 
    195             while (shift >= 0) {
    196                 // getting nibble
    197                 n = (byte)((b >> shift) & 0x0F);
    198                 if (length == 0xffff && n > SINGLE_NIBBLE_MAX_) {
    199                     length = (char)((n - 12) << 4);
    200                 }
    201                 else {
    202                     if (length != 0xffff) {
    203                        lengths[i] = (char)((length | n) + 12);
    204                     }
    205                     else {
    206                        lengths[i] = (char)n;
    207                     }
    208 
    209                     if (i < LINES_PER_GROUP_) {
    210                        offsets[i + 1] = (char)(offsets[i] + lengths[i]);
    211                     }
    212 
    213                     length = 0xffff;
    214                     i ++;
    215                 }
    216 
    217                 shift -= 4;
    218             }
    219         }
    220         return stringoffset;
    221     }
    222 
    223     /**
    224     * Gets the name of the argument group index.
    225     * UnicodeData.txt uses ';' as a field separator, so no field can contain
    226     * ';' as part of its contents. In unames.icu, it is marked as
    227     * token[';'] == -1 only if the semicolon is used in the data file - which
    228     * is iff we have Unicode 1.0 names or ISO comments or aliases.
    229     * So, it will be token[';'] == -1 if we store U1.0 names/ISO comments/aliases
    230     * although we know that it will never be part of a name.
    231     * Equivalent to ICU4C's expandName.
    232     * @param index of the group name string in byte count
    233     * @param length of the group name string
    234     * @param choice of Unicode 1.0 name or the most current name
    235     * @return name of the group
    236     */
    237     public String getGroupName(int index, int length, int choice)
    238     {
    239         if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME &&
    240             choice != UCharacterNameChoice.EXTENDED_CHAR_NAME
    241         ) {
    242             if (';' >= m_tokentable_.length || m_tokentable_[';'] == 0xFFFF) {
    243                 /*
    244                  * skip the modern name if it is not requested _and_
    245                  * if the semicolon byte value is a character, not a token number
    246                  */
    247                 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice;
    248                 do {
    249                     int oldindex = index;
    250                     index += UCharacterUtility.skipByteSubString(m_groupstring_,
    251                                                        index, length, (byte)';');
    252                     length -= (index - oldindex);
    253                 } while(--fieldIndex>0);
    254             }
    255             else {
    256                 // the semicolon byte is a token number, therefore only modern
    257                 // names are stored in unames.dat and there is no such
    258                 // requested alternate name here
    259                 length = 0;
    260             }
    261         }
    262 
    263         synchronized (m_utilStringBuffer_) {
    264             m_utilStringBuffer_.setLength(0);
    265             byte b;
    266             char token;
    267             for (int i = 0; i < length;) {
    268                 b = m_groupstring_[index + i];
    269                 i ++;
    270 
    271                 if (b >= m_tokentable_.length) {
    272                     if (b == ';') {
    273                         break;
    274                     }
    275                     m_utilStringBuffer_.append(b); // implicit letter
    276                 }
    277                 else {
    278                     token = m_tokentable_[b & 0x00ff];
    279                     if (token == 0xFFFE) {
    280                         // this is a lead byte for a double-byte token
    281                         token = m_tokentable_[b << 8 |
    282                                           (m_groupstring_[index + i] & 0x00ff)];
    283                         i ++;
    284                     }
    285                     if (token == 0xFFFF) {
    286                         if (b == ';') {
    287                             // skip the semicolon if we are seeking extended
    288                             // names and there was no 2.0 name but there
    289                             // is a 1.0 name.
    290                             if (m_utilStringBuffer_.length() == 0 && choice ==
    291                                    UCharacterNameChoice.EXTENDED_CHAR_NAME) {
    292                                 continue;
    293                             }
    294                             break;
    295                         }
    296                         // explicit letter
    297                         m_utilStringBuffer_.append((char)(b & 0x00ff));
    298                     }
    299                     else { // write token word
    300                         UCharacterUtility.getNullTermByteSubString(
    301                                 m_utilStringBuffer_, m_tokenstring_, token);
    302                     }
    303                 }
    304             }
    305 
    306             if (m_utilStringBuffer_.length() > 0) {
    307                 return m_utilStringBuffer_.toString();
    308             }
    309         }
    310         return null;
    311     }
    312 
    313     /**
    314     * Retrieves the extended name
    315     */
    316     public String getExtendedName(int ch)
    317     {
    318         String result = getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
    319         if (result == null) {
    320             // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F.
    321             result = getExtendedOr10Name(ch);
    322         }
    323         return result;
    324     }
    325 
    326     /**
    327      * Gets the group index for the codepoint, or the group before it.
    328      * @param codepoint The codepoint index.
    329      * @return group index containing codepoint or the group before it.
    330      */
    331     public int getGroup(int codepoint)
    332     {
    333         int endGroup = m_groupcount_;
    334         int msb      = getCodepointMSB(codepoint);
    335         int result   = 0;
    336         // binary search for the group of names that contains the one for
    337         // code
    338         // find the group that contains codepoint, or the highest before it
    339         while (result < endGroup - 1) {
    340             int gindex = (result + endGroup) >> 1;
    341             if (msb < getGroupMSB(gindex)) {
    342                 endGroup = gindex;
    343             }
    344             else {
    345                 result = gindex;
    346             }
    347         }
    348         return result;
    349     }
    350 
    351     /**
    352      * Gets the extended and 1.0 name when the most current unicode names
    353      * fail
    354      * @param ch codepoint
    355      * @return name of codepoint extended or 1.0
    356      */
    357     public String getExtendedOr10Name(int ch)
    358     {
    359         String result = null;
    360         // TODO: Return Name_Alias/control names for control codes 0..1F & 7F..9F.
    361         if (result == null) {
    362             int type = getType(ch);
    363             // Return unknown if the table of names above is not up to
    364             // date.
    365             if (type >= TYPE_NAMES_.length) {
    366                 result = UNKNOWN_TYPE_NAME_;
    367             }
    368             else {
    369                 result = TYPE_NAMES_[type];
    370             }
    371             synchronized (m_utilStringBuffer_) {
    372                 m_utilStringBuffer_.setLength(0);
    373                 m_utilStringBuffer_.append('<');
    374                 m_utilStringBuffer_.append(result);
    375                 m_utilStringBuffer_.append('-');
    376                 String chStr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
    377                 int zeros = 4 - chStr.length();
    378                 while (zeros > 0) {
    379                     m_utilStringBuffer_.append('0');
    380                     zeros --;
    381                 }
    382                 m_utilStringBuffer_.append(chStr);
    383                 m_utilStringBuffer_.append('>');
    384                 result = m_utilStringBuffer_.toString();
    385             }
    386         }
    387         return result;
    388     }
    389 
    390     /**
    391      * Gets the MSB from the group index
    392      * @param gindex group index
    393      * @return the MSB of the group if gindex is valid, -1 otherwise
    394      */
    395     public int getGroupMSB(int gindex)
    396     {
    397         if (gindex >= m_groupcount_) {
    398             return -1;
    399         }
    400         return m_groupinfo_[gindex * m_groupsize_];
    401     }
    402 
    403     /**
    404      * Gets the MSB of the codepoint
    405      * @param codepoint The codepoint value.
    406      * @return the MSB of the codepoint
    407      */
    408     public static int getCodepointMSB(int codepoint)
    409     {
    410         return codepoint >> GROUP_SHIFT_;
    411     }
    412 
    413     /**
    414      * Gets the maximum codepoint + 1 of the group
    415      * @param msb most significant byte of the group
    416      * @return limit codepoint of the group
    417      */
    418     public static int getGroupLimit(int msb)
    419     {
    420         return (msb << GROUP_SHIFT_) + LINES_PER_GROUP_;
    421     }
    422 
    423     /**
    424      * Gets the minimum codepoint of the group
    425      * @param msb most significant byte of the group
    426      * @return minimum codepoint of the group
    427      */
    428     public static int getGroupMin(int msb)
    429     {
    430         return msb << GROUP_SHIFT_;
    431     }
    432 
    433     /**
    434      * Gets the offset to a group
    435      * @param codepoint The codepoint value.
    436      * @return offset to a group
    437      */
    438     public static int getGroupOffset(int codepoint)
    439     {
    440         return codepoint & GROUP_MASK_;
    441     }
    442 
    443     /**
    444      * Gets the minimum codepoint of a group
    445      * @param codepoint The codepoint value.
    446      * @return minimum codepoint in the group which codepoint belongs to
    447      */
    448     ///CLOVER:OFF
    449     public static int getGroupMinFromCodepoint(int codepoint)
    450     {
    451         return codepoint & ~GROUP_MASK_;
    452     }
    453     ///CLOVER:ON
    454 
    455     /**
    456      * Get the Algorithm range length
    457      * @return Algorithm range length
    458      */
    459     public int getAlgorithmLength()
    460     {
    461         return m_algorithm_.length;
    462     }
    463 
    464     /**
    465      * Gets the start of the range
    466      * @param index algorithm index
    467      * @return algorithm range start
    468      */
    469     public int getAlgorithmStart(int index)
    470     {
    471         return m_algorithm_[index].m_rangestart_;
    472     }
    473 
    474     /**
    475      * Gets the end of the range
    476      * @param index algorithm index
    477      * @return algorithm range end
    478      */
    479     public int getAlgorithmEnd(int index)
    480     {
    481         return m_algorithm_[index].m_rangeend_;
    482     }
    483 
    484     /**
    485      * Gets the Algorithmic name of the codepoint
    486      * @param index algorithmic range index
    487      * @param codepoint The codepoint value.
    488      * @return algorithmic name of codepoint
    489      */
    490     public String getAlgorithmName(int index, int codepoint)
    491     {
    492         String result = null;
    493         synchronized (m_utilStringBuffer_) {
    494             m_utilStringBuffer_.setLength(0);
    495             m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_);
    496             result = m_utilStringBuffer_.toString();
    497         }
    498         return result;
    499     }
    500 
    501     /**
    502     * Gets the group name of the character
    503     * @param ch character to get the group name
    504     * @param choice name choice selector to choose a unicode 1.0 or newer name
    505     */
    506     public synchronized String getGroupName(int ch, int choice)
    507     {
    508         // gets the msb
    509         int msb   = getCodepointMSB(ch);
    510         int group = getGroup(ch);
    511 
    512         // return this if it is an exact match
    513         if (msb == m_groupinfo_[group * m_groupsize_]) {
    514             int index = getGroupLengths(group, m_groupoffsets_,
    515                                         m_grouplengths_);
    516             int offset = ch & GROUP_MASK_;
    517             return getGroupName(index + m_groupoffsets_[offset],
    518                                 m_grouplengths_[offset], choice);
    519         }
    520 
    521         return null;
    522     }
    523 
    524     // these are transliterator use methods ---------------------------------
    525 
    526     /**
    527      * Gets the maximum length of any codepoint name.
    528      * Equivalent to uprv_getMaxCharNameLength.
    529      * @return the maximum length of any codepoint name
    530      */
    531     public int getMaxCharNameLength()
    532     {
    533         if (initNameSetsLengths()) {
    534             return m_maxNameLength_;
    535         }
    536         else {
    537             return 0;
    538         }
    539     }
    540 
    541     /**
    542      * Gets the maximum length of any iso comments.
    543      * Equivalent to uprv_getMaxISOCommentLength.
    544      * @return the maximum length of any codepoint name
    545      */
    546     ///CLOVER:OFF
    547     public int getMaxISOCommentLength()
    548     {
    549         if (initNameSetsLengths()) {
    550             return m_maxISOCommentLength_;
    551         }
    552         else {
    553             return 0;
    554         }
    555     }
    556     ///CLOVER:ON
    557 
    558     /**
    559      * Fills set with characters that are used in Unicode character names.
    560      * Equivalent to uprv_getCharNameCharacters.
    561      * @param set USet to receive characters. Existing contents are deleted.
    562      */
    563     public void getCharNameCharacters(UnicodeSet set)
    564     {
    565         convert(m_nameSet_, set);
    566     }
    567 
    568     /**
    569      * Fills set with characters that are used in Unicode character names.
    570      * Equivalent to uprv_getISOCommentCharacters.
    571      * @param set USet to receive characters. Existing contents are deleted.
    572      */
    573     ///CLOVER:OFF
    574     public void getISOCommentCharacters(UnicodeSet set)
    575     {
    576         convert(m_ISOCommentSet_, set);
    577     }
    578     ///CLOVER:ON
    579 
    580     // package private inner class --------------------------------------
    581 
    582     /**
    583     * Algorithmic name class
    584     */
    585     static final class AlgorithmName
    586     {
    587         // package private data members ----------------------------------
    588 
    589         /**
    590         * Algorithm type constants: TYPE_0_ = prefix + hex digits, TYPE_1_ = prefix + factorized elements (see appendName)
    591         */
    592         static final int TYPE_0_ = 0;
    593         static final int TYPE_1_ = 1;
    594 
    595         // package private constructors ----------------------------------
    596 
    /**
    * Creates an empty AlgorithmName; its data is supplied afterwards
    * through the set methods.
    */
    AlgorithmName() {
    }
    603 
    604         // package private methods ---------------------------------------
    605 
    606         /**
    607         * Sets the information for accessing the algorithmic names
    608         * @param rangestart starting code point that lies within this name group
    609         * @param rangeend end code point that lies within this name group
    610         * @param type algorithm type. There's 2 kinds of algorithmic type. First
    611         *        which uses code point as part of its name and the other uses
    612         *        variant postfix strings
    613         * @param variant algorithmic variant
    614         * @return true if values are valid
    615         */
    616         boolean setInfo(int rangestart, int rangeend, byte type, byte variant)
    617         {
    618             if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend
    619                 && rangeend <= UCharacter.MAX_VALUE &&
    620                 (type == TYPE_0_ || type == TYPE_1_)) {
    621                 m_rangestart_ = rangestart;
    622                 m_rangeend_ = rangeend;
    623                 m_type_ = type;
    624                 m_variant_ = variant;
    625                 return true;
    626             }
    627             return false;
    628         }
    629 
    630         /**
    631         * Sets the factor data
    632         * @param factor Array of factor
    633         * @return true if factors are valid
    634         */
    635         boolean setFactor(char factor[])
    636         {
    637             if (factor.length == m_variant_) {
    638                 m_factor_ = factor;
    639                 return true;
    640             }
    641             return false;
    642         }
    643 
    644         /**
    645         * Sets the name prefix
    646         * @param prefix
    647         * @return true if prefix is set
    648         */
    649         boolean setPrefix(String prefix)
    650         {
    651             if (prefix != null && prefix.length() > 0) {
    652                 m_prefix_ = prefix;
    653                 return true;
    654             }
    655             return false;
    656         }
    657 
    658         /**
    659         * Sets the variant factorized name data
    660         * @param string variant factorized name data
    661         * @return true if values are set
    662         */
    663         boolean setFactorString(byte string[])
    664         {
    665             // factor and variant string can be empty for things like
    666             // hanggul code points
    667             m_factorstring_ = string;
    668             return true;
    669         }
    670 
    671         /**
    672         * Checks if code point lies in Algorithm object at index
    673         * @param ch code point
    674         */
    675         boolean contains(int ch)
    676         {
    677             return m_rangestart_ <= ch && ch <= m_rangeend_;
    678         }
    679 
    680         /**
    681         * Appends algorithm name of code point into StringBuffer.
    682         * Note this method does not check for validity of code point in Algorithm,
    683         * result is undefined if code point does not belong in Algorithm.
    684         * @param ch code point
    685         * @param str StringBuffer to append to
    686         */
    687         void appendName(int ch, StringBuffer str)
    688         {
    689             str.append(m_prefix_);
    690             switch (m_type_)
    691             {
    692                 case TYPE_0_:
    693                     // prefix followed by hex digits indicating variants
    694                 str.append(Utility.hex(ch,m_variant_));
    695                     break;
    696                 case TYPE_1_:
    697                     // prefix followed by factorized-elements
    698                     int offset = ch - m_rangestart_;
    699                     int indexes[] = m_utilIntBuffer_;
    700                     int factor;
    701 
    702                     // write elements according to the factors
    703                     // the factorized elements are determined by modulo
    704                     // arithmetic
    705                     synchronized (m_utilIntBuffer_) {
    706                         for (int i = m_variant_ - 1; i > 0; i --)
    707                         {
    708                             factor = m_factor_[i] & 0x00FF;
    709                             indexes[i] = offset % factor;
    710                             offset /= factor;
    711                         }
    712 
    713                         // we don't need to calculate the last modulus because
    714                         // start <= code <= end guarantees here that
    715                         // code <= factors[0]
    716                         indexes[0] = offset;
    717 
    718                         // joining up the factorized strings
    719                         str.append(getFactorString(indexes, m_variant_));
    720                     }
    721                     break;
    722             }
    723         }
    724 
    725         /**
    726         * Gets the character for the argument algorithmic name
    727         * @return the algorithmic char or -1 otherwise.
    728         */
    729         int getChar(String name)
    730         {
    731             int prefixlen = m_prefix_.length();
    732             if (name.length() < prefixlen ||
    733                 !m_prefix_.equals(name.substring(0, prefixlen))) {
    734                 return -1;
    735             }
    736 
    737             switch (m_type_)
    738             {
    739                 case TYPE_0_ :
    740                 try
    741                 {
    742                     int result = Integer.parseInt(name.substring(prefixlen),
    743                                                   16);
    744                     // does it fit into the range?
    745                     if (m_rangestart_ <= result && result <= m_rangeend_) {
    746                         return result;
    747                     }
    748                 }
    749                 catch (NumberFormatException e)
    750                 {
    751                     return -1;
    752                 }
    753                 break;
    754                 case TYPE_1_ :
    755                     // repetitative suffix name comparison done here
    756                     // offset is the character code - start
    757                     for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
    758                     {
    759                         int offset = ch - m_rangestart_;
    760                         int indexes[] = m_utilIntBuffer_;
    761                         int factor;
    762 
    763                         // write elements according to the factors
    764                         // the factorized elements are determined by modulo
    765                         // arithmetic
    766                         synchronized (m_utilIntBuffer_) {
    767                             for (int i = m_variant_ - 1; i > 0; i --)
    768                             {
    769                                 factor = m_factor_[i] & 0x00FF;
    770                                 indexes[i] = offset % factor;
    771                                 offset /= factor;
    772                             }
    773 
    774                             // we don't need to calculate the last modulus
    775                             // because start <= code <= end guarantees here that
    776                             // code <= factors[0]
    777                             indexes[0] = offset;
    778 
    779                             // joining up the factorized strings
    780                             if (compareFactorString(indexes, m_variant_, name,
    781                                                     prefixlen)) {
    782                                 return ch;
    783                             }
    784                         }
    785                     }
    786             }
    787 
    788             return -1;
    789         }
    790 
    791         /**
    792          * Adds all chars in the set of algorithmic names into the set.
    793          * Equivalent to part of calcAlgNameSetsLengths.
    794          * @param set int set to add the chars of the algorithm names into
    795          * @param maxlength maximum length to compare to
    796          * @return maxlength, or the length of this algorithm name if it is
    797          *         longer than maxlength
    798          */
    799         int add(int set[], int maxlength)
    800         {
    801             // prefix length
    802             int length = UCharacterName.add(set, m_prefix_);
    803             switch (m_type_) {
    804                 case TYPE_0_ : {
    805                     // name = prefix + (range->variant times) hex-digits
    806                     // prefix
    807                     length += m_variant_;
    808                     /* synwee to check
    809                      * addString(set, (const char *)(range + 1))
    810                                        + range->variant;*/
    811                     break;
    812                 }
    813                 case TYPE_1_ : {
    814                     // name = prefix factorized-elements
    815                     // get the set and maximum factor suffix length for each
    816                     // factor
    817                     for (int i = m_variant_ - 1; i > 0; i --)
    818                     {
    819                         int maxfactorlength = 0;
    820                         int count = 0;
    821                         for (int factor = m_factor_[i]; factor > 0; -- factor) {
    822                             synchronized (m_utilStringBuffer_) {
    823                                 m_utilStringBuffer_.setLength(0);
    824                                 count
    825                                   = UCharacterUtility.getNullTermByteSubString(
    826                                                 m_utilStringBuffer_,
    827                                                 m_factorstring_, count);
    828                                 UCharacterName.add(set, m_utilStringBuffer_);
    829                                 if (m_utilStringBuffer_.length()
    830                                                             > maxfactorlength)
    831                                 {
    832                                     maxfactorlength
    833                                                 = m_utilStringBuffer_.length();
    834                                 }
    835                             }
    836                         }
    837                         length += maxfactorlength;
    838                     }
    839                 }
    840             }
    841             if (length > maxlength) {
    842                 return length;
    843             }
    844             return maxlength;
    845         }
    846 
    847         // private data members ------------------------------------------
    848 
    849         /**
    850         * Algorithmic data information
    851         */
    852         private int m_rangestart_;
    853         private int m_rangeend_;
    854         private byte m_type_;
    855         private byte m_variant_;
    856         private char m_factor_[];
    857         private String m_prefix_;
    858         private byte m_factorstring_[];
    859         /**
    860          * Utility StringBuffer
    861          */
    862         private StringBuffer m_utilStringBuffer_ = new StringBuffer();
    863         /**
    864          * Utility int buffer
    865          */
    866         private int m_utilIntBuffer_[] = new int[256];
    867 
    868         // private methods -----------------------------------------------
    869 
    870         /**
    871         * Gets the indexth string in each of the argument factor block
    872         * @param index array with each index corresponding to each factor block
    873         * @param length length of the array index
    874         * @return the combined string of the array of indexth factor string in
    875         *         factor block
    876         */
    877         private String getFactorString(int index[], int length)
    878         {
    879             int size = m_factor_.length;
    880             if (index == null || length != size) {
    881                 return null;
    882             }
    883 
    884             synchronized (m_utilStringBuffer_) {
    885                 m_utilStringBuffer_.setLength(0);
    886                 int count = 0;
    887                 int factor;
    888                 size --;
    889                 for (int i = 0; i <= size; i ++) {
    890                     factor = m_factor_[i];
    891                     count = UCharacterUtility.skipNullTermByteSubString(
    892                                              m_factorstring_, count, index[i]);
    893                     count = UCharacterUtility.getNullTermByteSubString(
    894                                           m_utilStringBuffer_, m_factorstring_,
    895                                           count);
    896                     if (i != size) {
    897                         count = UCharacterUtility.skipNullTermByteSubString(
    898                                                        m_factorstring_, count,
    899                                                        factor - index[i] - 1);
    900                     }
    901                 }
    902                 return m_utilStringBuffer_.toString();
    903             }
    904         }
    905 
    906         /**
    907         * Compares the indexth string in each of the argument factor block with
    908         * the argument string
    909         * @param index array with each index corresponding to each factor block
    910         * @param length index array length
    911         * @param str string to compare with
    912         * @param offset of str to start comparison
    913         * @return true if string matches
    914         */
    915         private boolean compareFactorString(int index[], int length, String str,
    916                                             int offset)
    917         {
    918             int size = m_factor_.length;
    919             if (index == null || length != size)
    920                 return false;
    921 
    922             int count = 0;
    923             int strcount = offset;
    924             int factor;
    925             size --;
    926             for (int i = 0; i <= size; i ++)
    927             {
    928                 factor = m_factor_[i];
    929                 count = UCharacterUtility.skipNullTermByteSubString(
    930                                           m_factorstring_, count, index[i]);
    931                 strcount = UCharacterUtility.compareNullTermByteSubString(str,
    932                                           m_factorstring_, strcount, count);
    933                 if (strcount < 0) {
    934                     return false;
    935                 }
    936 
    937                 if (i != size) {
    938                     count = UCharacterUtility.skipNullTermByteSubString(
    939                                   m_factorstring_, count, factor - index[i]);
    940                 }
    941             }
    942             if (strcount != str.length()) {
    943                 return false;
    944             }
    945             return true;
    946         }
    947     }
    948 
    949     // package private data members --------------------------------------
    950 
    951     /**
    952      * Size of each group, in number of char; assigned by setGroupCountSize
    953      */
    954     int m_groupsize_ = 0;
    955 
    956     // package private methods --------------------------------------------
    957 
    958     /**
    959     * Sets the token data
    960     * @param token array of tokens
    961     * @param tokenstring array of string values of the tokens
    962     * @return false if there is a data error
    963     */
    964     boolean setToken(char token[], byte tokenstring[])
    965     {
    966         if (token != null && tokenstring != null && token.length > 0 &&
    967             tokenstring.length > 0) {
    968             m_tokentable_ = token;
    969             m_tokenstring_ = tokenstring;
    970             return true;
    971         }
    972         return false;
    973     }
    974 
    975     /**
    976     * Set the algorithm name information array
    977     * @param alg Algorithm information array
    978     * @return true if the group string offset has been set correctly
    979     */
    980     boolean setAlgorithm(AlgorithmName alg[])
    981     {
    982         if (alg != null && alg.length != 0) {
    983             m_algorithm_ = alg;
    984             return true;
    985         }
    986         return false;
    987     }
    988 
    989     /**
    990     * Sets the number of group and size of each group in number of char
    991     * @param count number of groups
    992     * @param size size of group in char
    993     * @return true if group size is set correctly
    994     */
    995     boolean setGroupCountSize(int count, int size)
    996     {
    997         if (count <= 0 || size <= 0) {
    998             return false;
    999         }
   1000         m_groupcount_ = count;
   1001         m_groupsize_ = size;
   1002         return true;
   1003     }
   1004 
   1005     /**
   1006     * Sets the group name data
   1007     * @param group index information array
   1008     * @param groupstring name information array
   1009     * @return false if there is a data error
   1010     */
   1011     boolean setGroup(char group[], byte groupstring[])
   1012     {
   1013         if (group != null && groupstring != null && group.length > 0 &&
   1014             groupstring.length > 0) {
   1015             m_groupinfo_ = group;
   1016             m_groupstring_ = groupstring;
   1017             return true;
   1018         }
   1019         return false;
   1020     }
   1021 
   1022     // private data members ----------------------------------------------
   1023 
   1024     /**
   1025     * Data used in unames.icu
   1026     */
   1027     private char m_tokentable_[];
   1028     private byte m_tokenstring_[];
   1029     private char m_groupinfo_[];
   1030     private byte m_groupstring_[];
   1031     private AlgorithmName m_algorithm_[];
   1032 
   1033     /**
   1034     * Group use.  Note - access must be synchronized.
   1035     */
   1036     private char m_groupoffsets_[] = new char[LINES_PER_GROUP_ + 1];
   1037     private char m_grouplengths_[] = new char[LINES_PER_GROUP_ + 1];
   1038 
   1039     /**
   1040     * Default name of the name datafile
   1041     */
   1042     private static final String FILE_NAME_ = "unames.icu";
   1043     /**
   1044     * Shift count to retrieve group information
   1045     */
   1046     private static final int GROUP_SHIFT_ = 5;
   1047     /**
   1048     * Mask to retrieve the offset for a particular character within a group
   1049     */
   1050     private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
   1051 
   1052     /**
   1053     * Position of offsethigh in group information array
   1054     */
   1055     private static final int OFFSET_HIGH_OFFSET_ = 1;
   1056 
   1057     /**
   1058     * Position of offsetlow in group information array
   1059     */
   1060     private static final int OFFSET_LOW_OFFSET_ = 2;
   1061     /**
   1062     * Double nibble indicator, any nibble > this number has to be combined
   1063     * with its following nibble
   1064     */
   1065     private static final int SINGLE_NIBBLE_MAX_ = 11;
   1066 
   1067     /*
   1068      * Maximum length of character names (regular & 1.0).
   1069      */
   1070     //private static int MAX_NAME_LENGTH_ = 0;
   1071     /*
   1072      * Maximum length of ISO comments.
   1073      */
   1074     //private static int MAX_ISO_COMMENT_LENGTH_ = 0;
   1075 
   1076     /**
   1077      * Set of chars used in character names (regular & 1.0).
   1078      * Chars are platform-dependent (can be EBCDIC).
   1079      */
   1080     private int m_nameSet_[] = new int[8];
   1081     /**
   1082      * Set of chars used in ISO comments. (regular & 1.0).
   1083      * Chars are platform-dependent (can be EBCDIC).
   1084      */
   1085     private int m_ISOCommentSet_[] = new int[8];
   1086     /**
   1087      * Utility StringBuffer
   1088      */
   1089     private StringBuffer m_utilStringBuffer_ = new StringBuffer();
   1090     /**
   1091      * Utility int buffer
   1092      */
   1093     private int m_utilIntBuffer_[] = new int[2];
   1094     /**
   1095      * Maximum ISO comment length
   1096      */
   1097     private int m_maxISOCommentLength_;
   1098     /**
   1099      * Maximum name length
   1100      */
   1101     private int m_maxNameLength_;
   1102     /**
   1103      * Type names used for extended names
   1104      */
   1105     private static final String TYPE_NAMES_[] = {"unassigned",
   1106                                                  "uppercase letter",
   1107                                                  "lowercase letter",
   1108                                                  "titlecase letter",
   1109                                                  "modifier letter",
   1110                                                  "other letter",
   1111                                                  "non spacing mark",
   1112                                                  "enclosing mark",
   1113                                                  "combining spacing mark",
   1114                                                  "decimal digit number",
   1115                                                  "letter number",
   1116                                                  "other number",
   1117                                                  "space separator",
   1118                                                  "line separator",
   1119                                                  "paragraph separator",
   1120                                                  "control",
   1121                                                  "format",
   1122                                                  "private use area",
   1123                                                  "surrogate",
   1124                                                  "dash punctuation",
   1125                                                  "start punctuation",
   1126                                                  "end punctuation",
   1127                                                  "connector punctuation",
   1128                                                  "other punctuation",
   1129                                                  "math symbol",
   1130                                                  "currency symbol",
   1131                                                  "modifier symbol",
   1132                                                  "other symbol",
   1133                                                  "initial punctuation",
   1134                                                  "final punctuation",
   1135                                                  "noncharacter",
   1136                                                  "lead surrogate",
   1137                                                  "trail surrogate"};
   1138     /**
   1139      * Unknown type name
   1140      */
   1141     private static final String UNKNOWN_TYPE_NAME_ = "unknown";
   1142     /**
   1143      * Not a character type
   1144      */
   1145     private static final int NON_CHARACTER_
   1146                                     = UCharacterCategory.CHAR_CATEGORY_COUNT;
   1147     /**
   1148     * Lead surrogate type
   1149     */
   1150     private static final int LEAD_SURROGATE_
   1151                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 1;
   1152     /**
   1153     * Trail surrogate type
   1154     */
   1155     private static final int TRAIL_SURROGATE_
   1156                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 2;
   1157     /**
   1158     * Extended category count
   1159     */
   1160     static final int EXTENDED_CATEGORY_
   1161                                   = UCharacterCategory.CHAR_CATEGORY_COUNT + 3;
   1162 
   1163     // private constructor ------------------------------------------------
   1164 
   1165     /**
   1166     * <p>Private constructor; reads the name data file into this instance.</p>
   1167     * @exception IOException thrown when data reading fails
   1168     */
   1169     private UCharacterName() throws IOException
   1170     {
   1171         ByteBuffer b = ICUBinary.getRequiredData(FILE_NAME_);
   1172         UCharacterNameReader reader = new UCharacterNameReader(b);
   1173         reader.read(this);
   1174     }
   1175 
   1176     // private methods ---------------------------------------------------
   1177 
   1178     /**
   1179     * Gets the algorithmic name for the argument character
   1180     * @param ch character to determine name for
   1181     * @param choice name choice
   1182     * @return the algorithmic name or null if not found
   1183     */
   1184     private String getAlgName(int ch, int choice)
   1185     {
   1186         /* Only the normative character name can be algorithmic. */
   1187         if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME ||
   1188             choice == UCharacterNameChoice.EXTENDED_CHAR_NAME
   1189         ) {
   1190             // index in terms integer index
   1191             synchronized (m_utilStringBuffer_) {
   1192                 m_utilStringBuffer_.setLength(0);
   1193 
   1194                 for (int index = m_algorithm_.length - 1; index >= 0; index --)
   1195                 {
   1196                    if (m_algorithm_[index].contains(ch)) {
   1197                       m_algorithm_[index].appendName(ch, m_utilStringBuffer_);
   1198                       return m_utilStringBuffer_.toString();
   1199                    }
   1200                 }
   1201             }
   1202         }
   1203         return null;
   1204     }
   1205 
   1206     /**
   1207     * Getting the character with the tokenized argument name
   1208     * @param name of the character
   1209     * @return character with the tokenized argument name or -1 if character
   1210     *         is not found
   1211     */
   1212     private synchronized int getGroupChar(String name, int choice)
   1213     {
   1214         for (int i = 0; i < m_groupcount_; i ++) {
   1215             // populating the data set of grouptable
   1216 
   1217             int startgpstrindex = getGroupLengths(i, m_groupoffsets_,
   1218                                                   m_grouplengths_);
   1219 
   1220             // shift out to function
   1221             int result = getGroupChar(startgpstrindex, m_grouplengths_, name,
   1222                                       choice);
   1223             if (result != -1) {
   1224                 return (m_groupinfo_[i * m_groupsize_] << GROUP_SHIFT_)
   1225                          | result;
   1226             }
   1227         }
   1228         return -1;
   1229     }
   1230 
   1231     /**
   1232     * Compares and retrieve character if name is found within the argument
   1233     * group
   1234     * @param index index where the set of names reside in the group block
   1235     * @param length list of lengths of the strings
   1236     * @param name character name to search for
   1237     * @param choice of either 1.0 or the most current unicode name
   1238     * @return relative character in the group which matches name, otherwise if
   1239     *         not found, -1 will be returned
   1240     */
   1241     private int getGroupChar(int index, char length[], String name,
   1242                              int choice)
   1243     {
   1244         byte b = 0;
   1245         char token;
   1246         int len;
   1247         int namelen = name.length();
   1248         int nindex;
   1249         int count;
   1250 
   1251         for (int result = 0; result <= LINES_PER_GROUP_; result ++) {
   1252             nindex = 0;
   1253             len = length[result];
   1254 
   1255             if (choice != UCharacterNameChoice.UNICODE_CHAR_NAME &&
   1256                 choice != UCharacterNameChoice.EXTENDED_CHAR_NAME
   1257             ) {
   1258                 /*
   1259                  * skip the modern name if it is not requested _and_
   1260                  * if the semicolon byte value is a character, not a token number
   1261                  */
   1262                 int fieldIndex= choice==UCharacterNameChoice.ISO_COMMENT_ ? 2 : choice;
   1263                 do {
   1264                     int oldindex = index;
   1265                     index += UCharacterUtility.skipByteSubString(m_groupstring_,
   1266                                                          index, len, (byte)';');
   1267                     len -= (index - oldindex);
   1268                 } while(--fieldIndex>0);
   1269             }
   1270 
   1271             // number of tokens is > the length of the name
   1272             // write each letter directly, and write a token word per token
   1273             for (count = 0; count < len && nindex != -1 && nindex < namelen;
   1274                 ) {
   1275                 b = m_groupstring_[index + count];
   1276                 count ++;
   1277 
   1278                 if (b >= m_tokentable_.length) {
   1279                     if (name.charAt(nindex ++) != (b & 0xFF)) {
   1280                         nindex = -1;
   1281                     }
   1282                 }
   1283                 else {
   1284                     token = m_tokentable_[b & 0xFF];
   1285                     if (token == 0xFFFE) {
   1286                         // this is a lead byte for a double-byte token
   1287                         token = m_tokentable_[b << 8 |
   1288                                    (m_groupstring_[index + count] & 0x00ff)];
   1289                         count ++;
   1290                     }
   1291                     if (token == 0xFFFF) {
   1292                         if (name.charAt(nindex ++) != (b & 0xFF)) {
   1293                             nindex = -1;
   1294                         }
   1295                     }
   1296                     else {
   1297                         // compare token with name
   1298                         nindex = UCharacterUtility.compareNullTermByteSubString(
   1299                                         name, m_tokenstring_, nindex, token);
   1300                     }
   1301                 }
   1302             }
   1303 
   1304             if (namelen == nindex &&
   1305                 (count == len || m_groupstring_[index + count] == ';')) {
   1306                 return result;
   1307             }
   1308 
   1309             index += len;
   1310         }
   1311         return -1;
   1312     }
   1313 
   1314     /**
   1315     * Gets the character extended type
   1316     * @param ch character to be tested
   1317     * @return extended type it is associated with
   1318     */
   1319     private static int getType(int ch)
   1320     {
   1321         if (UCharacterUtility.isNonCharacter(ch)) {
   1322             // not a character we return a invalid category count
   1323             return NON_CHARACTER_;
   1324         }
   1325         int result = UCharacter.getType(ch);
   1326         if (result == UCharacterCategory.SURROGATE) {
   1327             if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
   1328                 result = LEAD_SURROGATE_;
   1329             }
   1330             else {
   1331                 result = TRAIL_SURROGATE_;
   1332             }
   1333         }
   1334         return result;
   1335     }
   1336 
   1337     /**
   1338     * Getting the character with extended name of the form <....>.
   1339     * @param name of the character to be found
   1340     * @param choice name choice
   1341     * @return character associated with the name, -1 if such character is not
   1342     *                   found and -2 if we should continue with the search.
   1343     */
   1344     private static int getExtendedChar(String name, int choice)
   1345     {
   1346         if (name.charAt(0) == '<') {
   1347             if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
   1348                 int endIndex = name.length() - 1;
   1349                 if (name.charAt(endIndex) == '>') {
   1350                     int startIndex = name.lastIndexOf('-');
   1351                     if (startIndex >= 0) { // We've got a category.
   1352                         startIndex ++;
   1353                         int result = -1;
   1354                         try {
   1355                             result = Integer.parseInt(
   1356                                         name.substring(startIndex, endIndex),
   1357                                         16);
   1358                         }
   1359                         catch (NumberFormatException e) {
   1360                             return -1;
   1361                         }
   1362                         // Now validate the category name. We could use a
   1363                         // binary search, or a trie, if we really wanted to.
   1364                         String type = name.substring(1, startIndex - 1);
   1365                         int length = TYPE_NAMES_.length;
   1366                         for (int i = 0; i < length; ++ i) {
   1367                             if (type.compareTo(TYPE_NAMES_[i]) == 0) {
   1368                                 if (getType(result) == i) {
   1369                                     return result;
   1370                                 }
   1371                                 break;
   1372                             }
   1373                         }
   1374                     }
   1375                 }
   1376             }
   1377             return -1;
   1378         }
   1379         return -2;
   1380     }
   1381 
   1382     // sets of name characters, maximum name lengths -----------------------
   1383 
   1384     /**
   1385      * Adds a codepoint into a set of ints.
   1386      * Equivalent to SET_ADD.
   1387      * @param set set to add to
   1388      * @param ch 16 bit char to add
   1389      */
   1390     private static void add(int set[], char ch)
   1391     {
   1392         set[ch >>> 5] |= 1 << (ch & 0x1f);
   1393     }
   1394 
   1395     /**
   1396      * Checks if a codepoint is a part of a set of ints.
   1397      * Equivalent to SET_CONTAINS.
   1398      * @param set set to check in
   1399      * @param ch 16 bit char to check
   1400      * @return true if codepoint is part of the set, false otherwise
   1401      */
   1402     private static boolean contains(int set[], char ch)
   1403     {
   1404         return (set[ch >>> 5] & (1 << (ch & 0x1f))) != 0;
   1405     }
   1406 
   1407     /**
   1408      * Adds all characters of the argument str and gets the length
   1409      * Equivalent to calcStringSetLength.
   1410      * @param set set to add all chars of str to
   1411      * @param str string to add
   1412      */
   1413     private static int add(int set[], String str)
   1414     {
   1415         int result = str.length();
   1416 
   1417         for (int i = result - 1; i >= 0; i --) {
   1418             add(set, str.charAt(i));
   1419         }
   1420         return result;
   1421     }
   1422 
   1423     /**
   1424      * Adds all characters of the argument str and gets the length
   1425      * Equivalent to calcStringSetLength.
   1426      * @param set set to add all chars of str to
   1427      * @param str string to add
   1428      */
   1429     private static int add(int set[], StringBuffer str)
   1430     {
   1431         int result = str.length();
   1432 
   1433         for (int i = result - 1; i >= 0; i --) {
   1434             add(set, str.charAt(i));
   1435         }
   1436         return result;
   1437     }
   1438 
   1439     /**
   1440      * Adds all algorithmic names into the name set.
   1441      * Equivalent to part of calcAlgNameSetsLengths.
   1442      * @param maxlength length to compare to
   1443      * @return the maximum length of any possible algorithmic name if it is >
   1444      *         maxlength, otherwise maxlength is returned.
   1445      */
   1446     private int addAlgorithmName(int maxlength)
   1447     {
   1448         int result = 0;
   1449         for (int i = m_algorithm_.length - 1; i >= 0; i --) {
   1450             result = m_algorithm_[i].add(m_nameSet_, maxlength);
   1451             if (result > maxlength) {
   1452                 maxlength = result;
   1453             }
   1454         }
   1455         return maxlength;
   1456     }
   1457 
   1458     /**
   1459      * Adds all extended names into the name set.
   1460      * Equivalent to part of calcExtNameSetsLengths.
   1461      * @param maxlength length to compare to
   1462      * @return the maxlength of any possible extended name.
   1463      */
   1464     private int addExtendedName(int maxlength)
   1465     {
   1466         for (int i = TYPE_NAMES_.length - 1; i >= 0; i --) {
   1467             // for each category, count the length of the category name
   1468             // plus 9 =
   1469             // 2 for <>
   1470             // 1 for -
   1471             // 6 for most hex digits per code point
   1472             int length = 9 + add(m_nameSet_, TYPE_NAMES_[i]);
   1473             if (length > maxlength) {
   1474                 maxlength = length;
   1475             }
   1476         }
   1477         return maxlength;
   1478     }
   1479 
   1480     /**
   1481      * Adds names of a group to the argument set.
   1482      * Equivalent to calcNameSetLength.
   1483      * @param offset of the group name string in byte count
   1484      * @param length of the group name string
   1485      * @param tokenlength array to store the length of each token
   1486      * @param set to add to
   1487      * @return the length of the name string and the length of the group
   1488      *         string parsed
   1489      */
   1490     private int[] addGroupName(int offset, int length, byte tokenlength[],
   1491                                int set[])
   1492     {
   1493         int resultnlength = 0;
   1494         int resultplength = 0;
   1495         while (resultplength < length) {
   1496             char b = (char)(m_groupstring_[offset + resultplength] & 0xff);
   1497             resultplength ++;
   1498             if (b == ';') {
   1499                 break;
   1500             }
   1501 
   1502             if (b >= m_tokentable_.length) {
   1503                 add(set, b); // implicit letter
   1504                 resultnlength ++;
   1505             }
   1506             else {
   1507                 char token = m_tokentable_[b & 0x00ff];
   1508                 if (token == 0xFFFE) {
   1509                     // this is a lead byte for a double-byte token
   1510                     b = (char)(b << 8 | (m_groupstring_[offset + resultplength]
   1511                                          & 0x00ff));
   1512                     token = m_tokentable_[b];
   1513                     resultplength ++;
   1514                 }
   1515                 if (token == 0xFFFF) {
   1516                     add(set, b);
   1517                     resultnlength ++;
   1518                 }
   1519                 else {
   1520                     // count token word
   1521                     // use cached token length
   1522                     byte tlength = tokenlength[b];
   1523                     if (tlength == 0) {
   1524                         synchronized (m_utilStringBuffer_) {
   1525                             m_utilStringBuffer_.setLength(0);
   1526                             UCharacterUtility.getNullTermByteSubString(
   1527                                            m_utilStringBuffer_, m_tokenstring_,
   1528                                            token);
   1529                             tlength = (byte)add(set, m_utilStringBuffer_);
   1530                         }
   1531                         tokenlength[b] = tlength;
   1532                     }
   1533                     resultnlength += tlength;
   1534                 }
   1535             }
   1536         }
   1537         m_utilIntBuffer_[0] = resultnlength;
   1538         m_utilIntBuffer_[1] = resultplength;
   1539         return m_utilIntBuffer_;
   1540     }
   1541 
   1542     /**
   1543      * Adds names of all group to the argument set.
   1544      * Sets the data member m_max*Length_.
   1545      * Method called only once.
   1546      * Equivalent to calcGroupNameSetsLength.
   1547      * @param maxlength length to compare to
   1548      */
   1549     private void addGroupName(int maxlength)
   1550     {
   1551         int maxisolength = 0;
   1552         char offsets[] = new char[LINES_PER_GROUP_ + 2];
   1553         char lengths[] = new char[LINES_PER_GROUP_ + 2];
   1554         byte tokenlengths[] = new byte[m_tokentable_.length];
   1555 
   1556         // enumerate all groups
   1557         // for (int i = m_groupcount_ - 1; i >= 0; i --) {
   1558         for (int i = 0; i < m_groupcount_ ; i ++) {
   1559             int offset = getGroupLengths(i, offsets, lengths);
   1560             // enumerate all lines in each group
   1561             // for (int linenumber = LINES_PER_GROUP_ - 1; linenumber >= 0;
   1562             //    linenumber --) {
   1563             for (int linenumber = 0; linenumber < LINES_PER_GROUP_;
   1564                 linenumber ++) {
   1565                 int lineoffset = offset + offsets[linenumber];
   1566                 int length = lengths[linenumber];
   1567                 if (length == 0) {
   1568                     continue;
   1569                 }
   1570 
   1571                 // read regular name
   1572                 int parsed[] = addGroupName(lineoffset, length, tokenlengths,
   1573                                             m_nameSet_);
   1574                 if (parsed[0] > maxlength) {
   1575                     // 0 for name length
   1576                     maxlength = parsed[0];
   1577                 }
   1578                 lineoffset += parsed[1];
   1579                 if (parsed[1] >= length) {
   1580                     // 1 for parsed group string length
   1581                     continue;
   1582                 }
   1583                 length -= parsed[1];
   1584                 // read Unicode 1.0 name
   1585                 parsed = addGroupName(lineoffset, length, tokenlengths,
   1586                                       m_nameSet_);
   1587                 if (parsed[0] > maxlength) {
   1588                     // 0 for name length
   1589                     maxlength = parsed[0];
   1590                 }
   1591                 lineoffset += parsed[1];
   1592                 if (parsed[1] >= length) {
   1593                     // 1 for parsed group string length
   1594                     continue;
   1595                 }
   1596                 length -= parsed[1];
   1597                 // read ISO comment
   1598                 parsed = addGroupName(lineoffset, length, tokenlengths,
   1599                                       m_ISOCommentSet_);
   1600                 if (parsed[1] > maxisolength) {
   1601                     maxisolength = length;
   1602                 }
   1603             }
   1604         }
   1605 
   1606         // set gMax... - name length last for threading
   1607         m_maxISOCommentLength_ = maxisolength;
   1608         m_maxNameLength_ = maxlength;
   1609     }
   1610 
   1611     /**
   1612      * Sets up the name sets and the calculation of the maximum lengths.
   1613      * Equivalent to calcNameSetsLengths.
   1614      */
   1615     private boolean initNameSetsLengths()
   1616     {
   1617         if (m_maxNameLength_ > 0) {
   1618             return true;
   1619         }
   1620 
   1621         String extra = "0123456789ABCDEF<>-";
   1622         // set hex digits, used in various names, and <>-, used in extended
   1623         // names
   1624         for (int i = extra.length() - 1; i >= 0; i --) {
   1625             add(m_nameSet_, extra.charAt(i));
   1626         }
   1627 
   1628         // set sets and lengths from algorithmic names
   1629         m_maxNameLength_ = addAlgorithmName(0);
   1630         // set sets and lengths from extended names
   1631         m_maxNameLength_ = addExtendedName(m_maxNameLength_);
   1632         // set sets and lengths from group names, set global maximum values
   1633         addGroupName(m_maxNameLength_);
   1634         return true;
   1635     }
   1636 
   1637     /**
   1638      * Converts the char set cset into a Unicode set uset.
   1639      * Equivalent to charSetToUSet.
   1640      * @param set Set of 256 bit flags corresponding to a set of chars.
   1641      * @param uset USet to receive characters. Existing contents are deleted.
   1642      */
   1643     private void convert(int set[], UnicodeSet uset)
   1644     {
   1645         uset.clear();
   1646         if (!initNameSetsLengths()) {
   1647             return;
   1648         }
   1649 
   1650         // build a char string with all chars that are used in character names
   1651         for (char c = 255; c > 0; c --) {
   1652             if (contains(set, c)) {
   1653                 uset.add(c);
   1654             }
   1655         }
   1656     }
   1657 }
   1658