Home | History | Annotate | Download | only in coll
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5 *******************************************************************************
      6 * Copyright (C) 2013-2015, International Business Machines
      7 * Corporation and others.  All Rights Reserved.
      8 *******************************************************************************
      9 * CollationFastLatin.java, ported from collationfastlatin.h/.cpp
     10 *
     11 * C++ version created on: 2013aug09
     12 * created by: Markus W. Scherer
     13 */
     14 
     15 package android.icu.impl.coll;
     16 
     17 import android.icu.lang.UScript;
     18 import android.icu.text.Collator;
     19 
     20 /**
     21  * @hide Only a subset of ICU is exposed in Android
     22  */
     23 public final class CollationFastLatin /* all static */ {
     24     /**
     25      * Fast Latin format version (one byte 1..FF).
     26      * Must be incremented for any runtime-incompatible changes,
     27      * in particular, for changes to any of the following constants.
     28      *
     29      * When the major version number of the main data format changes,
     30      * we can reset this fast Latin version to 1.
     31      */
     32     public static final int VERSION = 2;
     33 
     34     public static final int LATIN_MAX = 0x17f;
     35     public static final int LATIN_LIMIT = LATIN_MAX + 1;
     36 
     37     static final int LATIN_MAX_UTF8_LEAD = 0xc5;  // UTF-8 lead byte of LATIN_MAX
     38 
     39     static final int PUNCT_START = 0x2000;
     40     static final int PUNCT_LIMIT = 0x2040;
     41 
     42     // excludes U+FFFE & U+FFFF
     43     static final int NUM_FAST_CHARS = LATIN_LIMIT + (PUNCT_LIMIT - PUNCT_START);
     44 
     45     // Note on the supported weight ranges:
     46     // Analysis of UCA 6.3 and CLDR 23 non-search tailorings shows that
     47     // the CEs for characters in the above ranges, excluding expansions with length >2,
     48     // excluding contractions of >2 characters, and other restrictions
     49     // (see the builder's getCEsFromCE32()),
     50     // use at most about 150 primary weights,
     51     // where about 94 primary weights are possibly-variable (space/punct/symbol/currency),
     52     // at most 4 secondary before-common weights,
     53     // at most 4 secondary after-common weights,
     54     // at most 16 secondary high weights (in secondary CEs), and
     55     // at most 4 tertiary after-common weights.
     56     // The following ranges are designed to support slightly more weights than that.
     57     // (en_US_POSIX is unusual: It creates about 64 variable + 116 Latin primaries.)
     58 
     59     // Digits may use long primaries (preserving more short ones)
     60     // or short primaries (faster) without changing this data structure.
     61     // (If we supported numeric collation, then digits would have to have long primaries
     62     // so that special handling does not affect the fast path.)
     63 
     64     static final int SHORT_PRIMARY_MASK = 0xfc00;  // bits 15..10
     65     static final int INDEX_MASK = 0x3ff;  // bits 9..0 for expansions & contractions
     66     static final int SECONDARY_MASK = 0x3e0;  // bits 9..5
     67     static final int CASE_MASK = 0x18;  // bits 4..3
     68     static final int LONG_PRIMARY_MASK = 0xfff8;  // bits 15..3
     69     static final int TERTIARY_MASK = 7;  // bits 2..0
     70     static final int CASE_AND_TERTIARY_MASK = CASE_MASK | TERTIARY_MASK;
     71 
     72     static final int TWO_SHORT_PRIMARIES_MASK =
     73             (SHORT_PRIMARY_MASK << 16) | SHORT_PRIMARY_MASK;  // 0xfc00fc00
     74     static final int TWO_LONG_PRIMARIES_MASK =
     75             (LONG_PRIMARY_MASK << 16) | LONG_PRIMARY_MASK;  // 0xfff8fff8
     76     static final int TWO_SECONDARIES_MASK =
     77             (SECONDARY_MASK << 16) | SECONDARY_MASK;  // 0x3e003e0
     78     static final int TWO_CASES_MASK =
     79             (CASE_MASK << 16) | CASE_MASK;  // 0x180018
     80     static final int TWO_TERTIARIES_MASK =
     81             (TERTIARY_MASK << 16) | TERTIARY_MASK;  // 0x70007
     82 
     83     /**
     84      * Contraction with one fast Latin character.
     85      * Use INDEX_MASK to find the start of the contraction list after the fixed table.
     86      * The first entry contains the default mapping.
     87      * Otherwise use CONTR_CHAR_MASK for the contraction character index
     88      * (in ascending order).
     89      * Use CONTR_LENGTH_SHIFT for the length of the entry
     90      * (1=BAIL_OUT, 2=one CE, 3=two CEs).
     91      *
     92      * Also, U+0000 maps to a contraction entry, so that the fast path need not
     93      * check for NUL termination.
     94      * It usually maps to a contraction list with only the completely ignorable default value.
     95      */
     96     static final int CONTRACTION = 0x400;
     97     /**
     98      * An expansion encodes two CEs.
     99      * Use INDEX_MASK to find the pair of CEs after the fixed table.
    100      *
    101      * The higher a mini CE value, the easier it is to process.
    102      * For expansions and higher, no context needs to be considered.
    103      */
    104     static final int EXPANSION = 0x800;
    105     /**
    106      * Encodes one CE with a long/low mini primary (there are 128).
    107      * All potentially-variable primaries must be in this range,
    108      * to make the short-primary path as fast as possible.
    109      */
    110     static final int MIN_LONG = 0xc00;
    111     static final int LONG_INC = 8;
    112     static final int MAX_LONG = 0xff8;
    113     /**
    114      * Encodes one CE with a short/high primary (there are 60),
    115      * plus a secondary CE if the secondary weight is high.
    116      * Fast handling: At least all letter primaries should be in this range.
    117      */
    118     static final int MIN_SHORT = 0x1000;
    119     static final int SHORT_INC = 0x400;
    120     /** The highest primary weight is reserved for U+FFFF. */
    121     static final int MAX_SHORT = SHORT_PRIMARY_MASK;
    122 
    123     static final int MIN_SEC_BEFORE = 0;  // must add SEC_OFFSET
    124     static final int SEC_INC = 0x20;
    125     static final int MAX_SEC_BEFORE = MIN_SEC_BEFORE + 4 * SEC_INC;  // 5 before common
    126     static final int COMMON_SEC = MAX_SEC_BEFORE + SEC_INC;
    127     static final int MIN_SEC_AFTER = COMMON_SEC + SEC_INC;
    128     static final int MAX_SEC_AFTER = MIN_SEC_AFTER + 5 * SEC_INC;  // 6 after common
    129     static final int MIN_SEC_HIGH = MAX_SEC_AFTER + SEC_INC;  // 20 high secondaries
    130     static final int MAX_SEC_HIGH = SECONDARY_MASK;
    131 
    132     /**
    133      * Lookup: Add this offset to secondary weights, except for completely ignorable CEs.
    134      * Must be greater than any special value, e.g., MERGE_WEIGHT.
    135      * The exact value is not relevant for the format version.
    136      */
    137     static final int SEC_OFFSET = SEC_INC;
    138     static final int COMMON_SEC_PLUS_OFFSET = COMMON_SEC + SEC_OFFSET;
    139 
    140     static final int TWO_SEC_OFFSETS =
    141             (SEC_OFFSET << 16) | SEC_OFFSET;  // 0x200020
    142     static final int TWO_COMMON_SEC_PLUS_OFFSET =
    143             (COMMON_SEC_PLUS_OFFSET << 16) | COMMON_SEC_PLUS_OFFSET;
    144 
    145     static final int LOWER_CASE = 8;  // case bits include this offset
    146     static final int TWO_LOWER_CASES = (LOWER_CASE << 16) | LOWER_CASE;  // 0x80008
    147 
    148     static final int COMMON_TER = 0;  // must add TER_OFFSET
    149     static final int MAX_TER_AFTER = 7;  // 7 after common
    150 
    151     /**
    152      * Lookup: Add this offset to tertiary weights, except for completely ignorable CEs.
    153      * Must be greater than any special value, e.g., MERGE_WEIGHT.
    154      * Must be greater than case bits as well, so that with combined case+tertiary weights
    155      * plus the offset the tertiary bits does not spill over into the case bits.
    156      * The exact value is not relevant for the format version.
    157      */
    158     static final int TER_OFFSET = SEC_OFFSET;
    159     static final int COMMON_TER_PLUS_OFFSET = COMMON_TER + TER_OFFSET;
    160 
    161     static final int TWO_TER_OFFSETS = (TER_OFFSET << 16) | TER_OFFSET;
    162     static final int TWO_COMMON_TER_PLUS_OFFSET =
    163             (COMMON_TER_PLUS_OFFSET << 16) | COMMON_TER_PLUS_OFFSET;
    164 
    165     static final int MERGE_WEIGHT = 3;
    166     static final int EOS = 2;  // end of string
    167     static final int BAIL_OUT = 1;
    168 
    169     /**
    170      * Contraction result first word bits 8..0 contain the
    171      * second contraction character, as a char index 0..NUM_FAST_CHARS-1.
    172      * Each contraction list is terminated with a word containing CONTR_CHAR_MASK.
    173      */
    174     static final int CONTR_CHAR_MASK = 0x1ff;
    175     /**
    176      * Contraction result first word bits 10..9 contain the result length:
    177      * 1=bail out, 2=one mini CE, 3=two mini CEs
    178      */
    179     static final int CONTR_LENGTH_SHIFT = 9;
    180 
    181     /**
    182      * Comparison return value when the regular comparison must be used.
    183      * The exact value is not relevant for the format version.
    184      */
    185     public static final int BAIL_OUT_RESULT = -2;
    186 
    187     static int getCharIndex(char c) {
    188         if(c <= LATIN_MAX) {
    189             return c;
    190         } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
    191             return c - (PUNCT_START - LATIN_LIMIT);
    192         } else {
    193             // Not a fast Latin character.
    194             // Note: U+FFFE & U+FFFF are forbidden in tailorings
    195             // and thus do not occur in any contractions.
    196             return -1;
    197         }
    198     }
    199 
    200     /**
    201      * Computes the options value for the compare functions
    202      * and writes the precomputed primary weights.
    203      * Returns -1 if the Latin fastpath is not supported for the data and settings.
    204      * The capacity must be LATIN_LIMIT.
    205      */
    206     public static int getOptions(CollationData data, CollationSettings settings,
    207             char[] primaries) {
    208         char[] header = data.fastLatinTableHeader;
    209         if(header == null) { return -1; }
    210         assert((header[0] >> 8) == VERSION);
    211         if(primaries.length != LATIN_LIMIT) {
    212             assert false;
    213             return -1;
    214         }
    215 
    216         int miniVarTop;
    217         if((settings.options & CollationSettings.ALTERNATE_MASK) == 0) {
    218             // No mini primaries are variable, set a variableTop just below the
    219             // lowest long mini primary.
    220             miniVarTop = MIN_LONG - 1;
    221         } else {
    222             int headerLength = header[0] & 0xff;
    223             int i = 1 + settings.getMaxVariable();
    224             if(i >= headerLength) {
    225                 return -1;  // variableTop >= digits, should not occur
    226             }
    227             miniVarTop = header[i];
    228         }
    229 
    230         boolean digitsAreReordered = false;
    231         if(settings.hasReordering()) {
    232             long prevStart = 0;
    233             long beforeDigitStart = 0;
    234             long digitStart = 0;
    235             long afterDigitStart = 0;
    236             for(int group = Collator.ReorderCodes.FIRST;
    237                     group < Collator.ReorderCodes.FIRST + CollationData.MAX_NUM_SPECIAL_REORDER_CODES;
    238                     ++group) {
    239                 long start = data.getFirstPrimaryForGroup(group);
    240                 start = settings.reorder(start);
    241                 if(group == Collator.ReorderCodes.DIGIT) {
    242                     beforeDigitStart = prevStart;
    243                     digitStart = start;
    244                 } else if(start != 0) {
    245                     if(start < prevStart) {
    246                         // The permutation affects the groups up to Latin.
    247                         return -1;
    248                     }
    249                     // In the future, there might be a special group between digits & Latin.
    250                     if(digitStart != 0 && afterDigitStart == 0 && prevStart == beforeDigitStart) {
    251                         afterDigitStart = start;
    252                     }
    253                     prevStart = start;
    254                 }
    255             }
    256             long latinStart = data.getFirstPrimaryForGroup(UScript.LATIN);
    257             latinStart = settings.reorder(latinStart);
    258             if(latinStart < prevStart) {
    259                 return -1;
    260             }
    261             if(afterDigitStart == 0) {
    262                 afterDigitStart = latinStart;
    263             }
    264             if(!(beforeDigitStart < digitStart && digitStart < afterDigitStart)) {
    265                 digitsAreReordered = true;
    266             }
    267         }
    268 
    269         char[] table = data.fastLatinTable;  // skip the header
    270         for(int c = 0; c < LATIN_LIMIT; ++c) {
    271             int p = table[c];
    272             if(p >= MIN_SHORT) {
    273                 p &= SHORT_PRIMARY_MASK;
    274             } else if(p > miniVarTop) {
    275                 p &= LONG_PRIMARY_MASK;
    276             } else {
    277                 p = 0;
    278             }
    279             primaries[c] = (char)p;
    280         }
    281         if(digitsAreReordered || (settings.options & CollationSettings.NUMERIC) != 0) {
    282             // Bail out for digits.
    283             for(int c = 0x30; c <= 0x39; ++c) { primaries[c] = 0; }
    284         }
    285 
    286         // Shift the miniVarTop above other options.
    287         return (miniVarTop << 16) | settings.options;
    288     }
    289 
    290     public static int compareUTF16(char[] table, char[] primaries, int options,
    291             CharSequence left, CharSequence right, int startIndex) {
    292         // This is a modified copy of CollationCompare.compareUpToQuaternary(),
    293         // optimized for common Latin text.
    294         // Keep them in sync!
    295 
    296         int variableTop = options >> 16;  // see getOptions()
    297         options &= 0xffff;  // needed for CollationSettings.getStrength() to work
    298 
    299         // Check for supported characters, fetch mini CEs, and compare primaries.
    300         int leftIndex = startIndex, rightIndex = startIndex;
    301         /**
    302          * Single mini CE or a pair.
    303          * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits.
    304          * If there is only one, then it is in the lower bits, and the upper bits are 0.
    305          */
    306         int leftPair = 0, rightPair = 0;
    307         for(;;) {
    308             // We fetch CEs until we get a non-ignorable primary or reach the end.
    309             while(leftPair == 0) {
    310                 if(leftIndex == left.length()) {
    311                     leftPair = EOS;
    312                     break;
    313                 }
    314                 int c = left.charAt(leftIndex++);
    315                 if(c <= LATIN_MAX) {
    316                     leftPair = primaries[c];
    317                     if(leftPair != 0) { break; }
    318                     if(c <= 0x39 && c >= 0x30 && (options & CollationSettings.NUMERIC) != 0) {
    319                         return BAIL_OUT_RESULT;
    320                     }
    321                     leftPair = table[c];
    322                 } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
    323                     leftPair = table[c - PUNCT_START + LATIN_LIMIT];
    324                 } else {
    325                     leftPair = lookup(table, c);
    326                 }
    327                 if(leftPair >= MIN_SHORT) {
    328                     leftPair &= SHORT_PRIMARY_MASK;
    329                     break;
    330                 } else if(leftPair > variableTop) {
    331                     leftPair &= LONG_PRIMARY_MASK;
    332                     break;
    333                 } else {
    334                     long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
    335                     if(pairAndInc < 0) {
    336                         ++leftIndex;
    337                         pairAndInc = ~pairAndInc;
    338                     }
    339                     leftPair = (int)pairAndInc;
    340                     if(leftPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
    341                     leftPair = getPrimaries(variableTop, leftPair);
    342                 }
    343             }
    344 
    345             while(rightPair == 0) {
    346                 if(rightIndex == right.length()) {
    347                     rightPair = EOS;
    348                     break;
    349                 }
    350                 int c = right.charAt(rightIndex++);
    351                 if(c <= LATIN_MAX) {
    352                     rightPair = primaries[c];
    353                     if(rightPair != 0) { break; }
    354                     if(c <= 0x39 && c >= 0x30 && (options & CollationSettings.NUMERIC) != 0) {
    355                         return BAIL_OUT_RESULT;
    356                     }
    357                     rightPair = table[c];
    358                 } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
    359                     rightPair = table[c - PUNCT_START + LATIN_LIMIT];
    360                 } else {
    361                     rightPair = lookup(table, c);
    362                 }
    363                 if(rightPair >= MIN_SHORT) {
    364                     rightPair &= SHORT_PRIMARY_MASK;
    365                     break;
    366                 } else if(rightPair > variableTop) {
    367                     rightPair &= LONG_PRIMARY_MASK;
    368                     break;
    369                 } else {
    370                     long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
    371                     if(pairAndInc < 0) {
    372                         ++rightIndex;
    373                         pairAndInc = ~pairAndInc;
    374                     }
    375                     rightPair = (int)pairAndInc;
    376                     if(rightPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
    377                     rightPair = getPrimaries(variableTop, rightPair);
    378                 }
    379             }
    380 
    381             if(leftPair == rightPair) {
    382                 if(leftPair == EOS) { break; }
    383                 leftPair = rightPair = 0;
    384                 continue;
    385             }
    386             int leftPrimary = leftPair & 0xffff;
    387             int rightPrimary = rightPair & 0xffff;
    388             if(leftPrimary != rightPrimary) {
    389                 // Return the primary difference.
    390                 return (leftPrimary < rightPrimary) ? Collation.LESS : Collation.GREATER;
    391             }
    392             if(leftPair == EOS) { break; }
    393             leftPair >>>= 16;
    394             rightPair >>>= 16;
    395         }
    396         // In the following, we need to re-fetch each character because we did not buffer the CEs,
    397         // but we know that the string is well-formed and
    398         // only contains supported characters and mappings.
    399 
    400         // We might skip the secondary level but continue with the case level
    401         // which is turned on separately.
    402         if(CollationSettings.getStrength(options) >= Collator.SECONDARY) {
    403             leftIndex = rightIndex = startIndex;
    404             leftPair = rightPair = 0;
    405             for(;;) {
    406                 while(leftPair == 0) {
    407                     if(leftIndex == left.length()) {
    408                         leftPair = EOS;
    409                         break;
    410                     }
    411                     int c = left.charAt(leftIndex++);
    412                     if(c <= LATIN_MAX) {
    413                         leftPair = table[c];
    414                     } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
    415                         leftPair = table[c - PUNCT_START + LATIN_LIMIT];
    416                     } else {
    417                         leftPair = lookup(table, c);
    418                     }
    419                     if(leftPair >= MIN_SHORT) {
    420                         leftPair = getSecondariesFromOneShortCE(leftPair);
    421                         break;
    422                     } else if(leftPair > variableTop) {
    423                         leftPair = COMMON_SEC_PLUS_OFFSET;
    424                         break;
    425                     } else {
    426                         long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
    427                         if(pairAndInc < 0) {
    428                             ++leftIndex;
    429                             pairAndInc = ~pairAndInc;
    430                         }
    431                         leftPair = getSecondaries(variableTop, (int)pairAndInc);
    432                     }
    433                 }
    434 
    435                 while(rightPair == 0) {
    436                     if(rightIndex == right.length()) {
    437                         rightPair = EOS;
    438                         break;
    439                     }
    440                     int c = right.charAt(rightIndex++);
    441                     if(c <= LATIN_MAX) {
    442                         rightPair = table[c];
    443                     } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
    444                         rightPair = table[c - PUNCT_START + LATIN_LIMIT];
    445                     } else {
    446                         rightPair = lookup(table, c);
    447                     }
    448                     if(rightPair >= MIN_SHORT) {
    449                         rightPair = getSecondariesFromOneShortCE(rightPair);
    450                         break;
    451                     } else if(rightPair > variableTop) {
    452                         rightPair = COMMON_SEC_PLUS_OFFSET;
    453                         break;
    454                     } else {
    455                         long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
    456                         if(pairAndInc < 0) {
    457                             ++rightIndex;
    458                             pairAndInc = ~pairAndInc;
    459                         }
    460                         rightPair = getSecondaries(variableTop, (int)pairAndInc);
    461                     }
    462                 }
    463 
    464                 if(leftPair == rightPair) {
    465                     if(leftPair == EOS) { break; }
    466                     leftPair = rightPair = 0;
    467                     continue;
    468                 }
    469                 int leftSecondary = leftPair & 0xffff;
    470                 int rightSecondary = rightPair & 0xffff;
    471                 if(leftSecondary != rightSecondary) {
    472                     if((options & CollationSettings.BACKWARD_SECONDARY) != 0) {
    473                         // Full support for backwards secondary requires backwards contraction matching
    474                         // and moving backwards between merge separators.
    475                         return BAIL_OUT_RESULT;
    476                     }
    477                     return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER;
    478                 }
    479                 if(leftPair == EOS) { break; }
    480                 leftPair >>>= 16;
    481                 rightPair >>>= 16;
    482             }
    483         }
    484 
    485         if((options & CollationSettings.CASE_LEVEL) != 0) {
    486             boolean strengthIsPrimary = CollationSettings.getStrength(options) == Collator.PRIMARY;
    487             leftIndex = rightIndex = startIndex;
    488             leftPair = rightPair = 0;
    489             for(;;) {
    490                 while(leftPair == 0) {
    491                     if(leftIndex == left.length()) {
    492                         leftPair = EOS;
    493                         break;
    494                     }
    495                     int c = left.charAt(leftIndex++);
    496                     leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
    497                     if(leftPair < MIN_LONG) {
    498                         long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
    499                         if(pairAndInc < 0) {
    500                             ++leftIndex;
    501                             pairAndInc = ~pairAndInc;
    502                         }
    503                         leftPair = (int)pairAndInc;
    504                     }
    505                     leftPair = getCases(variableTop, strengthIsPrimary, leftPair);
    506                 }
    507 
    508                 while(rightPair == 0) {
    509                     if(rightIndex == right.length()) {
    510                         rightPair = EOS;
    511                         break;
    512                     }
    513                     int c = right.charAt(rightIndex++);
    514                     rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
    515                     if(rightPair < MIN_LONG) {
    516                         long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
    517                         if(pairAndInc < 0) {
    518                             ++rightIndex;
    519                             pairAndInc = ~pairAndInc;
    520                         }
    521                         rightPair = (int)pairAndInc;
    522                     }
    523                     rightPair = getCases(variableTop, strengthIsPrimary, rightPair);
    524                 }
    525 
    526                 if(leftPair == rightPair) {
    527                     if(leftPair == EOS) { break; }
    528                     leftPair = rightPair = 0;
    529                     continue;
    530                 }
    531                 int leftCase = leftPair & 0xffff;
    532                 int rightCase = rightPair & 0xffff;
    533                 if(leftCase != rightCase) {
    534                     if((options & CollationSettings.UPPER_FIRST) == 0) {
    535                         return (leftCase < rightCase) ? Collation.LESS : Collation.GREATER;
    536                     } else {
    537                         return (leftCase < rightCase) ? Collation.GREATER : Collation.LESS;
    538                     }
    539                 }
    540                 if(leftPair == EOS) { break; }
    541                 leftPair >>>= 16;
    542                 rightPair >>>= 16;
    543             }
    544         }
    545         if(CollationSettings.getStrength(options) <= Collator.SECONDARY) { return Collation.EQUAL; }
    546 
    547         // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
    548         boolean withCaseBits = CollationSettings.isTertiaryWithCaseBits(options);
    549 
    550         leftIndex = rightIndex = startIndex;
    551         leftPair = rightPair = 0;
    552         for(;;) {
    553             while(leftPair == 0) {
    554                 if(leftIndex == left.length()) {
    555                     leftPair = EOS;
    556                     break;
    557                 }
    558                 int c = left.charAt(leftIndex++);
    559                 leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
    560                 if(leftPair < MIN_LONG) {
    561                     long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
    562                     if(pairAndInc < 0) {
    563                         ++leftIndex;
    564                         pairAndInc = ~pairAndInc;
    565                     }
    566                     leftPair = (int)pairAndInc;
    567                 }
    568                 leftPair = getTertiaries(variableTop, withCaseBits, leftPair);
    569             }
    570 
    571             while(rightPair == 0) {
    572                 if(rightIndex == right.length()) {
    573                     rightPair = EOS;
    574                     break;
    575                 }
    576                 int c = right.charAt(rightIndex++);
    577                 rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
    578                 if(rightPair < MIN_LONG) {
    579                     long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
    580                     if(pairAndInc < 0) {
    581                         ++rightIndex;
    582                         pairAndInc = ~pairAndInc;
    583                     }
    584                     rightPair = (int)pairAndInc;
    585                 }
    586                 rightPair = getTertiaries(variableTop, withCaseBits, rightPair);
    587             }
    588 
    589             if(leftPair == rightPair) {
    590                 if(leftPair == EOS) { break; }
    591                 leftPair = rightPair = 0;
    592                 continue;
    593             }
    594             int leftTertiary = leftPair & 0xffff;
    595             int rightTertiary = rightPair & 0xffff;
    596             if(leftTertiary != rightTertiary) {
    597                 if(CollationSettings.sortsTertiaryUpperCaseFirst(options)) {
    598                     // Pass through EOS and MERGE_WEIGHT
    599                     // and keep real tertiary weights larger than the MERGE_WEIGHT.
    600                     // Tertiary CEs (secondary ignorables) are not supported in fast Latin.
    601                     if(leftTertiary > MERGE_WEIGHT) {
    602                         leftTertiary ^= CASE_MASK;
    603                     }
    604                     if(rightTertiary > MERGE_WEIGHT) {
    605                         rightTertiary ^= CASE_MASK;
    606                     }
    607                 }
    608                 return (leftTertiary < rightTertiary) ? Collation.LESS : Collation.GREATER;
    609             }
    610             if(leftPair == EOS) { break; }
    611             leftPair >>>= 16;
    612             rightPair >>>= 16;
    613         }
    614         if(CollationSettings.getStrength(options) <= Collator.TERTIARY) { return Collation.EQUAL; }
    615 
    616         leftIndex = rightIndex = startIndex;
    617         leftPair = rightPair = 0;
    618         for(;;) {
    619             while(leftPair == 0) {
    620                 if(leftIndex == left.length()) {
    621                     leftPair = EOS;
    622                     break;
    623                 }
    624                 int c = left.charAt(leftIndex++);
    625                 leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
    626                 if(leftPair < MIN_LONG) {
    627                     long pairAndInc = nextPair(table, c, leftPair, left, leftIndex);
    628                     if(pairAndInc < 0) {
    629                         ++leftIndex;
    630                         pairAndInc = ~pairAndInc;
    631                     }
    632                     leftPair = (int)pairAndInc;
    633                 }
    634                 leftPair = getQuaternaries(variableTop, leftPair);
    635             }
    636 
    637             while(rightPair == 0) {
    638                 if(rightIndex == right.length()) {
    639                     rightPair = EOS;
    640                     break;
    641                 }
    642                 int c = right.charAt(rightIndex++);
    643                 rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
    644                 if(rightPair < MIN_LONG) {
    645                     long pairAndInc = nextPair(table, c, rightPair, right, rightIndex);
    646                     if(pairAndInc < 0) {
    647                         ++rightIndex;
    648                         pairAndInc = ~pairAndInc;
    649                     }
    650                     rightPair = (int)pairAndInc;
    651                 }
    652                 rightPair = getQuaternaries(variableTop, rightPair);
    653             }
    654 
    655             if(leftPair == rightPair) {
    656                 if(leftPair == EOS) { break; }
    657                 leftPair = rightPair = 0;
    658                 continue;
    659             }
    660             int leftQuaternary = leftPair & 0xffff;
    661             int rightQuaternary = rightPair & 0xffff;
    662             if(leftQuaternary != rightQuaternary) {
    663                 return (leftQuaternary < rightQuaternary) ? Collation.LESS : Collation.GREATER;
    664             }
    665             if(leftPair == EOS) { break; }
    666             leftPair >>>= 16;
    667             rightPair >>>= 16;
    668         }
    669         return Collation.EQUAL;
    670     }
    671 
    672     private static int lookup(char[] table, int c) {
    673         assert(c > LATIN_MAX);
    674         if(PUNCT_START <= c && c < PUNCT_LIMIT) {
    675             return table[c - PUNCT_START + LATIN_LIMIT];
    676         } else if(c == 0xfffe) {
    677             return MERGE_WEIGHT;
    678         } else if(c == 0xffff) {
    679             return MAX_SHORT | COMMON_SEC | LOWER_CASE | COMMON_TER;
    680         } else {
    681             return BAIL_OUT;
    682         }
    683     }
    684 
    685     /**
    686      * Java returns a negative result (use the '~' operator) if sIndex is to be incremented.
    687      * C++ modifies sIndex.
    688      */
    689     private static long nextPair(char[] table, int c, int ce, CharSequence s16, int sIndex) {
    690         if(ce >= MIN_LONG || ce < CONTRACTION) {
    691             return ce;  // simple or special mini CE
    692         } else if(ce >= EXPANSION) {
    693             int index = NUM_FAST_CHARS + (ce & INDEX_MASK);
    694             return ((long)table[index + 1] << 16) | table[index];
    695         } else /* ce >= CONTRACTION */ {
    696             // Contraction list: Default mapping followed by
    697             // 0 or more single-character contraction suffix mappings.
    698             int index = NUM_FAST_CHARS + (ce & INDEX_MASK);
    699             boolean inc = false;  // true if the next char is consumed.
    700             if(sIndex != s16.length()) {
    701                 // Read the next character.
    702                 int c2;
    703                 int nextIndex = sIndex;
    704                 c2 = s16.charAt(nextIndex++);
    705                 if(c2 > LATIN_MAX) {
    706                     if(PUNCT_START <= c2 && c2 < PUNCT_LIMIT) {
    707                         c2 = c2 - PUNCT_START + LATIN_LIMIT;  // 2000..203F -> 0180..01BF
    708                     } else if(c2 == 0xfffe || c2 == 0xffff) {
    709                         c2 = -1;  // U+FFFE & U+FFFF cannot occur in contractions.
    710                     } else {
    711                         return BAIL_OUT;
    712                     }
    713                 }
    714                 // Look for the next character in the contraction suffix list,
    715                 // which is in ascending order of single suffix characters.
    716                 int i = index;
    717                 int head = table[i];  // first skip the default mapping
    718                 int x;
    719                 do {
    720                     i += head >> CONTR_LENGTH_SHIFT;
    721                     head = table[i];
    722                     x = head & CONTR_CHAR_MASK;
    723                 } while(x < c2);
    724                 if(x == c2) {
    725                     index = i;
    726                     inc = true;
    727                 }
    728             }
    729             // Return the CE or CEs for the default or contraction mapping.
    730             int length = table[index] >> CONTR_LENGTH_SHIFT;
    731             if(length == 1) {
    732                 return BAIL_OUT;
    733             }
    734             ce = table[index + 1];
    735             long result;
    736             if(length == 2) {
    737                 result = ce;
    738             } else {
    739                 result = ((long)table[index + 2] << 16) | ce;
    740             }
    741             return inc ? ~result : result;
    742         }
    743     }
    744 
    745     private static int getPrimaries(int variableTop, int pair) {
    746         int ce = pair & 0xffff;
    747         if(ce >= MIN_SHORT) { return pair & TWO_SHORT_PRIMARIES_MASK; }
    748         if(ce > variableTop) { return pair & TWO_LONG_PRIMARIES_MASK; }
    749         if(ce >= MIN_LONG) { return 0; }  // variable
    750         return pair;  // special mini CE
    751     }
    752 
    753     private static int getSecondariesFromOneShortCE(int ce) {
    754         ce &= SECONDARY_MASK;
    755         if(ce < MIN_SEC_HIGH) {
    756             return ce + SEC_OFFSET;
    757         } else {
    758             return ((ce + SEC_OFFSET) << 16) | COMMON_SEC_PLUS_OFFSET;
    759         }
    760     }
    761 
    762     private static int getSecondaries(int variableTop, int pair) {
    763         if(pair <= 0xffff) {
    764             // one mini CE
    765             if(pair >= MIN_SHORT) {
    766                 pair = getSecondariesFromOneShortCE(pair);
    767             } else if(pair > variableTop) {
    768                 pair = COMMON_SEC_PLUS_OFFSET;
    769             } else if(pair >= MIN_LONG) {
    770                 pair = 0;  // variable
    771             }
    772             // else special mini CE
    773         } else {
    774             int ce = pair & 0xffff;
    775             if(ce >= MIN_SHORT) {
    776                 pair = (pair & TWO_SECONDARIES_MASK) + TWO_SEC_OFFSETS;
    777             } else if(ce > variableTop) {
    778                 pair = TWO_COMMON_SEC_PLUS_OFFSET;
    779             } else {
    780                 assert(ce >= MIN_LONG);
    781                 pair = 0;  // variable
    782             }
    783         }
    784         return pair;
    785     }
    786 
    787     private static int getCases(int variableTop, boolean strengthIsPrimary, int pair) {
    788         // Primary+caseLevel: Ignore case level weights of primary ignorables.
    789         // Otherwise: Ignore case level weights of secondary ignorables.
    790         // For details see the comments in the CollationCompare class.
    791         // Tertiary CEs (secondary ignorables) are not supported in fast Latin.
    792         if(pair <= 0xffff) {
    793             // one mini CE
    794             if(pair >= MIN_SHORT) {
    795                 // A high secondary weight means we really have two CEs,
    796                 // a primary CE and a secondary CE.
    797                 int ce = pair;
    798                 pair &= CASE_MASK;  // explicit weight of primary CE
    799                 if(!strengthIsPrimary && (ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
    800                     pair |= LOWER_CASE << 16;  // implied weight of secondary CE
    801                 }
    802             } else if(pair > variableTop) {
    803                 pair = LOWER_CASE;
    804             } else if(pair >= MIN_LONG) {
    805                 pair = 0;  // variable
    806             }
    807             // else special mini CE
    808         } else {
    809             // two mini CEs, same primary groups, neither expands like above
    810             int ce = pair & 0xffff;
    811             if(ce >= MIN_SHORT) {
    812                 if(strengthIsPrimary && (pair & (SHORT_PRIMARY_MASK << 16)) == 0) {
    813                     pair &= CASE_MASK;
    814                 } else {
    815                     pair &= TWO_CASES_MASK;
    816                 }
    817             } else if(ce > variableTop) {
    818                 pair = TWO_LOWER_CASES;
    819             } else {
    820                 assert(ce >= MIN_LONG);
    821                 pair = 0;  // variable
    822             }
    823         }
    824         return pair;
    825     }
    826 
    827     private static int getTertiaries(int variableTop, boolean withCaseBits, int pair) {
    828         if(pair <= 0xffff) {
    829             // one mini CE
    830             if(pair >= MIN_SHORT) {
    831                 // A high secondary weight means we really have two CEs,
    832                 // a primary CE and a secondary CE.
    833                 int ce = pair;
    834                 if(withCaseBits) {
    835                     pair = (pair & CASE_AND_TERTIARY_MASK) + TER_OFFSET;
    836                     if((ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
    837                         pair |= (LOWER_CASE | COMMON_TER_PLUS_OFFSET) << 16;
    838                     }
    839                 } else {
    840                     pair = (pair & TERTIARY_MASK) + TER_OFFSET;
    841                     if((ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
    842                         pair |= COMMON_TER_PLUS_OFFSET << 16;
    843                     }
    844                 }
    845             } else if(pair > variableTop) {
    846                 pair = (pair & TERTIARY_MASK) + TER_OFFSET;
    847                 if(withCaseBits) {
    848                     pair |= LOWER_CASE;
    849                 }
    850             } else if(pair >= MIN_LONG) {
    851                 pair = 0;  // variable
    852             }
    853             // else special mini CE
    854         } else {
    855             // two mini CEs, same primary groups, neither expands like above
    856             int ce = pair & 0xffff;
    857             if(ce >= MIN_SHORT) {
    858                 if(withCaseBits) {
    859                     pair &= TWO_CASES_MASK | TWO_TERTIARIES_MASK;
    860                 } else {
    861                     pair &= TWO_TERTIARIES_MASK;
    862                 }
    863                 pair += TWO_TER_OFFSETS;
    864             } else if(ce > variableTop) {
    865                 pair = (pair & TWO_TERTIARIES_MASK) + TWO_TER_OFFSETS;
    866                 if(withCaseBits) {
    867                     pair |= TWO_LOWER_CASES;
    868                 }
    869             } else {
    870                 assert(ce >= MIN_LONG);
    871                 pair = 0;  // variable
    872             }
    873         }
    874         return pair;
    875     }
    876 
    877     private static int getQuaternaries(int variableTop, int pair) {
    878         // Return the primary weight of a variable CE,
    879         // or the maximum primary weight for a non-variable, not-completely-ignorable CE.
    880         if(pair <= 0xffff) {
    881             // one mini CE
    882             if(pair >= MIN_SHORT) {
    883                 // A high secondary weight means we really have two CEs,
    884                 // a primary CE and a secondary CE.
    885                 if((pair & SECONDARY_MASK) >= MIN_SEC_HIGH) {
    886                     pair = TWO_SHORT_PRIMARIES_MASK;
    887                 } else {
    888                     pair = SHORT_PRIMARY_MASK;
    889                 }
    890             } else if(pair > variableTop) {
    891                 pair = SHORT_PRIMARY_MASK;
    892             } else if(pair >= MIN_LONG) {
    893                 pair &= LONG_PRIMARY_MASK;  // variable
    894             }
    895             // else special mini CE
    896         } else {
    897             // two mini CEs, same primary groups, neither expands like above
    898             int ce = pair & 0xffff;
    899             if(ce > variableTop) {
    900                 pair = TWO_SHORT_PRIMARIES_MASK;
    901             } else {
    902                 assert(ce >= MIN_LONG);
    903                 pair &= TWO_LONG_PRIMARIES_MASK;  // variable
    904             }
    905         }
    906         return pair;
    907     }
    908 
    909     private CollationFastLatin() {}  // no constructor
    910 }
    911