Home | History | Annotate | Download | only in coll
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5 *******************************************************************************
      6 * Copyright (C) 2010-2015, International Business Machines
      7 * Corporation and others.  All Rights Reserved.
      8 *******************************************************************************
      9 * Collation.java, ported from collation.h/.cpp
     10 *
     11 * C++ version created on: 2010oct27
     12 * created by: Markus W. Scherer
     13 */
     14 
     15 package android.icu.impl.coll;
     16 
     17 /**
     18  * Collation v2 basic definitions and static helper functions.
     19  *
     20  * Data structures except for expansion tables store 32-bit CEs which are
     21  * either specials (see tags below) or are compact forms of 64-bit CEs.
     22  * @hide Only a subset of ICU is exposed in Android
     23  */
     24 public final class Collation {
     25     /** UChar32 U_SENTINEL.
     26      * TODO: Create a common, public constant?
     27      */
     28     public static final int SENTINEL_CP = -1;
     29 
     30     // ICU4C compare() API returns enum UCollationResult values (with UCOL_ prefix).
     31     // ICU4J just returns int. We use these constants for ease of porting.
     32     public static final int LESS = -1;
     33     public static final int EQUAL = 0;
     34     public static final int GREATER = 1;
     35 
     36     // Special sort key bytes for all levels.
     37     public static final int TERMINATOR_BYTE = 0;
     38     public static final int LEVEL_SEPARATOR_BYTE = 1;
     39 
     40     /** The secondary/tertiary lower limit for tailoring before any root elements. */
     41     static final int BEFORE_WEIGHT16 = 0x100;
     42 
     43     /**
     44      * Merge-sort-key separator.
     45      * Same as the unique primary and identical-level weights of U+FFFE.
     46      * Must not be used as primary compression low terminator.
     47      * Otherwise usable.
     48      */
     49     public static final int MERGE_SEPARATOR_BYTE = 2;
     50     public static final long MERGE_SEPARATOR_PRIMARY = 0x02000000;  // U+FFFE
     51     static final int MERGE_SEPARATOR_CE32 = 0x02000505;  // U+FFFE
     52 
     53     /**
     54      * Primary compression low terminator, must be greater than MERGE_SEPARATOR_BYTE.
     55      * Reserved value in primary second byte if the lead byte is compressible.
     56      * Otherwise usable in all CE weight bytes.
     57      */
     58     public static final int PRIMARY_COMPRESSION_LOW_BYTE = 3;
     59     /**
     60      * Primary compression high terminator.
     61      * Reserved value in primary second byte if the lead byte is compressible.
     62      * Otherwise usable in all CE weight bytes.
     63      */
     64     public static final int PRIMARY_COMPRESSION_HIGH_BYTE = 0xff;
     65 
     66     /** Default secondary/tertiary weight lead byte. */
     67     static final int COMMON_BYTE = 5;
     68     public static final int COMMON_WEIGHT16 = 0x0500;
     69     /** Middle 16 bits of a CE with a common secondary weight. */
     70     static final int COMMON_SECONDARY_CE = 0x05000000;
     71     /** Lower 16 bits of a CE with a common tertiary weight. */
     72     static final int COMMON_TERTIARY_CE = 0x0500;
     73     /** Lower 32 bits of a CE with common secondary and tertiary weights. */
     74     public static final int COMMON_SEC_AND_TER_CE = 0x05000500;
     75 
     76     static final int SECONDARY_MASK = 0xffff0000;
     77     public static final int CASE_MASK = 0xc000;
     78     static final int SECONDARY_AND_CASE_MASK = SECONDARY_MASK | CASE_MASK;
     79     /** Only the 2*6 bits for the pure tertiary weight. */
     80     public static final int ONLY_TERTIARY_MASK = 0x3f3f;
     81     /** Only the secondary & tertiary bits; no case, no quaternary. */
     82     static final int ONLY_SEC_TER_MASK = SECONDARY_MASK | ONLY_TERTIARY_MASK;
     83     /** Case bits and tertiary bits. */
     84     static final int CASE_AND_TERTIARY_MASK = CASE_MASK | ONLY_TERTIARY_MASK;
     85     public static final int QUATERNARY_MASK = 0xc0;
     86     /** Case bits and quaternary bits. */
     87     public static final int CASE_AND_QUATERNARY_MASK = CASE_MASK | QUATERNARY_MASK;
     88 
     89     static final int UNASSIGNED_IMPLICIT_BYTE = 0xfe;  // compressible
     90     /**
     91      * First unassigned: AlphabeticIndex overflow boundary.
     92      * We want a 3-byte primary so that it fits into the root elements table.
     93      *
     94      * This 3-byte primary will not collide with
     95      * any unassigned-implicit 4-byte primaries because
     96      * the first few hundred Unicode code points all have real mappings.
     97      */
     98     static final long FIRST_UNASSIGNED_PRIMARY = 0xfe040200L;
     99 
    100     static final int TRAIL_WEIGHT_BYTE = 0xff;  // not compressible
    101     static final long FIRST_TRAILING_PRIMARY = 0xff020200L;  // [first trailing]
    102     public static final long MAX_PRIMARY = 0xffff0000L;  // U+FFFF
    103     static final int MAX_REGULAR_CE32 = 0xffff0505;  // U+FFFF
    104 
    105     // CE32 value for U+FFFD as well as illegal UTF-8 byte sequences (which behave like U+FFFD).
    106     // We use the third-highest primary weight for U+FFFD (as in UCA 6.3+).
    107     public static final long FFFD_PRIMARY = MAX_PRIMARY - 0x20000;
    108     static final int FFFD_CE32 = MAX_REGULAR_CE32 - 0x20000;
    109 
    110     /**
    111      * A CE32 is special if its low byte is this or greater.
    112      * Impossible case bits 11 mark special CE32s.
    113      * This value itself is used to indicate a fallback to the base collator.
    114      */
    115     static final int SPECIAL_CE32_LOW_BYTE = 0xc0;
    116     static final int FALLBACK_CE32 = SPECIAL_CE32_LOW_BYTE;
    117     /**
    118      * Low byte of a long-primary special CE32.
    119      */
    120     static final int LONG_PRIMARY_CE32_LOW_BYTE = 0xc1;  // SPECIAL_CE32_LOW_BYTE | LONG_PRIMARY_TAG
    121 
    122     static final int UNASSIGNED_CE32 = 0xffffffff;  // Compute an unassigned-implicit CE.
    123 
    124     static final int NO_CE32 = 1;
    125 
    126     /** No CE: End of input. Only used in runtime code, not stored in data. */
    127     static final long NO_CE_PRIMARY = 1;  // not a left-adjusted weight
    128     static final int NO_CE_WEIGHT16 = 0x0100;  // weight of LEVEL_SEPARATOR_BYTE
    129     public static final long NO_CE = 0x101000100L;  // NO_CE_PRIMARY, NO_CE_WEIGHT16, NO_CE_WEIGHT16
    130 
    131     /** Sort key levels. */
    132 
    133     /** Unspecified level. */
    134     public static final int NO_LEVEL = 0;
    135     public static final int PRIMARY_LEVEL = 1;
    136     public static final int SECONDARY_LEVEL = 2;
    137     public static final int CASE_LEVEL = 3;
    138     public static final int TERTIARY_LEVEL = 4;
    139     public static final int QUATERNARY_LEVEL = 5;
    140     public static final int IDENTICAL_LEVEL = 6;
    141     /** Beyond sort key bytes. */
    142     public static final int ZERO_LEVEL = 7;
    143 
    144     /**
    145      * Sort key level flags: xx_FLAG = 1 << xx_LEVEL.
    146      * In Java, use enum Level with flag() getters, or use EnumSet rather than hand-made bit sets.
    147      */
    148     static final int NO_LEVEL_FLAG = 1;
    149     static final int PRIMARY_LEVEL_FLAG = 2;
    150     static final int SECONDARY_LEVEL_FLAG = 4;
    151     static final int CASE_LEVEL_FLAG = 8;
    152     static final int TERTIARY_LEVEL_FLAG = 0x10;
    153     static final int QUATERNARY_LEVEL_FLAG = 0x20;
    154     static final int IDENTICAL_LEVEL_FLAG = 0x40;
    155     static final int ZERO_LEVEL_FLAG = 0x80;
    156 
    157     /**
    158      * Special-CE32 tags, from bits 3..0 of a special 32-bit CE.
    159      * Bits 31..8 are available for tag-specific data.
    160      * Bits  5..4: Reserved. May be used in the future to indicate lccc!=0 and tccc!=0.
    161      */
    162 
    163     /**
    164      * Fall back to the base collator.
    165      * This is the tag value in SPECIAL_CE32_LOW_BYTE and FALLBACK_CE32.
    166      * Bits 31..8: Unused, 0.
    167      */
    168     static final int FALLBACK_TAG = 0;
    169     /**
    170      * Long-primary CE with COMMON_SEC_AND_TER_CE.
    171      * Bits 31..8: Three-byte primary.
    172      */
    173     static final int LONG_PRIMARY_TAG = 1;
    174     /**
    175      * Long-secondary CE with zero primary.
    176      * Bits 31..16: Secondary weight.
    177      * Bits 15.. 8: Tertiary weight.
    178      */
    179     static final int LONG_SECONDARY_TAG = 2;
    180     /**
    181      * Unused.
    182      * May be used in the future for single-byte secondary CEs (SHORT_SECONDARY_TAG),
    183      * storing the secondary in bits 31..24, the ccc in bits 23..16,
    184      * and the tertiary in bits 15..8.
    185      */
    186     static final int RESERVED_TAG_3 = 3;
    187     /**
    188      * Latin mini expansions of two simple CEs [pp, 05, tt] [00, ss, 05].
    189      * Bits 31..24: Single-byte primary weight pp of the first CE.
    190      * Bits 23..16: Tertiary weight tt of the first CE.
    191      * Bits 15.. 8: Secondary weight ss of the second CE.
    192      */
    193     static final int LATIN_EXPANSION_TAG = 4;
    194     /**
    195      * Points to one or more simple/long-primary/long-secondary 32-bit CE32s.
    196      * Bits 31..13: Index into int table.
    197      * Bits 12.. 8: Length=1..31.
    198      */
    199     static final int EXPANSION32_TAG = 5;
    200     /**
    201      * Points to one or more 64-bit CEs.
    202      * Bits 31..13: Index into CE table.
    203      * Bits 12.. 8: Length=1..31.
    204      */
    205     static final int EXPANSION_TAG = 6;
    206     /**
    207      * Builder data, used only in the CollationDataBuilder, not in runtime data.
    208      *
    209      * If bit 8 is 0: Builder context, points to a list of context-sensitive mappings.
    210      * Bits 31..13: Index to the builder's list of ConditionalCE32 for this character.
    211      * Bits 12.. 9: Unused, 0.
    212      *
    213      * If bit 8 is 1 (IS_BUILDER_JAMO_CE32): Builder-only jamoCE32 value.
    214      * The builder fetches the Jamo CE32 from the trie.
    215      * Bits 31..13: Jamo code point.
    216      * Bits 12.. 9: Unused, 0.
    217      */
    218     static final int BUILDER_DATA_TAG = 7;
    219     /**
    220      * Points to prefix trie.
    221      * Bits 31..13: Index into prefix/contraction data.
    222      * Bits 12.. 8: Unused, 0.
    223      */
    224     static final int PREFIX_TAG = 8;
    225     /**
    226      * Points to contraction data.
    227      * Bits 31..13: Index into prefix/contraction data.
    228      * Bits 12..11: Unused, 0.
    229      * Bit      10: CONTRACT_TRAILING_CCC flag.
    230      * Bit       9: CONTRACT_NEXT_CCC flag.
    231      * Bit       8: CONTRACT_SINGLE_CP_NO_MATCH flag.
    232      */
    233     static final int CONTRACTION_TAG = 9;
    234     /**
    235      * Decimal digit.
    236      * Bits 31..13: Index into int table for non-numeric-collation CE32.
    237      * Bit      12: Unused, 0.
    238      * Bits 11.. 8: Digit value 0..9.
    239      */
    240     static final int DIGIT_TAG = 10;
    241     /**
    242      * Tag for U+0000, for moving the NUL-termination handling
    243      * from the regular fastpath into specials-handling code.
    244      * Bits 31..8: Unused, 0.
    245      */
    246     static final int U0000_TAG = 11;
    247     /**
    248      * Tag for a Hangul syllable.
    249      * Bits 31..9: Unused, 0.
    250      * Bit      8: HANGUL_NO_SPECIAL_JAMO flag.
    251      */
    252     static final int HANGUL_TAG = 12;
    253     /**
    254      * Tag for a lead surrogate code unit.
    255      * Optional optimization for UTF-16 string processing.
    256      * Bits 31..10: Unused, 0.
    257      *       9.. 8: =0: All associated supplementary code points are unassigned-implict.
    258      *              =1: All associated supplementary code points fall back to the base data.
    259      *              else: (Normally 2) Look up the data for the supplementary code point.
    260      */
    261     static final int LEAD_SURROGATE_TAG = 13;
    262     /**
    263      * Tag for CEs with primary weights in code point order.
    264      * Bits 31..13: Index into CE table, for one data "CE".
    265      * Bits 12.. 8: Unused, 0.
    266      *
    267      * This data "CE" has the following bit fields:
    268      * Bits 63..32: Three-byte primary pppppp00.
    269      *      31.. 8: Start/base code point of the in-order range.
    270      *           7: Flag isCompressible primary.
    271      *       6.. 0: Per-code point primary-weight increment.
    272      */
    273     static final int OFFSET_TAG = 14;
    274     /**
    275      * Implicit CE tag. Compute an unassigned-implicit CE.
    276      * All bits are set (UNASSIGNED_CE32=0xffffffff).
    277      */
    278     static final int IMPLICIT_TAG = 15;
    279 
    280     static boolean isAssignedCE32(int ce32) {
    281         return ce32 != FALLBACK_CE32 && ce32 != UNASSIGNED_CE32;
    282     }
    283 
    284     /**
    285      * We limit the number of CEs in an expansion
    286      * so that we can use a small number of length bits in the data structure,
    287      * and so that an implementation can copy CEs at runtime without growing a destination buffer.
    288      */
    289     static final int MAX_EXPANSION_LENGTH = 31;
    290     static final int MAX_INDEX = 0x7ffff;
    291 
    292     /**
    293      * Set if there is no match for the single (no-suffix) character itself.
    294      * This is only possible if there is a prefix.
    295      * In this case, discontiguous contraction matching cannot add combining marks
    296      * starting from an empty suffix.
    297      * The default CE32 is used anyway if there is no suffix match.
    298      */
    299     static final int CONTRACT_SINGLE_CP_NO_MATCH = 0x100;
    300     /** Set if the first character of every contraction suffix has lccc!=0. */
    301     static final int CONTRACT_NEXT_CCC = 0x200;
    302     /** Set if any contraction suffix ends with lccc!=0. */
    303     static final int CONTRACT_TRAILING_CCC = 0x400;
    304 
    305     /** For HANGUL_TAG: None of its Jamo CE32s isSpecialCE32(). */
    306     static final int HANGUL_NO_SPECIAL_JAMO = 0x100;
    307 
    308     static final int LEAD_ALL_UNASSIGNED = 0;
    309     static final int LEAD_ALL_FALLBACK = 0x100;
    310     static final int LEAD_MIXED = 0x200;
    311     static final int LEAD_TYPE_MASK = 0x300;
    312 
    313     static int makeLongPrimaryCE32(long p) { return (int)(p | LONG_PRIMARY_CE32_LOW_BYTE); }
    314 
    315     /** Turns the long-primary CE32 into a primary weight pppppp00. */
    316     static long primaryFromLongPrimaryCE32(int ce32) {
    317         return (long)ce32 & 0xffffff00L;
    318     }
    319     static long ceFromLongPrimaryCE32(int ce32) {
    320         return ((long)(ce32 & 0xffffff00) << 32) | COMMON_SEC_AND_TER_CE;
    321     }
    322 
    323     static int makeLongSecondaryCE32(int lower32) {
    324         return lower32 | SPECIAL_CE32_LOW_BYTE | LONG_SECONDARY_TAG;
    325     }
    326     static long ceFromLongSecondaryCE32(int ce32) {
    327         return (long)ce32 & 0xffffff00L;
    328     }
    329 
    330     /** Makes a special CE32 with tag, index and length. */
    331     static int makeCE32FromTagIndexAndLength(int tag, int index, int length) {
    332         return (index << 13) | (length << 8) | SPECIAL_CE32_LOW_BYTE | tag;
    333     }
    334     /** Makes a special CE32 with only tag and index. */
    335     static int makeCE32FromTagAndIndex(int tag, int index) {
    336         return (index << 13) | SPECIAL_CE32_LOW_BYTE | tag;
    337     }
    338 
    339     static boolean isSpecialCE32(int ce32) {
    340         return (ce32 & 0xff) >= SPECIAL_CE32_LOW_BYTE;
    341     }
    342 
    343     static int tagFromCE32(int ce32) {
    344         return ce32 & 0xf;
    345     }
    346 
    347     static boolean hasCE32Tag(int ce32, int tag) {
    348         return isSpecialCE32(ce32) && tagFromCE32(ce32) == tag;
    349     }
    350 
    351     static boolean isLongPrimaryCE32(int ce32) {
    352         return hasCE32Tag(ce32, LONG_PRIMARY_TAG);
    353     }
    354 
    355     static boolean isSimpleOrLongCE32(int ce32) {
    356         return !isSpecialCE32(ce32) ||
    357                 tagFromCE32(ce32) == LONG_PRIMARY_TAG ||
    358                 tagFromCE32(ce32) == LONG_SECONDARY_TAG;
    359     }
    360 
    361     /**
    362      * @return true if the ce32 yields one or more CEs without further data lookups
    363      */
    364     static boolean isSelfContainedCE32(int ce32) {
    365         return !isSpecialCE32(ce32) ||
    366                 tagFromCE32(ce32) == LONG_PRIMARY_TAG ||
    367                 tagFromCE32(ce32) == LONG_SECONDARY_TAG ||
    368                 tagFromCE32(ce32) == LATIN_EXPANSION_TAG;
    369     }
    370 
    371     static boolean isPrefixCE32(int ce32) {
    372         return hasCE32Tag(ce32, PREFIX_TAG);
    373     }
    374 
    375     static boolean isContractionCE32(int ce32) {
    376         return hasCE32Tag(ce32, CONTRACTION_TAG);
    377     }
    378 
    379     static boolean ce32HasContext(int ce32) {
    380         return isSpecialCE32(ce32) &&
    381                 (tagFromCE32(ce32) == PREFIX_TAG ||
    382                 tagFromCE32(ce32) == CONTRACTION_TAG);
    383     }
    384 
    385     /**
    386      * Get the first of the two Latin-expansion CEs encoded in ce32.
    387      * @see LATIN_EXPANSION_TAG
    388      */
    389     static long latinCE0FromCE32(int ce32) {
    390         return ((long)(ce32 & 0xff000000) << 32) | COMMON_SECONDARY_CE | ((ce32 & 0xff0000) >> 8);
    391     }
    392 
    393     /**
    394      * Get the second of the two Latin-expansion CEs encoded in ce32.
    395      * @see LATIN_EXPANSION_TAG
    396      */
    397     static long latinCE1FromCE32(int ce32) {
    398         return (((long)ce32 & 0xff00) << 16) | COMMON_TERTIARY_CE;
    399     }
    400 
    401     /**
    402      * Returns the data index from a special CE32.
    403      */
    404     static int indexFromCE32(int ce32) {
    405         return ce32 >>> 13;
    406     }
    407 
    408     /**
    409      * Returns the data length from a ce32.
    410      */
    411     static int lengthFromCE32(int ce32) {
    412         return (ce32 >> 8) & 31;
    413     }
    414 
    415     /**
    416      * Returns the digit value from a DIGIT_TAG ce32.
    417      */
    418     static char digitFromCE32(int ce32) {
    419         return (char)((ce32 >> 8) & 0xf);
    420     }
    421 
    422     /** Returns a 64-bit CE from a simple CE32 (not special). */
    423     static long ceFromSimpleCE32(int ce32) {
    424         // normal form ppppsstt -> pppp0000ss00tt00
    425         assert (ce32 & 0xff) < SPECIAL_CE32_LOW_BYTE;
    426         return ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | ((ce32 & 0xff) << 8);
    427     }
    428 
    429     /** Returns a 64-bit CE from a simple/long-primary/long-secondary CE32. */
    430     static long ceFromCE32(int ce32) {
    431         int tertiary = ce32 & 0xff;
    432         if(tertiary < SPECIAL_CE32_LOW_BYTE) {
    433             // normal form ppppsstt -> pppp0000ss00tt00
    434             return ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | (tertiary << 8);
    435         } else {
    436             ce32 -= tertiary;
    437             if((tertiary & 0xf) == LONG_PRIMARY_TAG) {
    438                 // long-primary form ppppppC1 -> pppppp00050000500
    439                 return ((long)ce32 << 32) | COMMON_SEC_AND_TER_CE;
    440             } else {
    441                 // long-secondary form ssssttC2 -> 00000000sssstt00
    442                 assert (tertiary & 0xf) == LONG_SECONDARY_TAG;
    443                 return ce32 & 0xffffffffL;
    444             }
    445         }
    446     }
    447 
    448     /** Creates a CE from a primary weight. */
    449     public static long makeCE(long p) {
    450         return (p << 32) | COMMON_SEC_AND_TER_CE;
    451     }
    452     /**
    453      * Creates a CE from a primary weight,
    454      * 16-bit secondary/tertiary weights, and a 2-bit quaternary.
    455      */
    456     static long makeCE(long p, int s, int t, int q) {
    457         return (p << 32) | ((long)s << 16) | t | (q << 6);
    458     }
    459 
    460     /**
    461      * Increments a 2-byte primary by a code point offset.
    462      */
    463     public static long incTwoBytePrimaryByOffset(long basePrimary, boolean isCompressible,
    464                                               int offset) {
    465         // Extract the second byte, minus the minimum byte value,
    466         // plus the offset, modulo the number of usable byte values, plus the minimum.
    467         // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
    468         long primary;
    469         if(isCompressible) {
    470             offset += ((int)(basePrimary >> 16) & 0xff) - 4;
    471             primary = ((offset % 251) + 4) << 16;
    472             offset /= 251;
    473         } else {
    474             offset += ((int)(basePrimary >> 16) & 0xff) - 2;
    475             primary = ((offset % 254) + 2) << 16;
    476             offset /= 254;
    477         }
    478         // First byte, assume no further overflow.
    479         return primary | ((basePrimary & 0xff000000L) + ((long)offset << 24));
    480     }
    481 
    482     /**
    483      * Increments a 3-byte primary by a code point offset.
    484      */
    485     public static long incThreeBytePrimaryByOffset(long basePrimary, boolean isCompressible,
    486                                                 int offset) {
    487         // Extract the third byte, minus the minimum byte value,
    488         // plus the offset, modulo the number of usable byte values, plus the minimum.
    489         offset += ((int)(basePrimary >> 8) & 0xff) - 2;
    490         long primary = ((offset % 254) + 2) << 8;
    491         offset /= 254;
    492         // Same with the second byte,
    493         // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
    494         if(isCompressible) {
    495             offset += ((int)(basePrimary >> 16) & 0xff) - 4;
    496             primary |= ((offset % 251) + 4) << 16;
    497             offset /= 251;
    498         } else {
    499             offset += ((int)(basePrimary >> 16) & 0xff) - 2;
    500             primary |= ((offset % 254) + 2) << 16;
    501             offset /= 254;
    502         }
    503         // First byte, assume no further overflow.
    504         return primary | ((basePrimary & 0xff000000L) + ((long)offset << 24));
    505     }
    506 
    507     /**
    508      * Decrements a 2-byte primary by one range step (1..0x7f).
    509      */
    510     static long decTwoBytePrimaryByOneStep(long basePrimary, boolean isCompressible, int step) {
    511         // Extract the second byte, minus the minimum byte value,
    512         // minus the step, modulo the number of usable byte values, plus the minimum.
    513         // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
    514         // Assume no further underflow for the first byte.
    515         assert(0 < step && step <= 0x7f);
    516         int byte2 = ((int)(basePrimary >> 16) & 0xff) - step;
    517         if(isCompressible) {
    518             if(byte2 < 4) {
    519                 byte2 += 251;
    520                 basePrimary -= 0x1000000;
    521             }
    522         } else {
    523             if(byte2 < 2) {
    524                 byte2 += 254;
    525                 basePrimary -= 0x1000000;
    526             }
    527         }
    528         return (basePrimary & 0xff000000L) | (byte2 << 16);
    529     }
    530 
    531     /**
    532      * Decrements a 3-byte primary by one range step (1..0x7f).
    533      */
    534     static long decThreeBytePrimaryByOneStep(long basePrimary, boolean isCompressible, int step) {
    535         // Extract the third byte, minus the minimum byte value,
    536         // minus the step, modulo the number of usable byte values, plus the minimum.
    537         assert(0 < step && step <= 0x7f);
    538         int byte3 = ((int)(basePrimary >> 8) & 0xff) - step;
    539         if(byte3 >= 2) {
    540             return (basePrimary & 0xffff0000L) | (byte3 << 8);
    541         }
    542         byte3 += 254;
    543         // Same with the second byte,
    544         // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
    545         int byte2 = ((int)(basePrimary >> 16) & 0xff) - 1;
    546         if(isCompressible) {
    547             if(byte2 < 4) {
    548                 byte2 = 0xfe;
    549                 basePrimary -= 0x1000000;
    550             }
    551         } else {
    552             if(byte2 < 2) {
    553                 byte2 = 0xff;
    554                 basePrimary -= 0x1000000;
    555             }
    556         }
    557         // First byte, assume no further underflow.
    558         return (basePrimary & 0xff000000L) | (byte2 << 16) | (byte3 << 8);
    559     }
    560 
    561     /**
    562      * Computes a 3-byte primary for c's OFFSET_TAG data "CE".
    563      */
    564     static long getThreeBytePrimaryForOffsetData(int c, long dataCE) {
    565         long p = dataCE >>> 32;  // three-byte primary pppppp00
    566         int lower32 = (int)dataCE;  // base code point b & step s: bbbbbbss (bit 7: isCompressible)
    567         int offset = (c - (lower32 >> 8)) * (lower32 & 0x7f);  // delta * increment
    568         boolean isCompressible = (lower32 & 0x80) != 0;
    569         return Collation.incThreeBytePrimaryByOffset(p, isCompressible, offset);
    570     }
    571 
    572     /**
    573      * Returns the unassigned-character implicit primary weight for any valid code point c.
    574      */
    575     static long unassignedPrimaryFromCodePoint(int c) {
    576         // Create a gap before U+0000. Use c=-1 for [first unassigned].
    577         ++c;
    578         // Fourth byte: 18 values, every 14th byte value (gap of 13).
    579         long primary = 2 + (c % 18) * 14;
    580         c /= 18;
    581         // Third byte: 254 values.
    582         primary |= (2 + (c % 254)) << 8;
    583         c /= 254;
    584         // Second byte: 251 values 04..FE excluding the primary compression bytes.
    585         primary |= (4 + (c % 251)) << 16;
    586         // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).
    587         return primary | ((long)UNASSIGNED_IMPLICIT_BYTE << 24);
    588     }
    589 
    590     static long unassignedCEFromCodePoint(int c) {
    591         return makeCE(unassignedPrimaryFromCodePoint(c));
    592     }
    593 
    594     // private Collation()  // No instantiation.
    595 }
    596