Home | History | Annotate | Download | only in coll
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5 *******************************************************************************
      6 * Copyright (C) 2013-2015, International Business Machines
      7 * Corporation and others.  All Rights Reserved.
      8 *******************************************************************************
      9 * CollationSettings.java, ported from collationsettings.h/.cpp
     10 *
     11 * C++ version created on: 2013feb07
     12 * created by: Markus W. Scherer
     13 */
     14 
     15 package android.icu.impl.coll;
     16 
     17 import java.util.Arrays;
     18 
     19 import android.icu.text.Collator;
     20 
     21 /**
     22  * Collation settings/options/attributes.
     23  * These are the values that can be changed via API.
     24  * @hide Only a subset of ICU is exposed in Android
     25  */
     26 public final class CollationSettings extends SharedObject {
     27     /**
     28      * Options bit 0: Perform the FCD check on the input text and deliver normalized text.
     29      */
     30     public static final int CHECK_FCD = 1;
     31     /**
     32      * Options bit 1: Numeric collation.
     33      * Also known as CODAN = COllate Digits As Numbers.
     34      *
     35      * Treat digit sequences as numbers with CE sequences in numeric order,
     36      * rather than returning a normal CE for each digit.
     37      */
     38     public static final int NUMERIC = 2;
     39     /**
     40      * "Shifted" alternate handling, see ALTERNATE_MASK.
     41      */
     42     static final int SHIFTED = 4;
     43     /**
     44      * Options bits 3..2: Alternate-handling mask. 0 for non-ignorable.
     45      * Reserve values 8 and 0xc for shift-trimmed and blanked.
     46      */
     47     static final int ALTERNATE_MASK = 0xc;
     48     /**
     49      * Options bits 6..4: The 3-bit maxVariable value bit field is shifted by this value.
     50      */
     51     static final int MAX_VARIABLE_SHIFT = 4;
     52     /** maxVariable options bit mask before shifting. */
     53     static final int MAX_VARIABLE_MASK = 0x70;
     54     /** Options bit 7: Reserved/unused/0. */
     55     /**
     56      * Options bit 8: Sort uppercase first if caseLevel or caseFirst is on.
     57      */
     58     static final int UPPER_FIRST = 0x100;
     59     /**
     60      * Options bit 9: Keep the case bits in the tertiary weight (they trump other tertiary values)
     61      * unless case level is on (when they are *moved* into the separate case level).
     62      * By default, the case bits are removed from the tertiary weight (ignored).
     63      *
     64      * When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to
     65      * the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs. UCOL_UPPER_FIRST.
     66      */
     67     public static final int CASE_FIRST = 0x200;
     68     /**
     69      * Options bit mask for caseFirst and upperFirst, before shifting.
     70      * Same value as caseFirst==upperFirst.
     71      */
     72     public static final int CASE_FIRST_AND_UPPER_MASK = CASE_FIRST | UPPER_FIRST;
     73     /**
     74      * Options bit 10: Insert the case level between the secondary and tertiary levels.
     75      */
     76     public static final int CASE_LEVEL = 0x400;
     77     /**
     78      * Options bit 11: Compare secondary weights backwards. ("French secondary")
     79      */
     80     public static final int BACKWARD_SECONDARY = 0x800;
     81     /**
     82      * Options bits 15..12: The 4-bit strength value bit field is shifted by this value.
     83      * It is the top used bit field in the options. (No need to mask after shifting.)
     84      */
     85     static final int STRENGTH_SHIFT = 12;
     86     /** Strength options bit mask before shifting. */
     87     static final int STRENGTH_MASK = 0xf000;
     88 
     89     /** maxVariable values */
     90     static final int MAX_VAR_SPACE = 0;
     91     static final int MAX_VAR_PUNCT = 1;
     92     static final int MAX_VAR_SYMBOL = 2;
     93     static final int MAX_VAR_CURRENCY = 3;
     94 
     95     CollationSettings() {}
     96 
     97     @Override
     98     public CollationSettings clone() {
     99         CollationSettings newSettings = (CollationSettings)super.clone();
    100         // Note: The reorderTable, reorderRanges, and reorderCodes need not be cloned
    101         // because, in Java, they only get replaced but not modified.
    102         newSettings.fastLatinPrimaries = fastLatinPrimaries.clone();
    103         return newSettings;
    104     }
    105 
    106     @Override
    107     public boolean equals(Object other) {
    108         if(other == null) { return false; }
    109         if(!this.getClass().equals(other.getClass())) { return false; }
    110         CollationSettings o = (CollationSettings)other;
    111         if(options != o.options) { return false; }
    112         if((options & ALTERNATE_MASK) != 0 && variableTop != o.variableTop) { return false; }
    113         if(!Arrays.equals(reorderCodes, o.reorderCodes)) { return false; }
    114         return true;
    115     }
    116 
    117     @Override
    118     public int hashCode() {
    119         int h = options << 8;
    120         if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
    121         h ^= reorderCodes.length;
    122         for(int i = 0; i < reorderCodes.length; ++i) {
    123             h ^= (reorderCodes[i] << i);
    124         }
    125         return h;
    126     }
    127 
    128     public void resetReordering() {
    129         // When we turn off reordering, we want to set a null permutation
    130         // rather than a no-op permutation.
    131         reorderTable = null;
    132         minHighNoReorder = 0;
    133         reorderRanges = null;
    134         reorderCodes = EMPTY_INT_ARRAY;
    135     }
    136 
    137     void aliasReordering(CollationData data, int[] codesAndRanges, int codesLength, byte[] table) {
    138         int[] codes;
    139         if(codesLength == codesAndRanges.length) {
    140             codes = codesAndRanges;
    141         } else {
    142             // TODO: Java 6: Arrays.copyOf(codes, codesLength);
    143             codes = new int[codesLength];
    144             System.arraycopy(codesAndRanges, 0, codes, 0, codesLength);
    145         }
    146         int rangesStart = codesLength;
    147         int rangesLimit = codesAndRanges.length;
    148         int rangesLength = rangesLimit - rangesStart;
    149         if(table != null &&
    150                 (rangesLength == 0 ?
    151                         !reorderTableHasSplitBytes(table) :
    152                         rangesLength >= 2 &&
    153                         // The first offset must be 0. The last offset must not be 0.
    154                         (codesAndRanges[rangesStart] & 0xffff) == 0 &&
    155                         (codesAndRanges[rangesLimit - 1] & 0xffff) != 0)) {
    156             reorderTable = table;
    157             reorderCodes = codes;
    158             // Drop ranges before the first split byte. They are reordered by the table.
    159             // This then speeds up reordering of the remaining ranges.
    160             int firstSplitByteRangeIndex = rangesStart;
    161             while(firstSplitByteRangeIndex < rangesLimit &&
    162                     (codesAndRanges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
    163                 // The second byte of the primary limit is 0.
    164                 ++firstSplitByteRangeIndex;
    165             }
    166             if(firstSplitByteRangeIndex == rangesLimit) {
    167                 assert(!reorderTableHasSplitBytes(table));
    168                 minHighNoReorder = 0;
    169                 reorderRanges = null;
    170             } else {
    171                 assert(table[codesAndRanges[firstSplitByteRangeIndex] >>> 24] == 0);
    172                 minHighNoReorder = codesAndRanges[rangesLimit - 1] & 0xffff0000L;
    173                 setReorderRanges(codesAndRanges, firstSplitByteRangeIndex,
    174                         rangesLimit - firstSplitByteRangeIndex);
    175             }
    176             return;
    177         }
    178         // Regenerate missing data.
    179         setReordering(data, codes);
    180     }
    181 
    182     public void setReordering(CollationData data, int[] codes) {
    183         if(codes.length == 0 || (codes.length == 1 && codes[0] == Collator.ReorderCodes.NONE)) {
    184             resetReordering();
    185             return;
    186         }
    187         UVector32 rangesList = new UVector32();
    188         data.makeReorderRanges(codes, rangesList);
    189         int rangesLength = rangesList.size();
    190         if(rangesLength == 0) {
    191             resetReordering();
    192             return;
    193         }
    194         int[] ranges = rangesList.getBuffer();
    195         // ranges[] contains at least two (limit, offset) pairs.
    196         // The first offset must be 0. The last offset must not be 0.
    197         // Separators (at the low end) and trailing weights (at the high end)
    198         // are never reordered.
    199         assert(rangesLength >= 2);
    200         assert((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
    201         minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000L;
    202 
    203         // Write the lead byte permutation table.
    204         // Set a 0 for each lead byte that has a range boundary in the middle.
    205         byte[] table = new byte[256];
    206         int b = 0;
    207         int firstSplitByteRangeIndex = -1;
    208         for(int i = 0; i < rangesLength; ++i) {
    209             int pair = ranges[i];
    210             int limit1 = pair >>> 24;
    211             while(b < limit1) {
    212                 table[b] = (byte)(b + pair);
    213                 ++b;
    214             }
    215             // Check the second byte of the limit.
    216             if((pair & 0xff0000) != 0) {
    217                 table[limit1] = 0;
    218                 b = limit1 + 1;
    219                 if(firstSplitByteRangeIndex < 0) {
    220                     firstSplitByteRangeIndex = i;
    221                 }
    222             }
    223         }
    224         while(b <= 0xff) {
    225             table[b] = (byte)b;
    226             ++b;
    227         }
    228         int rangesStart;
    229         if(firstSplitByteRangeIndex < 0) {
    230             // The lead byte permutation table alone suffices for reordering.
    231             rangesStart = rangesLength = 0;
    232         } else {
    233             // Remove the ranges below the first split byte.
    234             rangesStart = firstSplitByteRangeIndex;
    235             rangesLength -= firstSplitByteRangeIndex;
    236         }
    237         setReorderArrays(codes, ranges, rangesStart, rangesLength, table);
    238     }
    239 
    240     private void setReorderArrays(int[] codes,
    241             int[] ranges, int rangesStart, int rangesLength, byte[] table) {
    242         // Very different from C++. See the comments after the reorderCodes declaration.
    243         if(codes == null) {
    244             codes = EMPTY_INT_ARRAY;
    245         }
    246         assert (codes.length == 0) == (table == null);
    247         reorderTable = table;
    248         reorderCodes = codes;
    249         setReorderRanges(ranges, rangesStart, rangesLength);
    250     }
    251 
    252     private void setReorderRanges(int[] ranges, int rangesStart, int rangesLength) {
    253         if(rangesLength == 0) {
    254             reorderRanges = null;
    255         } else {
    256             reorderRanges = new long[rangesLength];
    257             int i = 0;
    258             do {
    259                 reorderRanges[i++] = ranges[rangesStart++] & 0xffffffffL;
    260             } while(i < rangesLength);
    261         }
    262     }
    263 
    264     public void copyReorderingFrom(CollationSettings other) {
    265         if(!other.hasReordering()) {
    266             resetReordering();
    267             return;
    268         }
    269         minHighNoReorder = other.minHighNoReorder;
    270         reorderTable = other.reorderTable;
    271         reorderRanges = other.reorderRanges;
    272         reorderCodes = other.reorderCodes;
    273     }
    274 
    275     public boolean hasReordering() { return reorderTable != null; }
    276 
    277     private static boolean reorderTableHasSplitBytes(byte[] table) {
    278         assert(table[0] == 0);
    279         for(int i = 1; i < 256; ++i) {
    280             if(table[i] == 0) {
    281                 return true;
    282             }
    283         }
    284         return false;
    285     }
    286 
    287     public long reorder(long p) {
    288         byte b = reorderTable[(int)p >>> 24];
    289         if(b != 0 || p <= Collation.NO_CE_PRIMARY) {
    290             return ((b & 0xffL) << 24) | (p & 0xffffff);
    291         } else {
    292             return reorderEx(p);
    293         }
    294     }
    295 
    296     private long reorderEx(long p) {
    297         assert minHighNoReorder > 0;
    298         if(p >= minHighNoReorder) { return p; }
    299         // Round up p so that its lower 16 bits are >= any offset bits.
    300         // Then compare q directly with (limit, offset) pairs.
    301         long q = p | 0xffff;
    302         long r;
    303         int i = 0;
    304         while(q >= (r = reorderRanges[i])) { ++i; }
    305         return p + ((long)(short)r << 24);
    306     }
    307 
    308     // In C++, we use enums for attributes and their values, with a special value for the default.
    309     // Combined getter/setter methods handle many attributes.
    310     // In Java, we have specific methods for getting, setting, and set-to-default,
    311     // except that this class uses bits in its own bit set for simple values.
    312 
    313     public void setStrength(int value) {
    314         int noStrength = options & ~STRENGTH_MASK;
    315         switch(value) {
    316         case Collator.PRIMARY:
    317         case Collator.SECONDARY:
    318         case Collator.TERTIARY:
    319         case Collator.QUATERNARY:
    320         case Collator.IDENTICAL:
    321             options = noStrength | (value << STRENGTH_SHIFT);
    322             break;
    323         default:
    324             throw new IllegalArgumentException("illegal strength value " + value);
    325         }
    326     }
    327 
    328     public void setStrengthDefault(int defaultOptions) {
    329         int noStrength = options & ~STRENGTH_MASK;
    330         options = noStrength | (defaultOptions & STRENGTH_MASK);
    331     }
    332 
    333     static int getStrength(int options) {
    334         return options >> STRENGTH_SHIFT;
    335     }
    336 
    337     public int getStrength() {
    338         return getStrength(options);
    339     }
    340 
    341     /** Sets the options bit for an on/off attribute. */
    342     public void setFlag(int bit, boolean value) {
    343         if(value) {
    344             options |= bit;
    345         } else {
    346             options &= ~bit;
    347         }
    348     }
    349 
    350     public void setFlagDefault(int bit, int defaultOptions) {
    351         options = (options & ~bit) | (defaultOptions & bit);
    352     }
    353 
    354     public boolean getFlag(int bit) {
    355         return (options & bit) != 0;
    356     }
    357 
    358     public void setCaseFirst(int value) {
    359         assert value == 0 || value == CASE_FIRST || value == CASE_FIRST_AND_UPPER_MASK;
    360         int noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
    361         options = noCaseFirst | value;
    362     }
    363 
    364     public void setCaseFirstDefault(int defaultOptions) {
    365         int noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
    366         options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
    367     }
    368 
    369     public int getCaseFirst() {
    370         return options & CASE_FIRST_AND_UPPER_MASK;
    371     }
    372 
    373     public void setAlternateHandlingShifted(boolean value) {
    374         int noAlternate = options & ~ALTERNATE_MASK;
    375         if(value) {
    376             options = noAlternate | SHIFTED;
    377         } else {
    378             options = noAlternate;
    379         }
    380     }
    381 
    382     public void setAlternateHandlingDefault(int defaultOptions) {
    383         int noAlternate = options & ~ALTERNATE_MASK;
    384         options = noAlternate | (defaultOptions & ALTERNATE_MASK);
    385     }
    386 
    387     public boolean getAlternateHandling() {
    388         return (options & ALTERNATE_MASK) != 0;
    389     }
    390 
    391     public void setMaxVariable(int value, int defaultOptions) {
    392         int noMax = options & ~MAX_VARIABLE_MASK;
    393         switch(value) {
    394         case MAX_VAR_SPACE:
    395         case MAX_VAR_PUNCT:
    396         case MAX_VAR_SYMBOL:
    397         case MAX_VAR_CURRENCY:
    398             options = noMax | (value << MAX_VARIABLE_SHIFT);
    399             break;
    400         case -1:
    401             options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
    402             break;
    403         default:
    404             throw new IllegalArgumentException("illegal maxVariable value " + value);
    405         }
    406     }
    407 
    408     public int getMaxVariable() {
    409         return (options & MAX_VARIABLE_MASK) >> MAX_VARIABLE_SHIFT;
    410     }
    411 
    412     /**
    413      * Include case bits in the tertiary level if caseLevel=off and caseFirst!=off.
    414      */
    415     static boolean isTertiaryWithCaseBits(int options) {
    416         return (options & (CASE_LEVEL | CASE_FIRST)) == CASE_FIRST;
    417     }
    418     static int getTertiaryMask(int options) {
    419         // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
    420         return isTertiaryWithCaseBits(options) ?
    421                 Collation.CASE_AND_TERTIARY_MASK : Collation.ONLY_TERTIARY_MASK;
    422     }
    423 
    424     static boolean sortsTertiaryUpperCaseFirst(int options) {
    425         // On tertiary level, consider case bits and sort uppercase first
    426         // if caseLevel is off and caseFirst==upperFirst.
    427         return (options & (CASE_LEVEL | CASE_FIRST_AND_UPPER_MASK)) == CASE_FIRST_AND_UPPER_MASK;
    428     }
    429 
    430     public boolean dontCheckFCD() {
    431         return (options & CHECK_FCD) == 0;
    432     }
    433 
    434     boolean hasBackwardSecondary() {
    435         return (options & BACKWARD_SECONDARY) != 0;
    436     }
    437 
    438     public boolean isNumeric() {
    439         return (options & NUMERIC) != 0;
    440     }
    441 
    442     /** CHECK_FCD etc. */
    443     public int options = (Collator.TERTIARY << STRENGTH_SHIFT) |  // DEFAULT_STRENGTH
    444             (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT);
    445     /** Variable-top primary weight. */
    446     public long variableTop;
    447     /**
    448      * 256-byte table for reordering permutation of primary lead bytes; null if no reordering.
    449      * A 0 entry at a non-zero index means that the primary lead byte is "split"
    450      * (there are different offsets for primaries that share that lead byte)
    451      * and the reordering offset must be determined via the reorderRanges.
    452      */
    453     public byte[] reorderTable;
    454     /** Limit of last reordered range. 0 if no reordering or no split bytes. */
    455     long minHighNoReorder;
    456     /**
    457      * Primary-weight ranges for script reordering,
    458      * to be used by reorder(p) for split-reordered primary lead bytes.
    459      *
    460      * <p>Each entry is a (limit, offset) pair.
    461      * The upper 16 bits of the entry are the upper 16 bits of the
    462      * exclusive primary limit of a range.
    463      * Primaries between the previous limit and this one have their lead bytes
    464      * modified by the signed offset (-0xff..+0xff) stored in the lower 16 bits.
    465      *
    466      * <p>CollationData.makeReorderRanges() writes a full list where the first range
    467      * (at least for terminators and separators) has a 0 offset.
    468      * The last range has a non-zero offset.
    469      * minHighNoReorder is set to the limit of that last range.
    470      *
    471      * <p>In the settings object, the initial ranges before the first split lead byte
    472      * are omitted for efficiency; they are handled by reorder(p) via the reorderTable.
    473      * If there are no split-reordered lead bytes, then no ranges are needed.
    474      */
    475     long[] reorderRanges;
    476     /** Array of reorder codes; ignored if length == 0. */
    477     public int[] reorderCodes = EMPTY_INT_ARRAY;
    478     // Note: In C++, we keep a memory block around for the reorder codes,
    479     // the ranges, and the permutation table,
    480     // and modify them for new codes.
    481     // In Java, we simply copy references and then never modify the array contents.
    482     // The caller must abandon the arrays.
    483     // Reorder codes from the public setter API must be cloned.
    484     private static final int[] EMPTY_INT_ARRAY = new int[0];
    485 
    486     /** Options for CollationFastLatin. Negative if disabled. */
    487     public int fastLatinOptions = -1;
    488     // fastLatinPrimaries.length must be equal to CollationFastLatin.LATIN_LIMIT,
    489     // but we do not import CollationFastLatin to reduce circular dependencies.
    490     public char[] fastLatinPrimaries = new char[0x180];  // mutable contents
    491 }
    492