Home | History | Annotate | Download | only in coll
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 /*
      3 *******************************************************************************
      4 * Copyright (C) 2013-2015, International Business Machines
      5 * Corporation and others.  All Rights Reserved.
      6 *******************************************************************************
      7 * CollationSettings.java, ported from collationsettings.h/.cpp
      8 *
      9 * C++ version created on: 2013feb07
     10 * created by: Markus W. Scherer
     11 */
     12 
     13 package android.icu.impl.coll;
     14 
     15 import java.util.Arrays;
     16 
     17 import android.icu.text.Collator;
     18 
     19 /**
     20  * Collation settings/options/attributes.
     21  * These are the values that can be changed via API.
     22  * @hide Only a subset of ICU is exposed in Android
     23  */
     24 public final class CollationSettings extends SharedObject {
     25     /**
     26      * Options bit 0: Perform the FCD check on the input text and deliver normalized text.
     27      */
     28     public static final int CHECK_FCD = 1;
     29     /**
     30      * Options bit 1: Numeric collation.
     31      * Also known as CODAN = COllate Digits As Numbers.
     32      *
     33      * Treat digit sequences as numbers with CE sequences in numeric order,
     34      * rather than returning a normal CE for each digit.
     35      */
     36     public static final int NUMERIC = 2;
     37     /**
     38      * "Shifted" alternate handling, see ALTERNATE_MASK.
     39      */
     40     static final int SHIFTED = 4;
     41     /**
     42      * Options bits 3..2: Alternate-handling mask. 0 for non-ignorable.
     43      * Reserve values 8 and 0xc for shift-trimmed and blanked.
     44      */
     45     static final int ALTERNATE_MASK = 0xc;
     46     /**
     47      * Options bits 6..4: The 3-bit maxVariable value bit field is shifted by this value.
     48      */
     49     static final int MAX_VARIABLE_SHIFT = 4;
     50     /** maxVariable options bit mask before shifting. */
     51     static final int MAX_VARIABLE_MASK = 0x70;
     52     /** Options bit 7: Reserved/unused/0. */
     53     /**
     54      * Options bit 8: Sort uppercase first if caseLevel or caseFirst is on.
     55      */
     56     static final int UPPER_FIRST = 0x100;
     57     /**
     58      * Options bit 9: Keep the case bits in the tertiary weight (they trump other tertiary values)
     59      * unless case level is on (when they are *moved* into the separate case level).
     60      * By default, the case bits are removed from the tertiary weight (ignored).
     61      *
     62      * When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to
     63      * the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs. UCOL_UPPER_FIRST.
     64      */
     65     public static final int CASE_FIRST = 0x200;
     66     /**
     67      * Options bit mask for caseFirst and upperFirst, before shifting.
     68      * Same value as caseFirst==upperFirst.
     69      */
     70     public static final int CASE_FIRST_AND_UPPER_MASK = CASE_FIRST | UPPER_FIRST;
     71     /**
     72      * Options bit 10: Insert the case level between the secondary and tertiary levels.
     73      */
     74     public static final int CASE_LEVEL = 0x400;
     75     /**
     76      * Options bit 11: Compare secondary weights backwards. ("French secondary")
     77      */
     78     public static final int BACKWARD_SECONDARY = 0x800;
     79     /**
     80      * Options bits 15..12: The 4-bit strength value bit field is shifted by this value.
     81      * It is the top used bit field in the options. (No need to mask after shifting.)
     82      */
     83     static final int STRENGTH_SHIFT = 12;
     84     /** Strength options bit mask before shifting. */
     85     static final int STRENGTH_MASK = 0xf000;
     86 
     87     /** maxVariable values */
     88     static final int MAX_VAR_SPACE = 0;
     89     static final int MAX_VAR_PUNCT = 1;
     90     static final int MAX_VAR_SYMBOL = 2;
     91     static final int MAX_VAR_CURRENCY = 3;
     92 
     93     CollationSettings() {}
     94 
     95     @Override
     96     public CollationSettings clone() {
     97         CollationSettings newSettings = (CollationSettings)super.clone();
     98         // Note: The reorderTable, reorderRanges, and reorderCodes need not be cloned
     99         // because, in Java, they only get replaced but not modified.
    100         newSettings.fastLatinPrimaries = fastLatinPrimaries.clone();
    101         return newSettings;
    102     }
    103 
    104     @Override
    105     public boolean equals(Object other) {
    106         if(other == null) { return false; }
    107         if(!this.getClass().equals(other.getClass())) { return false; }
    108         CollationSettings o = (CollationSettings)other;
    109         if(options != o.options) { return false; }
    110         if((options & ALTERNATE_MASK) != 0 && variableTop != o.variableTop) { return false; }
    111         if(!Arrays.equals(reorderCodes, o.reorderCodes)) { return false; }
    112         return true;
    113     }
    114 
    115     @Override
    116     public int hashCode() {
    117         int h = options << 8;
    118         if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
    119         h ^= reorderCodes.length;
    120         for(int i = 0; i < reorderCodes.length; ++i) {
    121             h ^= (reorderCodes[i] << i);
    122         }
    123         return h;
    124     }
    125 
    126     public void resetReordering() {
    127         // When we turn off reordering, we want to set a null permutation
    128         // rather than a no-op permutation.
    129         reorderTable = null;
    130         minHighNoReorder = 0;
    131         reorderRanges = null;
    132         reorderCodes = EMPTY_INT_ARRAY;
    133     }
    134 
    135     void aliasReordering(CollationData data, int[] codesAndRanges, int codesLength, byte[] table) {
    136         int[] codes;
    137         if(codesLength == codesAndRanges.length) {
    138             codes = codesAndRanges;
    139         } else {
    140             // TODO: Java 6: Arrays.copyOf(codes, codesLength);
    141             codes = new int[codesLength];
    142             System.arraycopy(codesAndRanges, 0, codes, 0, codesLength);
    143         }
    144         int rangesStart = codesLength;
    145         int rangesLimit = codesAndRanges.length;
    146         int rangesLength = rangesLimit - rangesStart;
    147         if(table != null &&
    148                 (rangesLength == 0 ?
    149                         !reorderTableHasSplitBytes(table) :
    150                         rangesLength >= 2 &&
    151                         // The first offset must be 0. The last offset must not be 0.
    152                         (codesAndRanges[rangesStart] & 0xffff) == 0 &&
    153                         (codesAndRanges[rangesLimit - 1] & 0xffff) != 0)) {
    154             reorderTable = table;
    155             reorderCodes = codes;
    156             // Drop ranges before the first split byte. They are reordered by the table.
    157             // This then speeds up reordering of the remaining ranges.
    158             int firstSplitByteRangeIndex = rangesStart;
    159             while(firstSplitByteRangeIndex < rangesLimit &&
    160                     (codesAndRanges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
    161                 // The second byte of the primary limit is 0.
    162                 ++firstSplitByteRangeIndex;
    163             }
    164             if(firstSplitByteRangeIndex == rangesLimit) {
    165                 assert(!reorderTableHasSplitBytes(table));
    166                 minHighNoReorder = 0;
    167                 reorderRanges = null;
    168             } else {
    169                 assert(table[codesAndRanges[firstSplitByteRangeIndex] >>> 24] == 0);
    170                 minHighNoReorder = codesAndRanges[rangesLimit - 1] & 0xffff0000L;
    171                 setReorderRanges(codesAndRanges, firstSplitByteRangeIndex,
    172                         rangesLimit - firstSplitByteRangeIndex);
    173             }
    174             return;
    175         }
    176         // Regenerate missing data.
    177         setReordering(data, codes);
    178     }
    179 
    180     public void setReordering(CollationData data, int[] codes) {
    181         if(codes.length == 0 || (codes.length == 1 && codes[0] == Collator.ReorderCodes.NONE)) {
    182             resetReordering();
    183             return;
    184         }
    185         UVector32 rangesList = new UVector32();
    186         data.makeReorderRanges(codes, rangesList);
    187         int rangesLength = rangesList.size();
    188         if(rangesLength == 0) {
    189             resetReordering();
    190             return;
    191         }
    192         int[] ranges = rangesList.getBuffer();
    193         // ranges[] contains at least two (limit, offset) pairs.
    194         // The first offset must be 0. The last offset must not be 0.
    195         // Separators (at the low end) and trailing weights (at the high end)
    196         // are never reordered.
    197         assert(rangesLength >= 2);
    198         assert((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
    199         minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000L;
    200 
    201         // Write the lead byte permutation table.
    202         // Set a 0 for each lead byte that has a range boundary in the middle.
    203         byte[] table = new byte[256];
    204         int b = 0;
    205         int firstSplitByteRangeIndex = -1;
    206         for(int i = 0; i < rangesLength; ++i) {
    207             int pair = ranges[i];
    208             int limit1 = pair >>> 24;
    209             while(b < limit1) {
    210                 table[b] = (byte)(b + pair);
    211                 ++b;
    212             }
    213             // Check the second byte of the limit.
    214             if((pair & 0xff0000) != 0) {
    215                 table[limit1] = 0;
    216                 b = limit1 + 1;
    217                 if(firstSplitByteRangeIndex < 0) {
    218                     firstSplitByteRangeIndex = i;
    219                 }
    220             }
    221         }
    222         while(b <= 0xff) {
    223             table[b] = (byte)b;
    224             ++b;
    225         }
    226         int rangesStart;
    227         if(firstSplitByteRangeIndex < 0) {
    228             // The lead byte permutation table alone suffices for reordering.
    229             rangesStart = rangesLength = 0;
    230         } else {
    231             // Remove the ranges below the first split byte.
    232             rangesStart = firstSplitByteRangeIndex;
    233             rangesLength -= firstSplitByteRangeIndex;
    234         }
    235         setReorderArrays(codes, ranges, rangesStart, rangesLength, table);
    236     }
    237 
    238     private void setReorderArrays(int[] codes,
    239             int[] ranges, int rangesStart, int rangesLength, byte[] table) {
    240         // Very different from C++. See the comments after the reorderCodes declaration.
    241         if(codes == null) {
    242             codes = EMPTY_INT_ARRAY;
    243         }
    244         assert (codes.length == 0) == (table == null);
    245         reorderTable = table;
    246         reorderCodes = codes;
    247         setReorderRanges(ranges, rangesStart, rangesLength);
    248     }
    249 
    250     private void setReorderRanges(int[] ranges, int rangesStart, int rangesLength) {
    251         if(rangesLength == 0) {
    252             reorderRanges = null;
    253         } else {
    254             reorderRanges = new long[rangesLength];
    255             int i = 0;
    256             do {
    257                 reorderRanges[i++] = ranges[rangesStart++] & 0xffffffffL;
    258             } while(i < rangesLength);
    259         }
    260     }
    261 
    262     public void copyReorderingFrom(CollationSettings other) {
    263         if(!other.hasReordering()) {
    264             resetReordering();
    265             return;
    266         }
    267         minHighNoReorder = other.minHighNoReorder;
    268         reorderTable = other.reorderTable;
    269         reorderRanges = other.reorderRanges;
    270         reorderCodes = other.reorderCodes;
    271     }
    272 
    273     public boolean hasReordering() { return reorderTable != null; }
    274 
    275     private static boolean reorderTableHasSplitBytes(byte[] table) {
    276         assert(table[0] == 0);
    277         for(int i = 1; i < 256; ++i) {
    278             if(table[i] == 0) {
    279                 return true;
    280             }
    281         }
    282         return false;
    283     }
    284 
    285     public long reorder(long p) {
    286         byte b = reorderTable[(int)p >>> 24];
    287         if(b != 0 || p <= Collation.NO_CE_PRIMARY) {
    288             return ((b & 0xffL) << 24) | (p & 0xffffff);
    289         } else {
    290             return reorderEx(p);
    291         }
    292     }
    293 
    294     private long reorderEx(long p) {
    295         assert minHighNoReorder > 0;
    296         if(p >= minHighNoReorder) { return p; }
    297         // Round up p so that its lower 16 bits are >= any offset bits.
    298         // Then compare q directly with (limit, offset) pairs.
    299         long q = p | 0xffff;
    300         long r;
    301         int i = 0;
    302         while(q >= (r = reorderRanges[i])) { ++i; }
    303         return p + ((long)(short)r << 24);
    304     }
    305 
    306     // In C++, we use enums for attributes and their values, with a special value for the default.
    307     // Combined getter/setter methods handle many attributes.
    308     // In Java, we have specific methods for getting, setting, and set-to-default,
    309     // except that this class uses bits in its own bit set for simple values.
    310 
    311     public void setStrength(int value) {
    312         int noStrength = options & ~STRENGTH_MASK;
    313         switch(value) {
    314         case Collator.PRIMARY:
    315         case Collator.SECONDARY:
    316         case Collator.TERTIARY:
    317         case Collator.QUATERNARY:
    318         case Collator.IDENTICAL:
    319             options = noStrength | (value << STRENGTH_SHIFT);
    320             break;
    321         default:
    322             throw new IllegalArgumentException("illegal strength value " + value);
    323         }
    324     }
    325 
    326     public void setStrengthDefault(int defaultOptions) {
    327         int noStrength = options & ~STRENGTH_MASK;
    328         options = noStrength | (defaultOptions & STRENGTH_MASK);
    329     }
    330 
    331     static int getStrength(int options) {
    332         return options >> STRENGTH_SHIFT;
    333     }
    334 
    335     public int getStrength() {
    336         return getStrength(options);
    337     }
    338 
    339     /** Sets the options bit for an on/off attribute. */
    340     public void setFlag(int bit, boolean value) {
    341         if(value) {
    342             options |= bit;
    343         } else {
    344             options &= ~bit;
    345         }
    346     }
    347 
    348     public void setFlagDefault(int bit, int defaultOptions) {
    349         options = (options & ~bit) | (defaultOptions & bit);
    350     }
    351 
    352     public boolean getFlag(int bit) {
    353         return (options & bit) != 0;
    354     }
    355 
    356     public void setCaseFirst(int value) {
    357         assert value == 0 || value == CASE_FIRST || value == CASE_FIRST_AND_UPPER_MASK;
    358         int noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
    359         options = noCaseFirst | value;
    360     }
    361 
    362     public void setCaseFirstDefault(int defaultOptions) {
    363         int noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
    364         options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
    365     }
    366 
    367     public int getCaseFirst() {
    368         return options & CASE_FIRST_AND_UPPER_MASK;
    369     }
    370 
    371     public void setAlternateHandlingShifted(boolean value) {
    372         int noAlternate = options & ~ALTERNATE_MASK;
    373         if(value) {
    374             options = noAlternate | SHIFTED;
    375         } else {
    376             options = noAlternate;
    377         }
    378     }
    379 
    380     public void setAlternateHandlingDefault(int defaultOptions) {
    381         int noAlternate = options & ~ALTERNATE_MASK;
    382         options = noAlternate | (defaultOptions & ALTERNATE_MASK);
    383     }
    384 
    385     public boolean getAlternateHandling() {
    386         return (options & ALTERNATE_MASK) != 0;
    387     }
    388 
    389     public void setMaxVariable(int value, int defaultOptions) {
    390         int noMax = options & ~MAX_VARIABLE_MASK;
    391         switch(value) {
    392         case MAX_VAR_SPACE:
    393         case MAX_VAR_PUNCT:
    394         case MAX_VAR_SYMBOL:
    395         case MAX_VAR_CURRENCY:
    396             options = noMax | (value << MAX_VARIABLE_SHIFT);
    397             break;
    398         case -1:
    399             options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
    400             break;
    401         default:
    402             throw new IllegalArgumentException("illegal maxVariable value " + value);
    403         }
    404     }
    405 
    406     public int getMaxVariable() {
    407         return (options & MAX_VARIABLE_MASK) >> MAX_VARIABLE_SHIFT;
    408     }
    409 
    410     /**
    411      * Include case bits in the tertiary level if caseLevel=off and caseFirst!=off.
    412      */
    413     static boolean isTertiaryWithCaseBits(int options) {
    414         return (options & (CASE_LEVEL | CASE_FIRST)) == CASE_FIRST;
    415     }
    416     static int getTertiaryMask(int options) {
    417         // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
    418         return isTertiaryWithCaseBits(options) ?
    419                 Collation.CASE_AND_TERTIARY_MASK : Collation.ONLY_TERTIARY_MASK;
    420     }
    421 
    422     static boolean sortsTertiaryUpperCaseFirst(int options) {
    423         // On tertiary level, consider case bits and sort uppercase first
    424         // if caseLevel is off and caseFirst==upperFirst.
    425         return (options & (CASE_LEVEL | CASE_FIRST_AND_UPPER_MASK)) == CASE_FIRST_AND_UPPER_MASK;
    426     }
    427 
    428     public boolean dontCheckFCD() {
    429         return (options & CHECK_FCD) == 0;
    430     }
    431 
    432     boolean hasBackwardSecondary() {
    433         return (options & BACKWARD_SECONDARY) != 0;
    434     }
    435 
    436     public boolean isNumeric() {
    437         return (options & NUMERIC) != 0;
    438     }
    439 
    440     /** CHECK_FCD etc. */
    441     public int options = (Collator.TERTIARY << STRENGTH_SHIFT) |  // DEFAULT_STRENGTH
    442             (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT);
    443     /** Variable-top primary weight. */
    444     public long variableTop;
    445     /**
    446      * 256-byte table for reordering permutation of primary lead bytes; null if no reordering.
    447      * A 0 entry at a non-zero index means that the primary lead byte is "split"
    448      * (there are different offsets for primaries that share that lead byte)
    449      * and the reordering offset must be determined via the reorderRanges.
    450      */
    451     public byte[] reorderTable;
    452     /** Limit of last reordered range. 0 if no reordering or no split bytes. */
    453     long minHighNoReorder;
    454     /**
    455      * Primary-weight ranges for script reordering,
    456      * to be used by reorder(p) for split-reordered primary lead bytes.
    457      *
    458      * <p>Each entry is a (limit, offset) pair.
    459      * The upper 16 bits of the entry are the upper 16 bits of the
    460      * exclusive primary limit of a range.
    461      * Primaries between the previous limit and this one have their lead bytes
    462      * modified by the signed offset (-0xff..+0xff) stored in the lower 16 bits.
    463      *
    464      * <p>CollationData.makeReorderRanges() writes a full list where the first range
    465      * (at least for terminators and separators) has a 0 offset.
    466      * The last range has a non-zero offset.
    467      * minHighNoReorder is set to the limit of that last range.
    468      *
    469      * <p>In the settings object, the initial ranges before the first split lead byte
    470      * are omitted for efficiency; they are handled by reorder(p) via the reorderTable.
    471      * If there are no split-reordered lead bytes, then no ranges are needed.
    472      */
    473     long[] reorderRanges;
    474     /** Array of reorder codes; ignored if length == 0. */
    475     public int[] reorderCodes = EMPTY_INT_ARRAY;
    476     // Note: In C++, we keep a memory block around for the reorder codes,
    477     // the ranges, and the permutation table,
    478     // and modify them for new codes.
    479     // In Java, we simply copy references and then never modify the array contents.
    480     // The caller must abandon the arrays.
    481     // Reorder codes from the public setter API must be cloned.
    482     private static final int[] EMPTY_INT_ARRAY = new int[0];
    483 
    484     /** Options for CollationFastLatin. Negative if disabled. */
    485     public int fastLatinOptions = -1;
    486     // fastLatinPrimaries.length must be equal to CollationFastLatin.LATIN_LIMIT,
    487     // but we do not import CollationFastLatin to reduce circular dependencies.
    488     public char[] fastLatinPrimaries = new char[0x180];  // mutable contents
    489 }
    490