Home | History | Annotate | Download | only in coll
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5 *******************************************************************************
      6 * Copyright (C) 2013-2014, International Business Machines
      7 * Corporation and others.  All Rights Reserved.
      8 *******************************************************************************
      9 * CollationRootElements.java, ported from collationrootelements.h/.cpp
     10 *
     11 * C++ version created on: 2013mar01
     12 * created by: Markus W. Scherer
     13 */
     14 
     15 package android.icu.impl.coll;
     16 
     17 /**
     18  * Container and access methods for collation elements and weights
     19  * that occur in the root collator.
     20  * Needed for finding boundaries for building a tailoring.
     21  *
     22  * This class takes and returns 16-bit secondary and tertiary weights.
     23  * @hide Only a subset of ICU is exposed in Android
     24  */
     25 public final class CollationRootElements {
     26     public CollationRootElements(long[] rootElements) {
     27         elements = rootElements;
     28     }
     29 
     30     /**
     31      * Higher than any root primary.
     32      */
     33     public static final long PRIMARY_SENTINEL = 0xffffff00L;
     34 
     35     /**
     36      * Flag in a root element, set if the element contains secondary & tertiary weights,
     37      * rather than a primary.
     38      */
     39     public static final int SEC_TER_DELTA_FLAG = 0x80;
     40     /**
     41      * Mask for getting the primary range step value from a primary-range-end element.
     42      */
     43     public static final int PRIMARY_STEP_MASK = 0x7f;
     44 
     45     /**
     46      * Index of the first CE with a non-zero tertiary weight.
     47      * Same as the start of the compact root elements table.
     48      */
     49     public static final int IX_FIRST_TERTIARY_INDEX = 0;
     50     /**
     51      * Index of the first CE with a non-zero secondary weight.
     52      */
     53     static final int IX_FIRST_SECONDARY_INDEX = 1;
     54     /**
     55      * Index of the first CE with a non-zero primary weight.
     56      */
     57     static final int IX_FIRST_PRIMARY_INDEX = 2;
     58     /**
     59      * Must match Collation.COMMON_SEC_AND_TER_CE.
     60      */
     61     static final int IX_COMMON_SEC_AND_TER_CE = 3;
     62     /**
     63      * Secondary & tertiary boundaries.
     64      * Bits 31..24: [fixed last secondary common byte 45]
     65      * Bits 23..16: [fixed first ignorable secondary byte 80]
     66      * Bits 15.. 8: reserved, 0
     67      * Bits  7.. 0: [fixed first ignorable tertiary byte 3C]
     68      */
     69     static final int IX_SEC_TER_BOUNDARIES = 4;
     70     /**
     71      * The current number of indexes.
     72      * Currently the same as elements[IX_FIRST_TERTIARY_INDEX].
     73      */
     74     static final int IX_COUNT = 5;
     75 
     76     /**
     77      * Returns the boundary between tertiary weights of primary/secondary CEs
     78      * and those of tertiary CEs.
     79      * This is the upper limit for tertiaries of primary/secondary CEs.
     80      * This minus one is the lower limit for tertiaries of tertiary CEs.
     81      */
     82     public int getTertiaryBoundary() {
     83         return ((int)elements[IX_SEC_TER_BOUNDARIES] << 8) & 0xff00;
     84     }
     85 
     86     /**
     87      * Returns the first assigned tertiary CE.
     88      */
     89     long getFirstTertiaryCE() {
     90         return elements[(int)elements[IX_FIRST_TERTIARY_INDEX]] & ~SEC_TER_DELTA_FLAG;
     91     }
     92 
     93     /**
     94      * Returns the last assigned tertiary CE.
     95      */
     96     long getLastTertiaryCE() {
     97         return elements[(int)elements[IX_FIRST_SECONDARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;
     98     }
     99 
    100     /**
    101      * Returns the last common secondary weight.
    102      * This is the lower limit for secondaries of primary CEs.
    103      */
    104     public int getLastCommonSecondary() {
    105         return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 16) & 0xff00;
    106     }
    107 
    108     /**
    109      * Returns the boundary between secondary weights of primary CEs
    110      * and those of secondary CEs.
    111      * This is the upper limit for secondaries of primary CEs.
    112      * This minus one is the lower limit for secondaries of secondary CEs.
    113      */
    114     public int getSecondaryBoundary() {
    115         return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 8) & 0xff00;
    116     }
    117 
    118     /**
    119      * Returns the first assigned secondary CE.
    120      */
    121     long getFirstSecondaryCE() {
    122         return elements[(int)elements[IX_FIRST_SECONDARY_INDEX]] & ~SEC_TER_DELTA_FLAG;
    123     }
    124 
    125     /**
    126      * Returns the last assigned secondary CE.
    127      */
    128     long getLastSecondaryCE() {
    129         return elements[(int)elements[IX_FIRST_PRIMARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG;
    130     }
    131 
    132     /**
    133      * Returns the first assigned primary weight.
    134      */
    135     long getFirstPrimary() {
    136         return elements[(int)elements[IX_FIRST_PRIMARY_INDEX]];  // step=0: cannot be a range end
    137     }
    138 
    139     /**
    140      * Returns the first assigned primary CE.
    141      */
    142     long getFirstPrimaryCE() {
    143         return Collation.makeCE(getFirstPrimary());
    144     }
    145 
    146     /**
    147      * Returns the last root CE with a primary weight before p.
    148      * Intended only for reordering group boundaries.
    149      */
    150     long lastCEWithPrimaryBefore(long p) {
    151         if(p == 0) { return 0; }
    152         assert(p > elements[(int)elements[IX_FIRST_PRIMARY_INDEX]]);
    153         int index = findP(p);
    154         long q = elements[index];
    155         long secTer;
    156         if(p == (q & 0xffffff00L)) {
    157             // p == elements[index] is a root primary. Find the CE before it.
    158             // We must not be in a primary range.
    159             assert((q & PRIMARY_STEP_MASK) == 0);
    160             secTer = elements[index - 1];
    161             if((secTer & SEC_TER_DELTA_FLAG) == 0) {
    162                 // Primary CE just before p.
    163                 p = secTer & 0xffffff00L;
    164                 secTer = Collation.COMMON_SEC_AND_TER_CE;
    165             } else {
    166                 // secTer = last secondary & tertiary for the previous primary
    167                 index -= 2;
    168                 for(;;) {
    169                     p = elements[index];
    170                     if((p & SEC_TER_DELTA_FLAG) == 0) {
    171                         p &= 0xffffff00L;
    172                         break;
    173                     }
    174                     --index;
    175                 }
    176             }
    177         } else {
    178             // p > elements[index] which is the previous primary.
    179             // Find the last secondary & tertiary weights for it.
    180             p = q & 0xffffff00L;
    181             secTer = Collation.COMMON_SEC_AND_TER_CE;
    182             for(;;) {
    183                 q = elements[++index];
    184                 if((q & SEC_TER_DELTA_FLAG) == 0) {
    185                     // We must not be in a primary range.
    186                     assert((q & PRIMARY_STEP_MASK) == 0);
    187                     break;
    188                 }
    189                 secTer = q;
    190             }
    191         }
    192         return (p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
    193     }
    194 
    195     /**
    196      * Returns the first root CE with a primary weight of at least p.
    197      * Intended only for reordering group boundaries.
    198      */
    199     long firstCEWithPrimaryAtLeast(long p) {
    200         if(p == 0) { return 0; }
    201         int index = findP(p);
    202         if(p != (elements[index] & 0xffffff00L)) {
    203             for(;;) {
    204                 p = elements[++index];
    205                 if((p & SEC_TER_DELTA_FLAG) == 0) {
    206                     // First primary after p. We must not be in a primary range.
    207                     assert((p & PRIMARY_STEP_MASK) == 0);
    208                     break;
    209                 }
    210             }
    211         }
    212         // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
    213         return (p << 32) | Collation.COMMON_SEC_AND_TER_CE;
    214     }
    215 
    216     /**
    217      * Returns the primary weight before p.
    218      * p must be greater than the first root primary.
    219      */
    220     long getPrimaryBefore(long p, boolean isCompressible) {
    221         int index = findPrimary(p);
    222         int step;
    223         long q = elements[index];
    224         if(p == (q & 0xffffff00L)) {
    225             // Found p itself. Return the previous primary.
    226             // See if p is at the end of a previous range.
    227             step = (int)q & PRIMARY_STEP_MASK;
    228             if(step == 0) {
    229                 // p is not at the end of a range. Look for the previous primary.
    230                 do {
    231                     p = elements[--index];
    232                 } while((p & SEC_TER_DELTA_FLAG) != 0);
    233                 return p & 0xffffff00L;
    234             }
    235         } else {
    236             // p is in a range, and not at the start.
    237             long nextElement = elements[index + 1];
    238             assert(isEndOfPrimaryRange(nextElement));
    239             step = (int)nextElement & PRIMARY_STEP_MASK;
    240         }
    241         // Return the previous range primary.
    242         if((p & 0xffff) == 0) {
    243             return Collation.decTwoBytePrimaryByOneStep(p, isCompressible, step);
    244         } else {
    245             return Collation.decThreeBytePrimaryByOneStep(p, isCompressible, step);
    246         }
    247     }
    248 
    249     /** Returns the secondary weight before [p, s]. */
    250     int getSecondaryBefore(long p, int s) {
    251         int index;
    252         int previousSec, sec;
    253         if(p == 0) {
    254             index = (int)elements[IX_FIRST_SECONDARY_INDEX];
    255             // Gap at the beginning of the secondary CE range.
    256             previousSec = 0;
    257             sec = (int)(elements[index] >> 16);
    258         } else {
    259             index = findPrimary(p) + 1;
    260             previousSec = Collation.BEFORE_WEIGHT16;
    261             sec = (int)getFirstSecTerForPrimary(index) >>> 16;
    262         }
    263         assert(s >= sec);
    264         while(s > sec) {
    265             previousSec = sec;
    266             assert((elements[index] & SEC_TER_DELTA_FLAG) != 0);
    267             sec = (int)(elements[index++] >> 16);
    268         }
    269         assert(sec == s);
    270         return previousSec;
    271     }
    272 
    273     /** Returns the tertiary weight before [p, s, t]. */
    274     int getTertiaryBefore(long p, int s, int t) {
    275         assert((t & ~Collation.ONLY_TERTIARY_MASK) == 0);
    276         int index;
    277         int previousTer;
    278         long secTer;
    279         if(p == 0) {
    280             if(s == 0) {
    281                 index = (int)elements[IX_FIRST_TERTIARY_INDEX];
    282                 // Gap at the beginning of the tertiary CE range.
    283                 previousTer = 0;
    284             } else {
    285                 index = (int)elements[IX_FIRST_SECONDARY_INDEX];
    286                 previousTer = Collation.BEFORE_WEIGHT16;
    287             }
    288             secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
    289         } else {
    290             index = findPrimary(p) + 1;
    291             previousTer = Collation.BEFORE_WEIGHT16;
    292             secTer = getFirstSecTerForPrimary(index);
    293         }
    294         long st = ((long)s << 16) | t;
    295         while(st > secTer) {
    296             if((int)(secTer >> 16) == s) { previousTer = (int)secTer; }
    297             assert((elements[index] & SEC_TER_DELTA_FLAG) != 0);
    298             secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
    299         }
    300         assert(secTer == st);
    301         return previousTer & 0xffff;
    302     }
    303 
    304     /**
    305      * Finds the index of the input primary.
    306      * p must occur as a root primary, and must not be 0.
    307      */
    308     int findPrimary(long p) {
    309         // Requirement: p must occur as a root primary.
    310         assert((p & 0xff) == 0);  // at most a 3-byte primary
    311         int index = findP(p);
    312         // If p is in a range, then we just assume that p is an actual primary in this range.
    313         // (Too cumbersome/expensive to check.)
    314         // Otherwise, it must be an exact match.
    315         assert(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00L));
    316         return index;
    317     }
    318 
    319     /**
    320      * Returns the primary weight after p where index=findPrimary(p).
    321      * p must be at least the first root primary.
    322      */
    323     long getPrimaryAfter(long p, int index, boolean isCompressible) {
    324         assert(p == (elements[index] & 0xffffff00L) || isEndOfPrimaryRange(elements[index + 1]));
    325         long q = elements[++index];
    326         int step;
    327         if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int)q & PRIMARY_STEP_MASK) != 0) {
    328             // Return the next primary in this range.
    329             if((p & 0xffff) == 0) {
    330                 return Collation.incTwoBytePrimaryByOffset(p, isCompressible, step);
    331             } else {
    332                 return Collation.incThreeBytePrimaryByOffset(p, isCompressible, step);
    333             }
    334         } else {
    335             // Return the next primary in the list.
    336             while((q & SEC_TER_DELTA_FLAG) != 0) {
    337                 q = elements[++index];
    338             }
    339             assert((q & PRIMARY_STEP_MASK) == 0);
    340             return q;
    341         }
    342     }
    343     /**
    344      * Returns the secondary weight after [p, s] where index=findPrimary(p)
    345      * except use index=0 for p=0.
    346      *
    347      * <p>Must return a weight for every root [p, s] as well as for every weight
    348      * returned by getSecondaryBefore(). If p!=0 then s can be BEFORE_WEIGHT16.
    349      *
    350      * <p>Exception: [0, 0] is handled by the CollationBuilder:
    351      * Both its lower and upper boundaries are special.
    352      */
    353     int getSecondaryAfter(int index, int s) {
    354         long secTer;
    355         int secLimit;
    356         if(index == 0) {
    357             // primary = 0
    358             assert(s != 0);
    359             index = (int)elements[IX_FIRST_SECONDARY_INDEX];
    360             secTer = elements[index];
    361             // Gap at the end of the secondary CE range.
    362             secLimit = 0x10000;
    363         } else {
    364             assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
    365             secTer = getFirstSecTerForPrimary(index + 1);
    366             // If this is an explicit sec/ter unit, then it will be read once more.
    367             // Gap for secondaries of primary CEs.
    368             secLimit = getSecondaryBoundary();
    369         }
    370         for(;;) {
    371             int sec = (int)(secTer >> 16);
    372             if(sec > s) { return sec; }
    373             secTer = elements[++index];
    374             if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
    375         }
    376     }
    377     /**
    378      * Returns the tertiary weight after [p, s, t] where index=findPrimary(p)
    379      * except use index=0 for p=0.
    380      *
    381      * <p>Must return a weight for every root [p, s, t] as well as for every weight
    382      * returned by getTertiaryBefore(). If s!=0 then t can be BEFORE_WEIGHT16.
    383      *
    384      * <p>Exception: [0, 0, 0] is handled by the CollationBuilder:
    385      * Both its lower and upper boundaries are special.
    386      */
    387     int getTertiaryAfter(int index, int s, int t) {
    388         long secTer;
    389         int terLimit;
    390         if(index == 0) {
    391             // primary = 0
    392             if(s == 0) {
    393                 assert(t != 0);
    394                 index = (int)elements[IX_FIRST_TERTIARY_INDEX];
    395                 // Gap at the end of the tertiary CE range.
    396                 terLimit = 0x4000;
    397             } else {
    398                 index = (int)elements[IX_FIRST_SECONDARY_INDEX];
    399                 // Gap for tertiaries of primary/secondary CEs.
    400                 terLimit = getTertiaryBoundary();
    401             }
    402             secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
    403         } else {
    404             assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]);
    405             secTer = getFirstSecTerForPrimary(index + 1);
    406             // If this is an explicit sec/ter unit, then it will be read once more.
    407             terLimit = getTertiaryBoundary();
    408         }
    409         long st = (((long)s & 0xffffffffL) << 16) | t;
    410         for(;;) {
    411             if(secTer > st) {
    412                 assert((secTer >> 16) == s);
    413                 return (int)secTer & 0xffff;
    414             }
    415             secTer = elements[++index];
    416             // No tertiary greater than t for this primary+secondary.
    417             if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
    418             secTer &= ~SEC_TER_DELTA_FLAG;
    419         }
    420     }
    421 
    422     /**
    423      * Returns the first secondary & tertiary weights for p where index=findPrimary(p)+1.
    424      */
    425     private long getFirstSecTerForPrimary(int index) {
    426         long secTer = elements[index];
    427         if((secTer & SEC_TER_DELTA_FLAG) == 0) {
    428             // No sec/ter delta.
    429             return Collation.COMMON_SEC_AND_TER_CE;
    430         }
    431         secTer &= ~SEC_TER_DELTA_FLAG;
    432         if(secTer > Collation.COMMON_SEC_AND_TER_CE) {
    433             // Implied sec/ter.
    434             return Collation.COMMON_SEC_AND_TER_CE;
    435         }
    436         // Explicit sec/ter below common/common.
    437         return secTer;
    438     }
    439 
    440     /**
    441      * Finds the largest index i where elements[i]<=p.
    442      * Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL).
    443      * Does not require that p is a root collator primary.
    444      */
    445     private int findP(long p) {
    446         // p need not occur as a root primary.
    447         // For example, it might be a reordering group boundary.
    448         assert((p >> 24) != Collation.UNASSIGNED_IMPLICIT_BYTE);
    449         // modified binary search
    450         int start = (int)elements[IX_FIRST_PRIMARY_INDEX];
    451         assert(p >= elements[start]);
    452         int limit = elements.length - 1;
    453         assert(elements[limit] >= PRIMARY_SENTINEL);
    454         assert(p < elements[limit]);
    455         while((start + 1) < limit) {
    456             // Invariant: elements[start] and elements[limit] are primaries,
    457             // and elements[start]<=p<=elements[limit].
    458             int i = (int)(((long)start + (long)limit) / 2);
    459             long q = elements[i];
    460             if((q & SEC_TER_DELTA_FLAG) != 0) {
    461                 // Find the next primary.
    462                 int j = i + 1;
    463                 for(;;) {
    464                     if(j == limit) { break; }
    465                     q = elements[j];
    466                     if((q & SEC_TER_DELTA_FLAG) == 0) {
    467                         i = j;
    468                         break;
    469                     }
    470                     ++j;
    471                 }
    472                 if((q & SEC_TER_DELTA_FLAG) != 0) {
    473                     // Find the preceding primary.
    474                     j = i - 1;
    475                     for(;;) {
    476                         if(j == start) { break; }
    477                         q = elements[j];
    478                         if((q & SEC_TER_DELTA_FLAG) == 0) {
    479                             i = j;
    480                             break;
    481                         }
    482                         --j;
    483                     }
    484                     if((q & SEC_TER_DELTA_FLAG) != 0) {
    485                         // No primary between start and limit.
    486                         break;
    487                     }
    488                 }
    489             }
    490             if(p < (q & 0xffffff00L)) {  // Reset the "step" bits of a range end primary.
    491                 limit = i;
    492             } else {
    493                 start = i;
    494             }
    495         }
    496         return start;
    497     }
    498 
    499     private static boolean isEndOfPrimaryRange(long q) {
    500         return (q & SEC_TER_DELTA_FLAG) == 0 && (q & PRIMARY_STEP_MASK) != 0;
    501     }
    502 
    503     /**
    504      * Data structure: See ICU4C source/i18n/collationrootelements.h.
    505      */
    506     private long[] elements;
    507 }
    508