Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2013-2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * collationrootelements.cpp
      7 *
      8 * created on: 2013mar05
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_COLLATION
     15 
     16 #include "collation.h"
     17 #include "collationrootelements.h"
     18 #include "uassert.h"
     19 
     20 U_NAMESPACE_BEGIN
     21 
     22 int64_t
     23 CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const {
     24     if(p == 0) { return 0; }
     25     U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]);
     26     int32_t index = findP(p);
     27     uint32_t q = elements[index];
     28     uint32_t secTer;
     29     if(p == (q & 0xffffff00)) {
     30         // p == elements[index] is a root primary. Find the CE before it.
     31         // We must not be in a primary range.
     32         U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
     33         secTer = elements[index - 1];
     34         if((secTer & SEC_TER_DELTA_FLAG) == 0) {
     35             // Primary CE just before p.
     36             p = secTer & 0xffffff00;
     37             secTer = Collation::COMMON_SEC_AND_TER_CE;
     38         } else {
     39             // secTer = last secondary & tertiary for the previous primary
     40             index -= 2;
     41             for(;;) {
     42                 p = elements[index];
     43                 if((p & SEC_TER_DELTA_FLAG) == 0) {
     44                     p &= 0xffffff00;
     45                     break;
     46                 }
     47                 --index;
     48             }
     49         }
     50     } else {
     51         // p > elements[index] which is the previous primary.
     52         // Find the last secondary & tertiary weights for it.
     53         p = q & 0xffffff00;
     54         secTer = Collation::COMMON_SEC_AND_TER_CE;
     55         for(;;) {
     56             q = elements[++index];
     57             if((q & SEC_TER_DELTA_FLAG) == 0) {
     58                 // We must not be in a primary range.
     59                 U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
     60                 break;
     61             }
     62             secTer = q;
     63         }
     64     }
     65     return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
     66 }
     67 
     68 int64_t
     69 CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const {
     70     if(p == 0) { return 0; }
     71     int32_t index = findP(p);
     72     if(p != (elements[index] & 0xffffff00)) {
     73         for(;;) {
     74             p = elements[++index];
     75             if((p & SEC_TER_DELTA_FLAG) == 0) {
     76                 // First primary after p. We must not be in a primary range.
     77                 U_ASSERT((p & PRIMARY_STEP_MASK) == 0);
     78                 break;
     79             }
     80         }
     81     }
     82     // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
     83     return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE;
     84 }
     85 
     86 uint32_t
     87 CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const {
     88     int32_t index = findPrimary(p);
     89     int32_t step;
     90     uint32_t q = elements[index];
     91     if(p == (q & 0xffffff00)) {
     92         // Found p itself. Return the previous primary.
     93         // See if p is at the end of a previous range.
     94         step = (int32_t)q & PRIMARY_STEP_MASK;
     95         if(step == 0) {
     96             // p is not at the end of a range. Look for the previous primary.
     97             do {
     98                 p = elements[--index];
     99             } while((p & SEC_TER_DELTA_FLAG) != 0);
    100             return p & 0xffffff00;
    101         }
    102     } else {
    103         // p is in a range, and not at the start.
    104         uint32_t nextElement = elements[index + 1];
    105         U_ASSERT(isEndOfPrimaryRange(nextElement));
    106         step = (int32_t)nextElement & PRIMARY_STEP_MASK;
    107     }
    108     // Return the previous range primary.
    109     if((p & 0xffff) == 0) {
    110         return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step);
    111     } else {
    112         return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step);
    113     }
    114 }
    115 
    116 uint32_t
    117 CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const {
    118     int32_t index;
    119     uint32_t previousSec, sec;
    120     if(p == 0) {
    121         index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
    122         // Gap at the beginning of the secondary CE range.
    123         previousSec = 0;
    124         sec = elements[index] >> 16;
    125     } else {
    126         index = findPrimary(p) + 1;
    127         previousSec = Collation::BEFORE_WEIGHT16;
    128         sec = getFirstSecTerForPrimary(index) >> 16;
    129     }
    130     U_ASSERT(s >= sec);
    131     while(s > sec) {
    132         previousSec = sec;
    133         U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
    134         sec = elements[index++] >> 16;
    135     }
    136     U_ASSERT(sec == s);
    137     return previousSec;
    138 }
    139 
    140 uint32_t
    141 CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const {
    142     U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0);
    143     int32_t index;
    144     uint32_t previousTer, secTer;
    145     if(p == 0) {
    146         if(s == 0) {
    147             index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
    148             // Gap at the beginning of the tertiary CE range.
    149             previousTer = 0;
    150         } else {
    151             index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
    152             previousTer = Collation::BEFORE_WEIGHT16;
    153         }
    154         secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
    155     } else {
    156         index = findPrimary(p) + 1;
    157         previousTer = Collation::BEFORE_WEIGHT16;
    158         secTer = getFirstSecTerForPrimary(index);
    159     }
    160     uint32_t st = (s << 16) | t;
    161     while(st > secTer) {
    162         if((secTer >> 16) == s) { previousTer = secTer; }
    163         U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
    164         secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
    165     }
    166     U_ASSERT(secTer == st);
    167     return previousTer & 0xffff;
    168 }
    169 
    170 uint32_t
    171 CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const {
    172     U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1]));
    173     uint32_t q = elements[++index];
    174     int32_t step;
    175     if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) {
    176         // Return the next primary in this range.
    177         if((p & 0xffff) == 0) {
    178             return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step);
    179         } else {
    180             return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step);
    181         }
    182     } else {
    183         // Return the next primary in the list.
    184         while((q & SEC_TER_DELTA_FLAG) != 0) {
    185             q = elements[++index];
    186         }
    187         U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
    188         return q;
    189     }
    190 }
    191 
    192 uint32_t
    193 CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const {
    194     uint32_t secTer;
    195     uint32_t secLimit;
    196     if(index == 0) {
    197         // primary = 0
    198         U_ASSERT(s != 0);
    199         index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
    200         secTer = elements[index];
    201         // Gap at the end of the secondary CE range.
    202         secLimit = 0x10000;
    203     } else {
    204         U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
    205         secTer = getFirstSecTerForPrimary(index + 1);
    206         // If this is an explicit sec/ter unit, then it will be read once more.
    207         // Gap for secondaries of primary CEs.
    208         secLimit = getSecondaryBoundary();
    209     }
    210     for(;;) {
    211         uint32_t sec = secTer >> 16;
    212         if(sec > s) { return sec; }
    213         secTer = elements[++index];
    214         if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
    215     }
    216 }
    217 
    218 uint32_t
    219 CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const {
    220     uint32_t secTer;
    221     uint32_t terLimit;
    222     if(index == 0) {
    223         // primary = 0
    224         if(s == 0) {
    225             U_ASSERT(t != 0);
    226             index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
    227             // Gap at the end of the tertiary CE range.
    228             terLimit = 0x4000;
    229         } else {
    230             index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
    231             // Gap for tertiaries of primary/secondary CEs.
    232             terLimit = getTertiaryBoundary();
    233         }
    234         secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
    235     } else {
    236         U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
    237         secTer = getFirstSecTerForPrimary(index + 1);
    238         // If this is an explicit sec/ter unit, then it will be read once more.
    239         terLimit = getTertiaryBoundary();
    240     }
    241     uint32_t st = (s << 16) | t;
    242     for(;;) {
    243         if(secTer > st) {
    244             U_ASSERT((secTer >> 16) == s);
    245             return secTer & 0xffff;
    246         }
    247         secTer = elements[++index];
    248         // No tertiary greater than t for this primary+secondary.
    249         if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
    250         secTer &= ~SEC_TER_DELTA_FLAG;
    251     }
    252 }
    253 
    254 uint32_t
    255 CollationRootElements::getFirstSecTerForPrimary(int32_t index) const {
    256     uint32_t secTer = elements[index];
    257     if((secTer & SEC_TER_DELTA_FLAG) == 0) {
    258         // No sec/ter delta.
    259         return Collation::COMMON_SEC_AND_TER_CE;
    260     }
    261     secTer &= ~SEC_TER_DELTA_FLAG;
    262     if(secTer > Collation::COMMON_SEC_AND_TER_CE) {
    263         // Implied sec/ter.
    264         return Collation::COMMON_SEC_AND_TER_CE;
    265     }
    266     // Explicit sec/ter below common/common.
    267     return secTer;
    268 }
    269 
    270 int32_t
    271 CollationRootElements::findPrimary(uint32_t p) const {
    272     // Requirement: p must occur as a root primary.
    273     U_ASSERT((p & 0xff) == 0);  // at most a 3-byte primary
    274     int32_t index = findP(p);
    275     // If p is in a range, then we just assume that p is an actual primary in this range.
    276     // (Too cumbersome/expensive to check.)
    277     // Otherwise, it must be an exact match.
    278     U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00));
    279     return index;
    280 }
    281 
    282 int32_t
    283 CollationRootElements::findP(uint32_t p) const {
    284     // p need not occur as a root primary.
    285     // For example, it might be a reordering group boundary.
    286     U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE);
    287     // modified binary search
    288     int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX];
    289     U_ASSERT(p >= elements[start]);
    290     int32_t limit = length - 1;
    291     U_ASSERT(elements[limit] >= PRIMARY_SENTINEL);
    292     U_ASSERT(p < elements[limit]);
    293     while((start + 1) < limit) {
    294         // Invariant: elements[start] and elements[limit] are primaries,
    295         // and elements[start]<=p<=elements[limit].
    296         int32_t i = (start + limit) / 2;
    297         uint32_t q = elements[i];
    298         if((q & SEC_TER_DELTA_FLAG) != 0) {
    299             // Find the next primary.
    300             int32_t j = i + 1;
    301             for(;;) {
    302                 if(j == limit) { break; }
    303                 q = elements[j];
    304                 if((q & SEC_TER_DELTA_FLAG) == 0) {
    305                     i = j;
    306                     break;
    307                 }
    308                 ++j;
    309             }
    310             if((q & SEC_TER_DELTA_FLAG) != 0) {
    311                 // Find the preceding primary.
    312                 j = i - 1;
    313                 for(;;) {
    314                     if(j == start) { break; }
    315                     q = elements[j];
    316                     if((q & SEC_TER_DELTA_FLAG) == 0) {
    317                         i = j;
    318                         break;
    319                     }
    320                     --j;
    321                 }
    322                 if((q & SEC_TER_DELTA_FLAG) != 0) {
    323                     // No primary between start and limit.
    324                     break;
    325                 }
    326             }
    327         }
    328         if(p < (q & 0xffffff00)) {  // Reset the "step" bits of a range end primary.
    329             limit = i;
    330         } else {
    331             start = i;
    332         }
    333     }
    334     return start;
    335 }
    336 
    337 U_NAMESPACE_END
    338 
    339 #endif  // !UCONFIG_NO_COLLATION
    340