Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2013-2015, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * collationsettings.cpp
      7 *
      8 * created on: 2013feb07
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_COLLATION
     15 
     16 #include "unicode/ucol.h"
     17 #include "cmemory.h"
     18 #include "collation.h"
     19 #include "collationdata.h"
     20 #include "collationsettings.h"
     21 #include "sharedobject.h"
     22 #include "uassert.h"
     23 #include "umutex.h"
     24 #include "uvectr32.h"
     25 
     26 U_NAMESPACE_BEGIN
     27 
     28 CollationSettings::CollationSettings(const CollationSettings &other)
     29         : SharedObject(other),
     30           options(other.options), variableTop(other.variableTop),
     31           reorderTable(NULL),
     32           minHighNoReorder(other.minHighNoReorder),
     33           reorderRanges(NULL), reorderRangesLength(0),
     34           reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
     35           fastLatinOptions(other.fastLatinOptions) {
     36     UErrorCode errorCode = U_ZERO_ERROR;
     37     copyReorderingFrom(other, errorCode);
     38     if(fastLatinOptions >= 0) {
     39         uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
     40     }
     41 }
     42 
     43 CollationSettings::~CollationSettings() {
     44     if(reorderCodesCapacity != 0) {
     45         uprv_free(const_cast<int32_t *>(reorderCodes));
     46     }
     47 }
     48 
     49 UBool
     50 CollationSettings::operator==(const CollationSettings &other) const {
     51     if(options != other.options) { return FALSE; }
     52     if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; }
     53     if(reorderCodesLength != other.reorderCodesLength) { return FALSE; }
     54     for(int32_t i = 0; i < reorderCodesLength; ++i) {
     55         if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; }
     56     }
     57     return TRUE;
     58 }
     59 
     60 int32_t
     61 CollationSettings::hashCode() const {
     62     int32_t h = options << 8;
     63     if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
     64     h ^= reorderCodesLength;
     65     for(int32_t i = 0; i < reorderCodesLength; ++i) {
     66         h ^= (reorderCodes[i] << i);
     67     }
     68     return h;
     69 }
     70 
     71 void
     72 CollationSettings::resetReordering() {
     73     // When we turn off reordering, we want to set a NULL permutation
     74     // rather than a no-op permutation.
     75     // Keep the memory via reorderCodes and its capacity.
     76     reorderTable = NULL;
     77     minHighNoReorder = 0;
     78     reorderRangesLength = 0;
     79     reorderCodesLength = 0;
     80 }
     81 
     82 void
     83 CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
     84                                    const uint32_t *ranges, int32_t rangesLength,
     85                                    const uint8_t *table, UErrorCode &errorCode) {
     86     if(U_FAILURE(errorCode)) { return; }
     87     if(table != NULL &&
     88             (rangesLength == 0 ?
     89                     !reorderTableHasSplitBytes(table) :
     90                     rangesLength >= 2 &&
     91                     // The first offset must be 0. The last offset must not be 0.
     92                     (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
     93         // We need to release the memory before setting the alias pointer.
     94         if(reorderCodesCapacity != 0) {
     95             uprv_free(const_cast<int32_t *>(reorderCodes));
     96             reorderCodesCapacity = 0;
     97         }
     98         reorderTable = table;
     99         reorderCodes = codes;
    100         reorderCodesLength = length;
    101         // Drop ranges before the first split byte. They are reordered by the table.
    102         // This then speeds up reordering of the remaining ranges.
    103         int32_t firstSplitByteRangeIndex = 0;
    104         while(firstSplitByteRangeIndex < rangesLength &&
    105                 (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
    106             // The second byte of the primary limit is 0.
    107             ++firstSplitByteRangeIndex;
    108         }
    109         if(firstSplitByteRangeIndex == rangesLength) {
    110             U_ASSERT(!reorderTableHasSplitBytes(table));
    111             minHighNoReorder = 0;
    112             reorderRanges = NULL;
    113             reorderRangesLength = 0;
    114         } else {
    115             U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
    116             minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
    117             reorderRanges = ranges + firstSplitByteRangeIndex;
    118             reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
    119         }
    120         return;
    121     }
    122     // Regenerate missing data.
    123     setReordering(data, codes, length, errorCode);
    124 }
    125 
// Builds the reordering data (lead byte permutation table plus ranges)
// for the given reorder codes and stores copies via setReorderArrays().
//
// data: supplies makeReorderRanges(), which turns the codes into (limit, offset) pairs.
// codes/codesLength: the requested reorder codes. An empty list, or the single
//     code UCOL_REORDER_CODE_NONE, turns reordering off.
// errorCode: in/out status; nothing is done if it already indicates a failure.
void
CollationSettings::setReordering(const CollationData &data,
                                 const int32_t *codes, int32_t codesLength,
                                 UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
        resetReordering();
        return;
    }
    UVector32 rangesList(errorCode);
    data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
    if(U_FAILURE(errorCode)) { return; }
    int32_t rangesLength = rangesList.size();
    // No ranges at all is treated like "no reordering".
    if(rangesLength == 0) {
        resetReordering();
        return;
    }
    // ranges points into rangesList's buffer, which lives only until this function returns;
    // setReorderArrays() below copies what it needs.
    const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
    // ranges[] contains at least two (limit, offset) pairs.
    // The first offset must be 0. The last offset must not be 0.
    // Separators (at the low end) and trailing weights (at the high end)
    // are never reordered.
    U_ASSERT(rangesLength >= 2);
    U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
    // Upper 16 bits of the last pair; reorderEx() returns primaries at or above this unchanged.
    minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;

    // Write the lead byte permutation table.
    // Set a 0 for each lead byte that has a range boundary in the middle.
    uint8_t table[256];
    // b is the next lead byte to fill in; it carries over from one pair to the next.
    int32_t b = 0;
    int32_t firstSplitByteRangeIndex = -1;
    for(int32_t i = 0; i < rangesLength; ++i) {
        uint32_t pair = ranges[i];
        // The top byte of the pair is the lead byte of the range limit.
        int32_t limit1 = (int32_t)(pair >> 24);
        while(b < limit1) {
            // Only the low 8 bits of the sum survive the cast:
            // the lead byte plus the low byte of the pair.
            table[b] = (uint8_t)(b + pair);
            ++b;
        }
        // Check the second byte of the limit.
        if((pair & 0xff0000) != 0) {
            table[limit1] = 0;
            b = limit1 + 1;
            // Remember only the first pair whose limit splits a lead byte.
            if(firstSplitByteRangeIndex < 0) {
                firstSplitByteRangeIndex = i;
            }
        }
    }
    // Lead bytes above the last range limit map to themselves.
    while(b <= 0xff) {
        table[b] = (uint8_t)b;
        ++b;
    }
    if(firstSplitByteRangeIndex < 0) {
        // The lead byte permutation table alone suffices for reordering.
        rangesLength = 0;
    } else {
        // Remove the ranges below the first split byte.
        ranges += firstSplitByteRangeIndex;
        rangesLength -= firstSplitByteRangeIndex;
    }
    setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
}
    187 
    188 void
    189 CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
    190                                     const uint32_t *ranges, int32_t rangesLength,
    191                                     const uint8_t *table, UErrorCode &errorCode) {
    192     if(U_FAILURE(errorCode)) { return; }
    193     int32_t *ownedCodes;
    194     int32_t totalLength = codesLength + rangesLength;
    195     U_ASSERT(totalLength > 0);
    196     if(totalLength <= reorderCodesCapacity) {
    197         ownedCodes = const_cast<int32_t *>(reorderCodes);
    198     } else {
    199         // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
    200         int32_t capacity = (totalLength + 3) & ~3;  // round up to a multiple of 4 ints
    201         ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
    202         if(ownedCodes == NULL) {
    203             resetReordering();
    204             errorCode = U_MEMORY_ALLOCATION_ERROR;
    205             return;
    206         }
    207         if(reorderCodesCapacity != 0) {
    208             uprv_free(const_cast<int32_t *>(reorderCodes));
    209         }
    210         reorderCodes = ownedCodes;
    211         reorderCodesCapacity = capacity;
    212     }
    213     uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
    214     uprv_memcpy(ownedCodes, codes, codesLength * 4);
    215     uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
    216     reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
    217     reorderCodesLength = codesLength;
    218     reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
    219     reorderRangesLength = rangesLength;
    220 }
    221 
    222 void
    223 CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
    224     if(U_FAILURE(errorCode)) { return; }
    225     if(!other.hasReordering()) {
    226         resetReordering();
    227         return;
    228     }
    229     minHighNoReorder = other.minHighNoReorder;
    230     if(other.reorderCodesCapacity == 0) {
    231         // The reorder arrays are aliased to memory-mapped data.
    232         reorderTable = other.reorderTable;
    233         reorderRanges = other.reorderRanges;
    234         reorderRangesLength = other.reorderRangesLength;
    235         reorderCodes = other.reorderCodes;
    236         reorderCodesLength = other.reorderCodesLength;
    237     } else {
    238         setReorderArrays(other.reorderCodes, other.reorderCodesLength,
    239                          other.reorderRanges, other.reorderRangesLength,
    240                          other.reorderTable, errorCode);
    241     }
    242 }
    243 
    244 UBool
    245 CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
    246     U_ASSERT(table[0] == 0);
    247     for(int32_t i = 1; i < 256; ++i) {
    248         if(table[i] == 0) {
    249             return TRUE;
    250         }
    251     }
    252     return FALSE;
    253 }
    254 
    255 uint32_t
    256 CollationSettings::reorderEx(uint32_t p) const {
    257     if(p >= minHighNoReorder) { return p; }
    258     // Round up p so that its lower 16 bits are >= any offset bits.
    259     // Then compare q directly with (limit, offset) pairs.
    260     uint32_t q = p | 0xffff;
    261     uint32_t r;
    262     const uint32_t *ranges = reorderRanges;
    263     while(q >= (r = *ranges)) { ++ranges; }
    264     return p + (r << 24);
    265 }
    266 
    267 void
    268 CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
    269     if(U_FAILURE(errorCode)) { return; }
    270     int32_t noStrength = options & ~STRENGTH_MASK;
    271     switch(value) {
    272     case UCOL_PRIMARY:
    273     case UCOL_SECONDARY:
    274     case UCOL_TERTIARY:
    275     case UCOL_QUATERNARY:
    276     case UCOL_IDENTICAL:
    277         options = noStrength | (value << STRENGTH_SHIFT);
    278         break;
    279     case UCOL_DEFAULT:
    280         options = noStrength | (defaultOptions & STRENGTH_MASK);
    281         break;
    282     default:
    283         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    284         break;
    285     }
    286 }
    287 
    288 void
    289 CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
    290                            int32_t defaultOptions, UErrorCode &errorCode) {
    291     if(U_FAILURE(errorCode)) { return; }
    292     switch(value) {
    293     case UCOL_ON:
    294         options |= bit;
    295         break;
    296     case UCOL_OFF:
    297         options &= ~bit;
    298         break;
    299     case UCOL_DEFAULT:
    300         options = (options & ~bit) | (defaultOptions & bit);
    301         break;
    302     default:
    303         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    304         break;
    305     }
    306 }
    307 
    308 void
    309 CollationSettings::setCaseFirst(UColAttributeValue value,
    310                                 int32_t defaultOptions, UErrorCode &errorCode) {
    311     if(U_FAILURE(errorCode)) { return; }
    312     int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
    313     switch(value) {
    314     case UCOL_OFF:
    315         options = noCaseFirst;
    316         break;
    317     case UCOL_LOWER_FIRST:
    318         options = noCaseFirst | CASE_FIRST;
    319         break;
    320     case UCOL_UPPER_FIRST:
    321         options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK;
    322         break;
    323     case UCOL_DEFAULT:
    324         options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
    325         break;
    326     default:
    327         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    328         break;
    329     }
    330 }
    331 
    332 void
    333 CollationSettings::setAlternateHandling(UColAttributeValue value,
    334                                         int32_t defaultOptions, UErrorCode &errorCode) {
    335     if(U_FAILURE(errorCode)) { return; }
    336     int32_t noAlternate = options & ~ALTERNATE_MASK;
    337     switch(value) {
    338     case UCOL_NON_IGNORABLE:
    339         options = noAlternate;
    340         break;
    341     case UCOL_SHIFTED:
    342         options = noAlternate | SHIFTED;
    343         break;
    344     case UCOL_DEFAULT:
    345         options = noAlternate | (defaultOptions & ALTERNATE_MASK);
    346         break;
    347     default:
    348         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    349         break;
    350     }
    351 }
    352 
    353 void
    354 CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
    355     if(U_FAILURE(errorCode)) { return; }
    356     int32_t noMax = options & ~MAX_VARIABLE_MASK;
    357     switch(value) {
    358     case MAX_VAR_SPACE:
    359     case MAX_VAR_PUNCT:
    360     case MAX_VAR_SYMBOL:
    361     case MAX_VAR_CURRENCY:
    362         options = noMax | (value << MAX_VARIABLE_SHIFT);
    363         break;
    364     case UCOL_DEFAULT:
    365         options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
    366         break;
    367     default:
    368         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    369         break;
    370     }
    371 }
    372 
    373 U_NAMESPACE_END
    374 
    375 #endif  // !UCONFIG_NO_COLLATION
    376