Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2010-2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * collation.cpp
      7 *
      8 * created on: 2010oct27
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_COLLATION
     15 
     16 #include "collation.h"
     17 #include "uassert.h"
     18 
     19 U_NAMESPACE_BEGIN
     20 
// Namespace-scope definitions (without initializers) of static const
// members of Collation. Their values are not visible in this file;
// presumably they are initialized where the class is declared (collation.h).
//
// Some compilers don't care if constants are defined in the .cpp file.
// MS Visual C++ does not like it, but gcc requires it. clang does not care.
// Therefore the definitions are skipped when _MSC_VER is defined.
#ifndef _MSC_VER
const uint8_t Collation::LEVEL_SEPARATOR_BYTE;
const uint8_t Collation::MERGE_SEPARATOR_BYTE;
const uint32_t Collation::ONLY_TERTIARY_MASK;
const uint32_t Collation::CASE_AND_TERTIARY_MASK;
#endif
     29 
     30 uint32_t
     31 Collation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
     32     // Extract the second byte, minus the minimum byte value,
     33     // plus the offset, modulo the number of usable byte values, plus the minimum.
     34     // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
     35     uint32_t primary;
     36     if(isCompressible) {
     37         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;
     38         primary = (uint32_t)((offset % 251) + 4) << 16;
     39         offset /= 251;
     40     } else {
     41         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;
     42         primary = (uint32_t)((offset % 254) + 2) << 16;
     43         offset /= 254;
     44     }
     45     // First byte, assume no further overflow.
     46     return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));
     47 }
     48 
     49 uint32_t
     50 Collation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
     51     // Extract the third byte, minus the minimum byte value,
     52     // plus the offset, modulo the number of usable byte values, plus the minimum.
     53     offset += ((int32_t)(basePrimary >> 8) & 0xff) - 2;
     54     uint32_t primary = (uint32_t)((offset % 254) + 2) << 8;
     55     offset /= 254;
     56     // Same with the second byte,
     57     // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
     58     if(isCompressible) {
     59         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;
     60         primary |= (uint32_t)((offset % 251) + 4) << 16;
     61         offset /= 251;
     62     } else {
     63         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;
     64         primary |= (uint32_t)((offset % 254) + 2) << 16;
     65         offset /= 254;
     66     }
     67     // First byte, assume no further overflow.
     68     return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));
     69 }
     70 
     71 uint32_t
     72 Collation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
     73     // Extract the second byte, minus the minimum byte value,
     74     // minus the step, modulo the number of usable byte values, plus the minimum.
     75     // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
     76     // Assume no further underflow for the first byte.
     77     U_ASSERT(0 < step && step <= 0x7f);
     78     int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - step;
     79     if(isCompressible) {
     80         if(byte2 < 4) {
     81             byte2 += 251;
     82             basePrimary -= 0x1000000;
     83         }
     84     } else {
     85         if(byte2 < 2) {
     86             byte2 += 254;
     87             basePrimary -= 0x1000000;
     88         }
     89     }
     90     return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16);
     91 }
     92 
     93 uint32_t
     94 Collation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
     95     // Extract the third byte, minus the minimum byte value,
     96     // minus the step, modulo the number of usable byte values, plus the minimum.
     97     U_ASSERT(0 < step && step <= 0x7f);
     98     int32_t byte3 = ((int32_t)(basePrimary >> 8) & 0xff) - step;
     99     if(byte3 >= 2) {
    100         return (basePrimary & 0xffff0000) | ((uint32_t)byte3 << 8);
    101     }
    102     byte3 += 254;
    103     // Same with the second byte,
    104     // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
    105     int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - 1;
    106     if(isCompressible) {
    107         if(byte2 < 4) {
    108             byte2 = 0xfe;
    109             basePrimary -= 0x1000000;
    110         }
    111     } else {
    112         if(byte2 < 2) {
    113             byte2 = 0xff;
    114             basePrimary -= 0x1000000;
    115         }
    116     }
    117     // First byte, assume no further underflow.
    118     return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16) | ((uint32_t)byte3 << 8);
    119 }
    120 
    121 uint32_t
    122 Collation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) {
    123     uint32_t p = (uint32_t)(dataCE >> 32);  // three-byte primary pppppp00
    124     int32_t lower32 = (int32_t)dataCE;  // base code point b & step s: bbbbbbss (bit 7: isCompressible)
    125     int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f);  // delta * increment
    126     UBool isCompressible = (lower32 & 0x80) != 0;
    127     return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset);
    128 }
    129 
    130 uint32_t
    131 Collation::unassignedPrimaryFromCodePoint(UChar32 c) {
    132     // Create a gap before U+0000. Use c=-1 for [first unassigned].
    133     ++c;
    134     // Fourth byte: 18 values, every 14th byte value (gap of 13).
    135     uint32_t primary = 2 + (c % 18) * 14;
    136     c /= 18;
    137     // Third byte: 254 values.
    138     primary |= (2 + (c % 254)) << 8;
    139     c /= 254;
    140     // Second byte: 251 values 04..FE excluding the primary compression bytes.
    141     primary |= (4 + (c % 251)) << 16;
    142     // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).
    143     return primary | (UNASSIGNED_IMPLICIT_BYTE << 24);
    144 }
    145 
    146 U_NAMESPACE_END
    147 
    148 #endif  // !UCONFIG_NO_COLLATION
    149