Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2010-2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * collation.cpp
      7 *
      8 * created on: 2010oct27
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_COLLATION
     15 
     16 #include "collation.h"
     17 #include "uassert.h"
     18 
     19 U_NAMESPACE_BEGIN
     20 
// Out-of-class definitions for two static const members of Collation.
// They have no initializer here, so their values must come from the
// in-class declarations (presumably in collation.h -- TODO confirm).
// Some compilers don't care whether such constants are also defined in the .cpp file.
// MS Visual C++ does not like it, but gcc requires it. clang does not care.
// The definitions are therefore compiled for every compiler except MSVC.
#ifndef _MSC_VER
const uint32_t Collation::ONLY_TERTIARY_MASK;
const uint32_t Collation::CASE_AND_TERTIARY_MASK;
#endif
     27 
     28 uint32_t
     29 Collation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
     30     // Extract the second byte, minus the minimum byte value,
     31     // plus the offset, modulo the number of usable byte values, plus the minimum.
     32     // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
     33     uint32_t primary;
     34     if(isCompressible) {
     35         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;
     36         primary = (uint32_t)((offset % 251) + 4) << 16;
     37         offset /= 251;
     38     } else {
     39         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;
     40         primary = (uint32_t)((offset % 254) + 2) << 16;
     41         offset /= 254;
     42     }
     43     // First byte, assume no further overflow.
     44     return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));
     45 }
     46 
     47 uint32_t
     48 Collation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
     49     // Extract the third byte, minus the minimum byte value,
     50     // plus the offset, modulo the number of usable byte values, plus the minimum.
     51     offset += ((int32_t)(basePrimary >> 8) & 0xff) - 2;
     52     uint32_t primary = (uint32_t)((offset % 254) + 2) << 8;
     53     offset /= 254;
     54     // Same with the second byte,
     55     // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
     56     if(isCompressible) {
     57         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;
     58         primary |= (uint32_t)((offset % 251) + 4) << 16;
     59         offset /= 251;
     60     } else {
     61         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;
     62         primary |= (uint32_t)((offset % 254) + 2) << 16;
     63         offset /= 254;
     64     }
     65     // First byte, assume no further overflow.
     66     return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));
     67 }
     68 
     69 uint32_t
     70 Collation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
     71     // Extract the second byte, minus the minimum byte value,
     72     // minus the step, modulo the number of usable byte values, plus the minimum.
     73     // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
     74     // Assume no further underflow for the first byte.
     75     U_ASSERT(0 < step && step <= 0x7f);
     76     int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - step;
     77     if(isCompressible) {
     78         if(byte2 < 4) {
     79             byte2 += 251;
     80             basePrimary -= 0x1000000;
     81         }
     82     } else {
     83         if(byte2 < 2) {
     84             byte2 += 254;
     85             basePrimary -= 0x1000000;
     86         }
     87     }
     88     return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16);
     89 }
     90 
     91 uint32_t
     92 Collation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
     93     // Extract the third byte, minus the minimum byte value,
     94     // minus the step, modulo the number of usable byte values, plus the minimum.
     95     U_ASSERT(0 < step && step <= 0x7f);
     96     int32_t byte3 = ((int32_t)(basePrimary >> 8) & 0xff) - step;
     97     if(byte3 >= 2) {
     98         return (basePrimary & 0xffff0000) | ((uint32_t)byte3 << 8);
     99     }
    100     byte3 += 254;
    101     // Same with the second byte,
    102     // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
    103     int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - 1;
    104     if(isCompressible) {
    105         if(byte2 < 4) {
    106             byte2 = 0xfe;
    107             basePrimary -= 0x1000000;
    108         }
    109     } else {
    110         if(byte2 < 2) {
    111             byte2 = 0xff;
    112             basePrimary -= 0x1000000;
    113         }
    114     }
    115     // First byte, assume no further underflow.
    116     return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16) | ((uint32_t)byte3 << 8);
    117 }
    118 
    119 uint32_t
    120 Collation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) {
    121     uint32_t p = (uint32_t)(dataCE >> 32);  // three-byte primary pppppp00
    122     int32_t lower32 = (int32_t)dataCE;  // base code point b & step s: bbbbbbss (bit 7: isCompressible)
    123     int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f);  // delta * increment
    124     UBool isCompressible = (lower32 & 0x80) != 0;
    125     return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset);
    126 }
    127 
    128 uint32_t
    129 Collation::unassignedPrimaryFromCodePoint(UChar32 c) {
    130     // Create a gap before U+0000. Use c=-1 for [first unassigned].
    131     ++c;
    132     // Fourth byte: 18 values, every 14th byte value (gap of 13).
    133     uint32_t primary = 2 + (c % 18) * 14;
    134     c /= 18;
    135     // Third byte: 254 values.
    136     primary |= (2 + (c % 254)) << 8;
    137     c /= 254;
    138     // Second byte: 251 values 04..FE excluding the primary compression bytes.
    139     primary |= (4 + (c % 251)) << 16;
    140     // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).
    141     return primary | (UNASSIGNED_IMPLICIT_BYTE << 24);
    142 }
    143 
    144 U_NAMESPACE_END
    145 
    146 #endif  // !UCONFIG_NO_COLLATION
    147