1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * collation.cpp 7 * 8 * created on: 2010oct27 9 * created by: Markus W. Scherer 10 */ 11 12 #include "unicode/utypes.h" 13 14 #if !UCONFIG_NO_COLLATION 15 16 #include "collation.h" 17 #include "uassert.h" 18 19 U_NAMESPACE_BEGIN 20 21 // Some compilers don't care if constants are defined in the .cpp file. 22 // MS Visual C++ does not like it, but gcc requires it. clang does not care. 23 #ifndef _MSC_VER 24 const uint8_t Collation::LEVEL_SEPARATOR_BYTE; 25 const uint8_t Collation::MERGE_SEPARATOR_BYTE; 26 const uint32_t Collation::ONLY_TERTIARY_MASK; 27 const uint32_t Collation::CASE_AND_TERTIARY_MASK; 28 #endif 29 30 uint32_t 31 Collation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) { 32 // Extract the second byte, minus the minimum byte value, 33 // plus the offset, modulo the number of usable byte values, plus the minimum. 34 // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 35 uint32_t primary; 36 if(isCompressible) { 37 offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4; 38 primary = (uint32_t)((offset % 251) + 4) << 16; 39 offset /= 251; 40 } else { 41 offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2; 42 primary = (uint32_t)((offset % 254) + 2) << 16; 43 offset /= 254; 44 } 45 // First byte, assume no further overflow. 46 return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24)); 47 } 48 49 uint32_t 50 Collation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) { 51 // Extract the third byte, minus the minimum byte value, 52 // plus the offset, modulo the number of usable byte values, plus the minimum. 53 offset += ((int32_t)(basePrimary >> 8) & 0xff) - 2; 54 uint32_t primary = (uint32_t)((offset % 254) + 2) << 8; 55 offset /= 254; 56 // Same with the second byte, 57 // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 58 if(isCompressible) { 59 offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4; 60 primary |= (uint32_t)((offset % 251) + 4) << 16; 61 offset /= 251; 62 } else { 63 offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2; 64 primary |= (uint32_t)((offset % 254) + 2) << 16; 65 offset /= 254; 66 } 67 // First byte, assume no further overflow. 68 return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24)); 69 } 70 71 uint32_t 72 Collation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) { 73 // Extract the second byte, minus the minimum byte value, 74 // minus the step, modulo the number of usable byte values, plus the minimum. 75 // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 76 // Assume no further underflow for the first byte. 77 U_ASSERT(0 < step && step <= 0x7f); 78 int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - step; 79 if(isCompressible) { 80 if(byte2 < 4) { 81 byte2 += 251; 82 basePrimary -= 0x1000000; 83 } 84 } else { 85 if(byte2 < 2) { 86 byte2 += 254; 87 basePrimary -= 0x1000000; 88 } 89 } 90 return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16); 91 } 92 93 uint32_t 94 Collation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) { 95 // Extract the third byte, minus the minimum byte value, 96 // minus the step, modulo the number of usable byte values, plus the minimum. 97 U_ASSERT(0 < step && step <= 0x7f); 98 int32_t byte3 = ((int32_t)(basePrimary >> 8) & 0xff) - step; 99 if(byte3 >= 2) { 100 return (basePrimary & 0xffff0000) | ((uint32_t)byte3 << 8); 101 } 102 byte3 += 254; 103 // Same with the second byte, 104 // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 105 int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - 1; 106 if(isCompressible) { 107 if(byte2 < 4) { 108 byte2 = 0xfe; 109 basePrimary -= 0x1000000; 110 } 111 } else { 112 if(byte2 < 2) { 113 byte2 = 0xff; 114 basePrimary -= 0x1000000; 115 } 116 } 117 // First byte, assume no further underflow. 118 return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16) | ((uint32_t)byte3 << 8); 119 } 120 121 uint32_t 122 Collation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) { 123 uint32_t p = (uint32_t)(dataCE >> 32); // three-byte primary pppppp00 124 int32_t lower32 = (int32_t)dataCE; // base code point b & step s: bbbbbbss (bit 7: isCompressible) 125 int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f); // delta * increment 126 UBool isCompressible = (lower32 & 0x80) != 0; 127 return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset); 128 } 129 130 uint32_t 131 Collation::unassignedPrimaryFromCodePoint(UChar32 c) { 132 // Create a gap before U+0000. Use c=-1 for [first unassigned]. 133 ++c; 134 // Fourth byte: 18 values, every 14th byte value (gap of 13). 135 uint32_t primary = 2 + (c % 18) * 14; 136 c /= 18; 137 // Third byte: 254 values. 138 primary |= (2 + (c % 254)) << 8; 139 c /= 254; 140 // Second byte: 251 values 04..FE excluding the primary compression bytes. 141 primary |= (4 + (c % 251)) << 16; 142 // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18). 143 return primary | (UNASSIGNED_IMPLICIT_BYTE << 24); 144 } 145 146 U_NAMESPACE_END 147 148 #endif // !UCONFIG_NO_COLLATION 149