1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * collation.cpp 7 * 8 * created on: 2010oct27 9 * created by: Markus W. Scherer 10 */ 11 12 #include "unicode/utypes.h" 13 14 #if !UCONFIG_NO_COLLATION 15 16 #include "collation.h" 17 #include "uassert.h" 18 19 U_NAMESPACE_BEGIN 20 21 // Some compilers don't care if constants are defined in the .cpp file. 22 // MS Visual C++ does not like it, but gcc requires it. clang does not care. 23 #ifndef _MSC_VER 24 const uint32_t Collation::ONLY_TERTIARY_MASK; 25 const uint32_t Collation::CASE_AND_TERTIARY_MASK; 26 #endif 27 28 uint32_t 29 Collation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) { 30 // Extract the second byte, minus the minimum byte value, 31 // plus the offset, modulo the number of usable byte values, plus the minimum. 32 // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 33 uint32_t primary; 34 if(isCompressible) { 35 offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4; 36 primary = (uint32_t)((offset % 251) + 4) << 16; 37 offset /= 251; 38 } else { 39 offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2; 40 primary = (uint32_t)((offset % 254) + 2) << 16; 41 offset /= 254; 42 } 43 // First byte, assume no further overflow. 44 return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24)); 45 } 46 47 uint32_t 48 Collation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) { 49 // Extract the third byte, minus the minimum byte value, 50 // plus the offset, modulo the number of usable byte values, plus the minimum. 51 offset += ((int32_t)(basePrimary >> 8) & 0xff) - 2; 52 uint32_t primary = (uint32_t)((offset % 254) + 2) << 8; 53 offset /= 254; 54 // Same with the second byte, 55 // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 56 if(isCompressible) { 57 offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4; 58 primary |= (uint32_t)((offset % 251) + 4) << 16; 59 offset /= 251; 60 } else { 61 offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2; 62 primary |= (uint32_t)((offset % 254) + 2) << 16; 63 offset /= 254; 64 } 65 // First byte, assume no further overflow. 66 return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24)); 67 } 68 69 uint32_t 70 Collation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) { 71 // Extract the second byte, minus the minimum byte value, 72 // minus the step, modulo the number of usable byte values, plus the minimum. 73 // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 74 // Assume no further underflow for the first byte. 75 U_ASSERT(0 < step && step <= 0x7f); 76 int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - step; 77 if(isCompressible) { 78 if(byte2 < 4) { 79 byte2 += 251; 80 basePrimary -= 0x1000000; 81 } 82 } else { 83 if(byte2 < 2) { 84 byte2 += 254; 85 basePrimary -= 0x1000000; 86 } 87 } 88 return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16); 89 } 90 91 uint32_t 92 Collation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) { 93 // Extract the third byte, minus the minimum byte value, 94 // minus the step, modulo the number of usable byte values, plus the minimum. 95 U_ASSERT(0 < step && step <= 0x7f); 96 int32_t byte3 = ((int32_t)(basePrimary >> 8) & 0xff) - step; 97 if(byte3 >= 2) { 98 return (basePrimary & 0xffff0000) | ((uint32_t)byte3 << 8); 99 } 100 byte3 += 254; 101 // Same with the second byte, 102 // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 103 int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - 1; 104 if(isCompressible) { 105 if(byte2 < 4) { 106 byte2 = 0xfe; 107 basePrimary -= 0x1000000; 108 } 109 } else { 110 if(byte2 < 2) { 111 byte2 = 0xff; 112 basePrimary -= 0x1000000; 113 } 114 } 115 // First byte, assume no further underflow. 116 return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16) | ((uint32_t)byte3 << 8); 117 } 118 119 uint32_t 120 Collation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) { 121 uint32_t p = (uint32_t)(dataCE >> 32); // three-byte primary pppppp00 122 int32_t lower32 = (int32_t)dataCE; // base code point b & step s: bbbbbbss (bit 7: isCompressible) 123 int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f); // delta * increment 124 UBool isCompressible = (lower32 & 0x80) != 0; 125 return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset); 126 } 127 128 uint32_t 129 Collation::unassignedPrimaryFromCodePoint(UChar32 c) { 130 // Create a gap before U+0000. Use c=-1 for [first unassigned]. 131 ++c; 132 // Fourth byte: 18 values, every 14th byte value (gap of 13). 133 uint32_t primary = 2 + (c % 18) * 14; 134 c /= 18; 135 // Third byte: 254 values. 136 primary |= (2 + (c % 254)) << 8; 137 c /= 254; 138 // Second byte: 251 values 04..FE excluding the primary compression bytes. 139 primary |= (4 + (c % 251)) << 16; 140 // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18). 141 return primary | (UNASSIGNED_IMPLICIT_BYTE << 24); 142 } 143 144 U_NAMESPACE_END 145 146 #endif // !UCONFIG_NO_COLLATION 147