1 /* 2 ******************************************************************************* 3 * Copyright (C) 2013-2015, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * collationsettings.cpp 7 * 8 * created on: 2013feb07 9 * created by: Markus W. Scherer 10 */ 11 12 #include "unicode/utypes.h" 13 14 #if !UCONFIG_NO_COLLATION 15 16 #include "unicode/ucol.h" 17 #include "cmemory.h" 18 #include "collation.h" 19 #include "collationdata.h" 20 #include "collationsettings.h" 21 #include "sharedobject.h" 22 #include "uassert.h" 23 #include "umutex.h" 24 #include "uvectr32.h" 25 26 U_NAMESPACE_BEGIN 27 28 CollationSettings::CollationSettings(const CollationSettings &other) 29 : SharedObject(other), 30 options(other.options), variableTop(other.variableTop), 31 reorderTable(NULL), 32 minHighNoReorder(other.minHighNoReorder), 33 reorderRanges(NULL), reorderRangesLength(0), 34 reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), 35 fastLatinOptions(other.fastLatinOptions) { 36 UErrorCode errorCode = U_ZERO_ERROR; 37 copyReorderingFrom(other, errorCode); 38 if(fastLatinOptions >= 0) { 39 uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries)); 40 } 41 } 42 43 CollationSettings::~CollationSettings() { 44 if(reorderCodesCapacity != 0) { 45 uprv_free(const_cast<int32_t *>(reorderCodes)); 46 } 47 } 48 49 UBool 50 CollationSettings::operator==(const CollationSettings &other) const { 51 if(options != other.options) { return FALSE; } 52 if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; } 53 if(reorderCodesLength != other.reorderCodesLength) { return FALSE; } 54 for(int32_t i = 0; i < reorderCodesLength; ++i) { 55 if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; } 56 } 57 return TRUE; 58 } 59 60 int32_t 61 CollationSettings::hashCode() const { 62 int32_t h = options << 8; 63 if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; } 64 h ^= reorderCodesLength; 65 for(int32_t i = 0; i < reorderCodesLength; ++i) { 66 h ^= (reorderCodes[i] << i); 67 } 68 return h; 69 } 70 71 void 72 CollationSettings::resetReordering() { 73 // When we turn off reordering, we want to set a NULL permutation 74 // rather than a no-op permutation. 75 // Keep the memory via reorderCodes and its capacity. 76 reorderTable = NULL; 77 minHighNoReorder = 0; 78 reorderRangesLength = 0; 79 reorderCodesLength = 0; 80 } 81 82 void 83 CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length, 84 const uint32_t *ranges, int32_t rangesLength, 85 const uint8_t *table, UErrorCode &errorCode) { 86 if(U_FAILURE(errorCode)) { return; } 87 if(table != NULL && 88 (rangesLength == 0 ? 89 !reorderTableHasSplitBytes(table) : 90 rangesLength >= 2 && 91 // The first offset must be 0. The last offset must not be 0. 92 (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) { 93 // We need to release the memory before setting the alias pointer. 94 if(reorderCodesCapacity != 0) { 95 uprv_free(const_cast<int32_t *>(reorderCodes)); 96 reorderCodesCapacity = 0; 97 } 98 reorderTable = table; 99 reorderCodes = codes; 100 reorderCodesLength = length; 101 // Drop ranges before the first split byte. They are reordered by the table. 102 // This then speeds up reordering of the remaining ranges. 103 int32_t firstSplitByteRangeIndex = 0; 104 while(firstSplitByteRangeIndex < rangesLength && 105 (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) { 106 // The second byte of the primary limit is 0. 107 ++firstSplitByteRangeIndex; 108 } 109 if(firstSplitByteRangeIndex == rangesLength) { 110 U_ASSERT(!reorderTableHasSplitBytes(table)); 111 minHighNoReorder = 0; 112 reorderRanges = NULL; 113 reorderRangesLength = 0; 114 } else { 115 U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0); 116 minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; 117 reorderRanges = ranges + firstSplitByteRangeIndex; 118 reorderRangesLength = rangesLength - firstSplitByteRangeIndex; 119 } 120 return; 121 } 122 // Regenerate missing data. 123 setReordering(data, codes, length, errorCode); 124 } 125 126 void 127 CollationSettings::setReordering(const CollationData &data, 128 const int32_t *codes, int32_t codesLength, 129 UErrorCode &errorCode) { 130 if(U_FAILURE(errorCode)) { return; } 131 if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) { 132 resetReordering(); 133 return; 134 } 135 UVector32 rangesList(errorCode); 136 data.makeReorderRanges(codes, codesLength, rangesList, errorCode); 137 if(U_FAILURE(errorCode)) { return; } 138 int32_t rangesLength = rangesList.size(); 139 if(rangesLength == 0) { 140 resetReordering(); 141 return; 142 } 143 const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer()); 144 // ranges[] contains at least two (limit, offset) pairs. 145 // The first offset must be 0. The last offset must not be 0. 146 // Separators (at the low end) and trailing weights (at the high end) 147 // are never reordered. 148 U_ASSERT(rangesLength >= 2); 149 U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0); 150 minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; 151 152 // Write the lead byte permutation table. 153 // Set a 0 for each lead byte that has a range boundary in the middle. 154 uint8_t table[256]; 155 int32_t b = 0; 156 int32_t firstSplitByteRangeIndex = -1; 157 for(int32_t i = 0; i < rangesLength; ++i) { 158 uint32_t pair = ranges[i]; 159 int32_t limit1 = (int32_t)(pair >> 24); 160 while(b < limit1) { 161 table[b] = (uint8_t)(b + pair); 162 ++b; 163 } 164 // Check the second byte of the limit. 165 if((pair & 0xff0000) != 0) { 166 table[limit1] = 0; 167 b = limit1 + 1; 168 if(firstSplitByteRangeIndex < 0) { 169 firstSplitByteRangeIndex = i; 170 } 171 } 172 } 173 while(b <= 0xff) { 174 table[b] = (uint8_t)b; 175 ++b; 176 } 177 if(firstSplitByteRangeIndex < 0) { 178 // The lead byte permutation table alone suffices for reordering. 179 rangesLength = 0; 180 } else { 181 // Remove the ranges below the first split byte. 182 ranges += firstSplitByteRangeIndex; 183 rangesLength -= firstSplitByteRangeIndex; 184 } 185 setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode); 186 } 187 188 void 189 CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength, 190 const uint32_t *ranges, int32_t rangesLength, 191 const uint8_t *table, UErrorCode &errorCode) { 192 if(U_FAILURE(errorCode)) { return; } 193 int32_t *ownedCodes; 194 int32_t totalLength = codesLength + rangesLength; 195 U_ASSERT(totalLength > 0); 196 if(totalLength <= reorderCodesCapacity) { 197 ownedCodes = const_cast<int32_t *>(reorderCodes); 198 } else { 199 // Allocate one memory block for the codes, the ranges, and the 16-aligned table. 200 int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints 201 ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256); 202 if(ownedCodes == NULL) { 203 resetReordering(); 204 errorCode = U_MEMORY_ALLOCATION_ERROR; 205 return; 206 } 207 if(reorderCodesCapacity != 0) { 208 uprv_free(const_cast<int32_t *>(reorderCodes)); 209 } 210 reorderCodes = ownedCodes; 211 reorderCodesCapacity = capacity; 212 } 213 uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256); 214 uprv_memcpy(ownedCodes, codes, codesLength * 4); 215 uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4); 216 reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity); 217 reorderCodesLength = codesLength; 218 reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength; 219 reorderRangesLength = rangesLength; 220 } 221 222 void 223 CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) { 224 if(U_FAILURE(errorCode)) { return; } 225 if(!other.hasReordering()) { 226 resetReordering(); 227 return; 228 } 229 minHighNoReorder = other.minHighNoReorder; 230 if(other.reorderCodesCapacity == 0) { 231 // The reorder arrays are aliased to memory-mapped data. 232 reorderTable = other.reorderTable; 233 reorderRanges = other.reorderRanges; 234 reorderRangesLength = other.reorderRangesLength; 235 reorderCodes = other.reorderCodes; 236 reorderCodesLength = other.reorderCodesLength; 237 } else { 238 setReorderArrays(other.reorderCodes, other.reorderCodesLength, 239 other.reorderRanges, other.reorderRangesLength, 240 other.reorderTable, errorCode); 241 } 242 } 243 244 UBool 245 CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) { 246 U_ASSERT(table[0] == 0); 247 for(int32_t i = 1; i < 256; ++i) { 248 if(table[i] == 0) { 249 return TRUE; 250 } 251 } 252 return FALSE; 253 } 254 255 uint32_t 256 CollationSettings::reorderEx(uint32_t p) const { 257 if(p >= minHighNoReorder) { return p; } 258 // Round up p so that its lower 16 bits are >= any offset bits. 259 // Then compare q directly with (limit, offset) pairs. 260 uint32_t q = p | 0xffff; 261 uint32_t r; 262 const uint32_t *ranges = reorderRanges; 263 while(q >= (r = *ranges)) { ++ranges; } 264 return p + (r << 24); 265 } 266 267 void 268 CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { 269 if(U_FAILURE(errorCode)) { return; } 270 int32_t noStrength = options & ~STRENGTH_MASK; 271 switch(value) { 272 case UCOL_PRIMARY: 273 case UCOL_SECONDARY: 274 case UCOL_TERTIARY: 275 case UCOL_QUATERNARY: 276 case UCOL_IDENTICAL: 277 options = noStrength | (value << STRENGTH_SHIFT); 278 break; 279 case UCOL_DEFAULT: 280 options = noStrength | (defaultOptions & STRENGTH_MASK); 281 break; 282 default: 283 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 284 break; 285 } 286 } 287 288 void 289 CollationSettings::setFlag(int32_t bit, UColAttributeValue value, 290 int32_t defaultOptions, UErrorCode &errorCode) { 291 if(U_FAILURE(errorCode)) { return; } 292 switch(value) { 293 case UCOL_ON: 294 options |= bit; 295 break; 296 case UCOL_OFF: 297 options &= ~bit; 298 break; 299 case UCOL_DEFAULT: 300 options = (options & ~bit) | (defaultOptions & bit); 301 break; 302 default: 303 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 304 break; 305 } 306 } 307 308 void 309 CollationSettings::setCaseFirst(UColAttributeValue value, 310 int32_t defaultOptions, UErrorCode &errorCode) { 311 if(U_FAILURE(errorCode)) { return; } 312 int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK; 313 switch(value) { 314 case UCOL_OFF: 315 options = noCaseFirst; 316 break; 317 case UCOL_LOWER_FIRST: 318 options = noCaseFirst | CASE_FIRST; 319 break; 320 case UCOL_UPPER_FIRST: 321 options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK; 322 break; 323 case UCOL_DEFAULT: 324 options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK); 325 break; 326 default: 327 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 328 break; 329 } 330 } 331 332 void 333 CollationSettings::setAlternateHandling(UColAttributeValue value, 334 int32_t defaultOptions, UErrorCode &errorCode) { 335 if(U_FAILURE(errorCode)) { return; } 336 int32_t noAlternate = options & ~ALTERNATE_MASK; 337 switch(value) { 338 case UCOL_NON_IGNORABLE: 339 options = noAlternate; 340 break; 341 case UCOL_SHIFTED: 342 options = noAlternate | SHIFTED; 343 break; 344 case UCOL_DEFAULT: 345 options = noAlternate | (defaultOptions & ALTERNATE_MASK); 346 break; 347 default: 348 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 349 break; 350 } 351 } 352 353 void 354 CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { 355 if(U_FAILURE(errorCode)) { return; } 356 int32_t noMax = options & ~MAX_VARIABLE_MASK; 357 switch(value) { 358 case MAX_VAR_SPACE: 359 case MAX_VAR_PUNCT: 360 case MAX_VAR_SYMBOL: 361 case MAX_VAR_CURRENCY: 362 options = noMax | (value << MAX_VARIABLE_SHIFT); 363 break; 364 case UCOL_DEFAULT: 365 options = noMax | (defaultOptions & MAX_VARIABLE_MASK); 366 break; 367 default: 368 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 369 break; 370 } 371 } 372 373 U_NAMESPACE_END 374 375 #endif // !UCONFIG_NO_COLLATION 376