1 /* 2 ******************************************************************************* 3 * Copyright (C) 2013-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * collationrootelements.cpp 7 * 8 * created on: 2013mar05 9 * created by: Markus W. Scherer 10 */ 11 12 #include "unicode/utypes.h" 13 14 #if !UCONFIG_NO_COLLATION 15 16 #include "collation.h" 17 #include "collationrootelements.h" 18 #include "uassert.h" 19 20 U_NAMESPACE_BEGIN 21 22 int64_t 23 CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const { 24 if(p == 0) { return 0; } 25 U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]); 26 int32_t index = findP(p); 27 uint32_t q = elements[index]; 28 uint32_t secTer; 29 if(p == (q & 0xffffff00)) { 30 // p == elements[index] is a root primary. Find the CE before it. 31 // We must not be in a primary range. 32 U_ASSERT((q & PRIMARY_STEP_MASK) == 0); 33 secTer = elements[index - 1]; 34 if((secTer & SEC_TER_DELTA_FLAG) == 0) { 35 // Primary CE just before p. 36 p = secTer & 0xffffff00; 37 secTer = Collation::COMMON_SEC_AND_TER_CE; 38 } else { 39 // secTer = last secondary & tertiary for the previous primary 40 index -= 2; 41 for(;;) { 42 p = elements[index]; 43 if((p & SEC_TER_DELTA_FLAG) == 0) { 44 p &= 0xffffff00; 45 break; 46 } 47 --index; 48 } 49 } 50 } else { 51 // p > elements[index] which is the previous primary. 52 // Find the last secondary & tertiary weights for it. 53 p = q & 0xffffff00; 54 secTer = Collation::COMMON_SEC_AND_TER_CE; 55 for(;;) { 56 q = elements[++index]; 57 if((q & SEC_TER_DELTA_FLAG) == 0) { 58 // We must not be in a primary range. 59 U_ASSERT((q & PRIMARY_STEP_MASK) == 0); 60 break; 61 } 62 secTer = q; 63 } 64 } 65 return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG); 66 } 67 68 int64_t 69 CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const { 70 if(p == 0) { return 0; } 71 int32_t index = findP(p); 72 if(p != (elements[index] & 0xffffff00)) { 73 for(;;) { 74 p = elements[++index]; 75 if((p & SEC_TER_DELTA_FLAG) == 0) { 76 // First primary after p. We must not be in a primary range. 77 U_ASSERT((p & PRIMARY_STEP_MASK) == 0); 78 break; 79 } 80 } 81 } 82 // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0. 83 return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE; 84 } 85 86 uint32_t 87 CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const { 88 int32_t index = findPrimary(p); 89 int32_t step; 90 uint32_t q = elements[index]; 91 if(p == (q & 0xffffff00)) { 92 // Found p itself. Return the previous primary. 93 // See if p is at the end of a previous range. 94 step = (int32_t)q & PRIMARY_STEP_MASK; 95 if(step == 0) { 96 // p is not at the end of a range. Look for the previous primary. 97 do { 98 p = elements[--index]; 99 } while((p & SEC_TER_DELTA_FLAG) != 0); 100 return p & 0xffffff00; 101 } 102 } else { 103 // p is in a range, and not at the start. 104 uint32_t nextElement = elements[index + 1]; 105 U_ASSERT(isEndOfPrimaryRange(nextElement)); 106 step = (int32_t)nextElement & PRIMARY_STEP_MASK; 107 } 108 // Return the previous range primary. 109 if((p & 0xffff) == 0) { 110 return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step); 111 } else { 112 return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step); 113 } 114 } 115 116 uint32_t 117 CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const { 118 int32_t index; 119 uint32_t previousSec, sec; 120 if(p == 0) { 121 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; 122 // Gap at the beginning of the secondary CE range. 123 previousSec = 0; 124 sec = elements[index] >> 16; 125 } else { 126 index = findPrimary(p) + 1; 127 previousSec = Collation::BEFORE_WEIGHT16; 128 sec = getFirstSecTerForPrimary(index) >> 16; 129 } 130 U_ASSERT(s >= sec); 131 while(s > sec) { 132 previousSec = sec; 133 U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); 134 sec = elements[index++] >> 16; 135 } 136 U_ASSERT(sec == s); 137 return previousSec; 138 } 139 140 uint32_t 141 CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const { 142 U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0); 143 int32_t index; 144 uint32_t previousTer, secTer; 145 if(p == 0) { 146 if(s == 0) { 147 index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; 148 // Gap at the beginning of the tertiary CE range. 149 previousTer = 0; 150 } else { 151 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; 152 previousTer = Collation::BEFORE_WEIGHT16; 153 } 154 secTer = elements[index] & ~SEC_TER_DELTA_FLAG; 155 } else { 156 index = findPrimary(p) + 1; 157 previousTer = Collation::BEFORE_WEIGHT16; 158 secTer = getFirstSecTerForPrimary(index); 159 } 160 uint32_t st = (s << 16) | t; 161 while(st > secTer) { 162 if((secTer >> 16) == s) { previousTer = secTer; } 163 U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); 164 secTer = elements[index++] & ~SEC_TER_DELTA_FLAG; 165 } 166 U_ASSERT(secTer == st); 167 return previousTer & 0xffff; 168 } 169 170 uint32_t 171 CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const { 172 U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1])); 173 uint32_t q = elements[++index]; 174 int32_t step; 175 if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) { 176 // Return the next primary in this range. 177 if((p & 0xffff) == 0) { 178 return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step); 179 } else { 180 return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step); 181 } 182 } else { 183 // Return the next primary in the list. 184 while((q & SEC_TER_DELTA_FLAG) != 0) { 185 q = elements[++index]; 186 } 187 U_ASSERT((q & PRIMARY_STEP_MASK) == 0); 188 return q; 189 } 190 } 191 192 uint32_t 193 CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const { 194 uint32_t secTer; 195 uint32_t secLimit; 196 if(index == 0) { 197 // primary = 0 198 U_ASSERT(s != 0); 199 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; 200 secTer = elements[index]; 201 // Gap at the end of the secondary CE range. 202 secLimit = 0x10000; 203 } else { 204 U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); 205 secTer = getFirstSecTerForPrimary(index + 1); 206 // If this is an explicit sec/ter unit, then it will be read once more. 207 // Gap for secondaries of primary CEs. 208 secLimit = getSecondaryBoundary(); 209 } 210 for(;;) { 211 uint32_t sec = secTer >> 16; 212 if(sec > s) { return sec; } 213 secTer = elements[++index]; 214 if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; } 215 } 216 } 217 218 uint32_t 219 CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const { 220 uint32_t secTer; 221 uint32_t terLimit; 222 if(index == 0) { 223 // primary = 0 224 if(s == 0) { 225 U_ASSERT(t != 0); 226 index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; 227 // Gap at the end of the tertiary CE range. 228 terLimit = 0x4000; 229 } else { 230 index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; 231 // Gap for tertiaries of primary/secondary CEs. 232 terLimit = getTertiaryBoundary(); 233 } 234 secTer = elements[index] & ~SEC_TER_DELTA_FLAG; 235 } else { 236 U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); 237 secTer = getFirstSecTerForPrimary(index + 1); 238 // If this is an explicit sec/ter unit, then it will be read once more. 239 terLimit = getTertiaryBoundary(); 240 } 241 uint32_t st = (s << 16) | t; 242 for(;;) { 243 if(secTer > st) { 244 U_ASSERT((secTer >> 16) == s); 245 return secTer & 0xffff; 246 } 247 secTer = elements[++index]; 248 // No tertiary greater than t for this primary+secondary. 249 if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; } 250 secTer &= ~SEC_TER_DELTA_FLAG; 251 } 252 } 253 254 uint32_t 255 CollationRootElements::getFirstSecTerForPrimary(int32_t index) const { 256 uint32_t secTer = elements[index]; 257 if((secTer & SEC_TER_DELTA_FLAG) == 0) { 258 // No sec/ter delta. 259 return Collation::COMMON_SEC_AND_TER_CE; 260 } 261 secTer &= ~SEC_TER_DELTA_FLAG; 262 if(secTer > Collation::COMMON_SEC_AND_TER_CE) { 263 // Implied sec/ter. 264 return Collation::COMMON_SEC_AND_TER_CE; 265 } 266 // Explicit sec/ter below common/common. 267 return secTer; 268 } 269 270 int32_t 271 CollationRootElements::findPrimary(uint32_t p) const { 272 // Requirement: p must occur as a root primary. 273 U_ASSERT((p & 0xff) == 0); // at most a 3-byte primary 274 int32_t index = findP(p); 275 // If p is in a range, then we just assume that p is an actual primary in this range. 276 // (Too cumbersome/expensive to check.) 277 // Otherwise, it must be an exact match. 278 U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00)); 279 return index; 280 } 281 282 int32_t 283 CollationRootElements::findP(uint32_t p) const { 284 // p need not occur as a root primary. 285 // For example, it might be a reordering group boundary. 286 U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE); 287 // modified binary search 288 int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX]; 289 U_ASSERT(p >= elements[start]); 290 int32_t limit = length - 1; 291 U_ASSERT(elements[limit] >= PRIMARY_SENTINEL); 292 U_ASSERT(p < elements[limit]); 293 while((start + 1) < limit) { 294 // Invariant: elements[start] and elements[limit] are primaries, 295 // and elements[start]<=p<=elements[limit]. 296 int32_t i = (start + limit) / 2; 297 uint32_t q = elements[i]; 298 if((q & SEC_TER_DELTA_FLAG) != 0) { 299 // Find the next primary. 300 int32_t j = i + 1; 301 for(;;) { 302 if(j == limit) { break; } 303 q = elements[j]; 304 if((q & SEC_TER_DELTA_FLAG) == 0) { 305 i = j; 306 break; 307 } 308 ++j; 309 } 310 if((q & SEC_TER_DELTA_FLAG) != 0) { 311 // Find the preceding primary. 312 j = i - 1; 313 for(;;) { 314 if(j == start) { break; } 315 q = elements[j]; 316 if((q & SEC_TER_DELTA_FLAG) == 0) { 317 i = j; 318 break; 319 } 320 --j; 321 } 322 if((q & SEC_TER_DELTA_FLAG) != 0) { 323 // No primary between start and limit. 324 break; 325 } 326 } 327 } 328 if(p < (q & 0xffffff00)) { // Reset the "step" bits of a range end primary. 329 limit = i; 330 } else { 331 start = i; 332 } 333 } 334 return start; 335 } 336 337 U_NAMESPACE_END 338 339 #endif // !UCONFIG_NO_COLLATION 340