1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2013-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ******************************************************************************* 9 * CollationRootElements.java, ported from collationrootelements.h/.cpp 10 * 11 * C++ version created on: 2013mar01 12 * created by: Markus W. Scherer 13 */ 14 15 package android.icu.impl.coll; 16 17 /** 18 * Container and access methods for collation elements and weights 19 * that occur in the root collator. 20 * Needed for finding boundaries for building a tailoring. 21 * 22 * This class takes and returns 16-bit secondary and tertiary weights. 23 * @hide Only a subset of ICU is exposed in Android 24 */ 25 public final class CollationRootElements { 26 public CollationRootElements(long[] rootElements) { 27 elements = rootElements; 28 } 29 30 /** 31 * Higher than any root primary. 32 */ 33 public static final long PRIMARY_SENTINEL = 0xffffff00L; 34 35 /** 36 * Flag in a root element, set if the element contains secondary & tertiary weights, 37 * rather than a primary. 38 */ 39 public static final int SEC_TER_DELTA_FLAG = 0x80; 40 /** 41 * Mask for getting the primary range step value from a primary-range-end element. 42 */ 43 public static final int PRIMARY_STEP_MASK = 0x7f; 44 45 /** 46 * Index of the first CE with a non-zero tertiary weight. 47 * Same as the start of the compact root elements table. 48 */ 49 public static final int IX_FIRST_TERTIARY_INDEX = 0; 50 /** 51 * Index of the first CE with a non-zero secondary weight. 52 */ 53 static final int IX_FIRST_SECONDARY_INDEX = 1; 54 /** 55 * Index of the first CE with a non-zero primary weight. 56 */ 57 static final int IX_FIRST_PRIMARY_INDEX = 2; 58 /** 59 * Must match Collation.COMMON_SEC_AND_TER_CE. 60 */ 61 static final int IX_COMMON_SEC_AND_TER_CE = 3; 62 /** 63 * Secondary & tertiary boundaries. 64 * Bits 31..24: [fixed last secondary common byte 45] 65 * Bits 23..16: [fixed first ignorable secondary byte 80] 66 * Bits 15.. 8: reserved, 0 67 * Bits 7.. 0: [fixed first ignorable tertiary byte 3C] 68 */ 69 static final int IX_SEC_TER_BOUNDARIES = 4; 70 /** 71 * The current number of indexes. 72 * Currently the same as elements[IX_FIRST_TERTIARY_INDEX]. 73 */ 74 static final int IX_COUNT = 5; 75 76 /** 77 * Returns the boundary between tertiary weights of primary/secondary CEs 78 * and those of tertiary CEs. 79 * This is the upper limit for tertiaries of primary/secondary CEs. 80 * This minus one is the lower limit for tertiaries of tertiary CEs. 81 */ 82 public int getTertiaryBoundary() { 83 return ((int)elements[IX_SEC_TER_BOUNDARIES] << 8) & 0xff00; 84 } 85 86 /** 87 * Returns the first assigned tertiary CE. 88 */ 89 long getFirstTertiaryCE() { 90 return elements[(int)elements[IX_FIRST_TERTIARY_INDEX]] & ~SEC_TER_DELTA_FLAG; 91 } 92 93 /** 94 * Returns the last assigned tertiary CE. 95 */ 96 long getLastTertiaryCE() { 97 return elements[(int)elements[IX_FIRST_SECONDARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG; 98 } 99 100 /** 101 * Returns the last common secondary weight. 102 * This is the lower limit for secondaries of primary CEs. 103 */ 104 public int getLastCommonSecondary() { 105 return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 16) & 0xff00; 106 } 107 108 /** 109 * Returns the boundary between secondary weights of primary CEs 110 * and those of secondary CEs. 111 * This is the upper limit for secondaries of primary CEs. 112 * This minus one is the lower limit for secondaries of secondary CEs. 113 */ 114 public int getSecondaryBoundary() { 115 return ((int)elements[IX_SEC_TER_BOUNDARIES] >> 8) & 0xff00; 116 } 117 118 /** 119 * Returns the first assigned secondary CE. 120 */ 121 long getFirstSecondaryCE() { 122 return elements[(int)elements[IX_FIRST_SECONDARY_INDEX]] & ~SEC_TER_DELTA_FLAG; 123 } 124 125 /** 126 * Returns the last assigned secondary CE. 127 */ 128 long getLastSecondaryCE() { 129 return elements[(int)elements[IX_FIRST_PRIMARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG; 130 } 131 132 /** 133 * Returns the first assigned primary weight. 134 */ 135 long getFirstPrimary() { 136 return elements[(int)elements[IX_FIRST_PRIMARY_INDEX]]; // step=0: cannot be a range end 137 } 138 139 /** 140 * Returns the first assigned primary CE. 141 */ 142 long getFirstPrimaryCE() { 143 return Collation.makeCE(getFirstPrimary()); 144 } 145 146 /** 147 * Returns the last root CE with a primary weight before p. 148 * Intended only for reordering group boundaries. 149 */ 150 long lastCEWithPrimaryBefore(long p) { 151 if(p == 0) { return 0; } 152 assert(p > elements[(int)elements[IX_FIRST_PRIMARY_INDEX]]); 153 int index = findP(p); 154 long q = elements[index]; 155 long secTer; 156 if(p == (q & 0xffffff00L)) { 157 // p == elements[index] is a root primary. Find the CE before it. 158 // We must not be in a primary range. 159 assert((q & PRIMARY_STEP_MASK) == 0); 160 secTer = elements[index - 1]; 161 if((secTer & SEC_TER_DELTA_FLAG) == 0) { 162 // Primary CE just before p. 163 p = secTer & 0xffffff00L; 164 secTer = Collation.COMMON_SEC_AND_TER_CE; 165 } else { 166 // secTer = last secondary & tertiary for the previous primary 167 index -= 2; 168 for(;;) { 169 p = elements[index]; 170 if((p & SEC_TER_DELTA_FLAG) == 0) { 171 p &= 0xffffff00L; 172 break; 173 } 174 --index; 175 } 176 } 177 } else { 178 // p > elements[index] which is the previous primary. 179 // Find the last secondary & tertiary weights for it. 180 p = q & 0xffffff00L; 181 secTer = Collation.COMMON_SEC_AND_TER_CE; 182 for(;;) { 183 q = elements[++index]; 184 if((q & SEC_TER_DELTA_FLAG) == 0) { 185 // We must not be in a primary range. 186 assert((q & PRIMARY_STEP_MASK) == 0); 187 break; 188 } 189 secTer = q; 190 } 191 } 192 return (p << 32) | (secTer & ~SEC_TER_DELTA_FLAG); 193 } 194 195 /** 196 * Returns the first root CE with a primary weight of at least p. 197 * Intended only for reordering group boundaries. 198 */ 199 long firstCEWithPrimaryAtLeast(long p) { 200 if(p == 0) { return 0; } 201 int index = findP(p); 202 if(p != (elements[index] & 0xffffff00L)) { 203 for(;;) { 204 p = elements[++index]; 205 if((p & SEC_TER_DELTA_FLAG) == 0) { 206 // First primary after p. We must not be in a primary range. 207 assert((p & PRIMARY_STEP_MASK) == 0); 208 break; 209 } 210 } 211 } 212 // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0. 213 return (p << 32) | Collation.COMMON_SEC_AND_TER_CE; 214 } 215 216 /** 217 * Returns the primary weight before p. 218 * p must be greater than the first root primary. 219 */ 220 long getPrimaryBefore(long p, boolean isCompressible) { 221 int index = findPrimary(p); 222 int step; 223 long q = elements[index]; 224 if(p == (q & 0xffffff00L)) { 225 // Found p itself. Return the previous primary. 226 // See if p is at the end of a previous range. 227 step = (int)q & PRIMARY_STEP_MASK; 228 if(step == 0) { 229 // p is not at the end of a range. Look for the previous primary. 230 do { 231 p = elements[--index]; 232 } while((p & SEC_TER_DELTA_FLAG) != 0); 233 return p & 0xffffff00L; 234 } 235 } else { 236 // p is in a range, and not at the start. 237 long nextElement = elements[index + 1]; 238 assert(isEndOfPrimaryRange(nextElement)); 239 step = (int)nextElement & PRIMARY_STEP_MASK; 240 } 241 // Return the previous range primary. 242 if((p & 0xffff) == 0) { 243 return Collation.decTwoBytePrimaryByOneStep(p, isCompressible, step); 244 } else { 245 return Collation.decThreeBytePrimaryByOneStep(p, isCompressible, step); 246 } 247 } 248 249 /** Returns the secondary weight before [p, s]. */ 250 int getSecondaryBefore(long p, int s) { 251 int index; 252 int previousSec, sec; 253 if(p == 0) { 254 index = (int)elements[IX_FIRST_SECONDARY_INDEX]; 255 // Gap at the beginning of the secondary CE range. 256 previousSec = 0; 257 sec = (int)(elements[index] >> 16); 258 } else { 259 index = findPrimary(p) + 1; 260 previousSec = Collation.BEFORE_WEIGHT16; 261 sec = (int)getFirstSecTerForPrimary(index) >>> 16; 262 } 263 assert(s >= sec); 264 while(s > sec) { 265 previousSec = sec; 266 assert((elements[index] & SEC_TER_DELTA_FLAG) != 0); 267 sec = (int)(elements[index++] >> 16); 268 } 269 assert(sec == s); 270 return previousSec; 271 } 272 273 /** Returns the tertiary weight before [p, s, t]. */ 274 int getTertiaryBefore(long p, int s, int t) { 275 assert((t & ~Collation.ONLY_TERTIARY_MASK) == 0); 276 int index; 277 int previousTer; 278 long secTer; 279 if(p == 0) { 280 if(s == 0) { 281 index = (int)elements[IX_FIRST_TERTIARY_INDEX]; 282 // Gap at the beginning of the tertiary CE range. 283 previousTer = 0; 284 } else { 285 index = (int)elements[IX_FIRST_SECONDARY_INDEX]; 286 previousTer = Collation.BEFORE_WEIGHT16; 287 } 288 secTer = elements[index] & ~SEC_TER_DELTA_FLAG; 289 } else { 290 index = findPrimary(p) + 1; 291 previousTer = Collation.BEFORE_WEIGHT16; 292 secTer = getFirstSecTerForPrimary(index); 293 } 294 long st = ((long)s << 16) | t; 295 while(st > secTer) { 296 if((int)(secTer >> 16) == s) { previousTer = (int)secTer; } 297 assert((elements[index] & SEC_TER_DELTA_FLAG) != 0); 298 secTer = elements[index++] & ~SEC_TER_DELTA_FLAG; 299 } 300 assert(secTer == st); 301 return previousTer & 0xffff; 302 } 303 304 /** 305 * Finds the index of the input primary. 306 * p must occur as a root primary, and must not be 0. 307 */ 308 int findPrimary(long p) { 309 // Requirement: p must occur as a root primary. 310 assert((p & 0xff) == 0); // at most a 3-byte primary 311 int index = findP(p); 312 // If p is in a range, then we just assume that p is an actual primary in this range. 313 // (Too cumbersome/expensive to check.) 314 // Otherwise, it must be an exact match. 315 assert(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00L)); 316 return index; 317 } 318 319 /** 320 * Returns the primary weight after p where index=findPrimary(p). 321 * p must be at least the first root primary. 322 */ 323 long getPrimaryAfter(long p, int index, boolean isCompressible) { 324 assert(p == (elements[index] & 0xffffff00L) || isEndOfPrimaryRange(elements[index + 1])); 325 long q = elements[++index]; 326 int step; 327 if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int)q & PRIMARY_STEP_MASK) != 0) { 328 // Return the next primary in this range. 329 if((p & 0xffff) == 0) { 330 return Collation.incTwoBytePrimaryByOffset(p, isCompressible, step); 331 } else { 332 return Collation.incThreeBytePrimaryByOffset(p, isCompressible, step); 333 } 334 } else { 335 // Return the next primary in the list. 336 while((q & SEC_TER_DELTA_FLAG) != 0) { 337 q = elements[++index]; 338 } 339 assert((q & PRIMARY_STEP_MASK) == 0); 340 return q; 341 } 342 } 343 /** 344 * Returns the secondary weight after [p, s] where index=findPrimary(p) 345 * except use index=0 for p=0. 346 * 347 * <p>Must return a weight for every root [p, s] as well as for every weight 348 * returned by getSecondaryBefore(). If p!=0 then s can be BEFORE_WEIGHT16. 349 * 350 * <p>Exception: [0, 0] is handled by the CollationBuilder: 351 * Both its lower and upper boundaries are special. 352 */ 353 int getSecondaryAfter(int index, int s) { 354 long secTer; 355 int secLimit; 356 if(index == 0) { 357 // primary = 0 358 assert(s != 0); 359 index = (int)elements[IX_FIRST_SECONDARY_INDEX]; 360 secTer = elements[index]; 361 // Gap at the end of the secondary CE range. 362 secLimit = 0x10000; 363 } else { 364 assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]); 365 secTer = getFirstSecTerForPrimary(index + 1); 366 // If this is an explicit sec/ter unit, then it will be read once more. 367 // Gap for secondaries of primary CEs. 368 secLimit = getSecondaryBoundary(); 369 } 370 for(;;) { 371 int sec = (int)(secTer >> 16); 372 if(sec > s) { return sec; } 373 secTer = elements[++index]; 374 if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; } 375 } 376 } 377 /** 378 * Returns the tertiary weight after [p, s, t] where index=findPrimary(p) 379 * except use index=0 for p=0. 380 * 381 * <p>Must return a weight for every root [p, s, t] as well as for every weight 382 * returned by getTertiaryBefore(). If s!=0 then t can be BEFORE_WEIGHT16. 383 * 384 * <p>Exception: [0, 0, 0] is handled by the CollationBuilder: 385 * Both its lower and upper boundaries are special. 386 */ 387 int getTertiaryAfter(int index, int s, int t) { 388 long secTer; 389 int terLimit; 390 if(index == 0) { 391 // primary = 0 392 if(s == 0) { 393 assert(t != 0); 394 index = (int)elements[IX_FIRST_TERTIARY_INDEX]; 395 // Gap at the end of the tertiary CE range. 396 terLimit = 0x4000; 397 } else { 398 index = (int)elements[IX_FIRST_SECONDARY_INDEX]; 399 // Gap for tertiaries of primary/secondary CEs. 400 terLimit = getTertiaryBoundary(); 401 } 402 secTer = elements[index] & ~SEC_TER_DELTA_FLAG; 403 } else { 404 assert(index >= (int)elements[IX_FIRST_PRIMARY_INDEX]); 405 secTer = getFirstSecTerForPrimary(index + 1); 406 // If this is an explicit sec/ter unit, then it will be read once more. 407 terLimit = getTertiaryBoundary(); 408 } 409 long st = (((long)s & 0xffffffffL) << 16) | t; 410 for(;;) { 411 if(secTer > st) { 412 assert((secTer >> 16) == s); 413 return (int)secTer & 0xffff; 414 } 415 secTer = elements[++index]; 416 // No tertiary greater than t for this primary+secondary. 417 if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; } 418 secTer &= ~SEC_TER_DELTA_FLAG; 419 } 420 } 421 422 /** 423 * Returns the first secondary & tertiary weights for p where index=findPrimary(p)+1. 424 */ 425 private long getFirstSecTerForPrimary(int index) { 426 long secTer = elements[index]; 427 if((secTer & SEC_TER_DELTA_FLAG) == 0) { 428 // No sec/ter delta. 429 return Collation.COMMON_SEC_AND_TER_CE; 430 } 431 secTer &= ~SEC_TER_DELTA_FLAG; 432 if(secTer > Collation.COMMON_SEC_AND_TER_CE) { 433 // Implied sec/ter. 434 return Collation.COMMON_SEC_AND_TER_CE; 435 } 436 // Explicit sec/ter below common/common. 437 return secTer; 438 } 439 440 /** 441 * Finds the largest index i where elements[i]<=p. 442 * Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL). 443 * Does not require that p is a root collator primary. 444 */ 445 private int findP(long p) { 446 // p need not occur as a root primary. 447 // For example, it might be a reordering group boundary. 448 assert((p >> 24) != Collation.UNASSIGNED_IMPLICIT_BYTE); 449 // modified binary search 450 int start = (int)elements[IX_FIRST_PRIMARY_INDEX]; 451 assert(p >= elements[start]); 452 int limit = elements.length - 1; 453 assert(elements[limit] >= PRIMARY_SENTINEL); 454 assert(p < elements[limit]); 455 while((start + 1) < limit) { 456 // Invariant: elements[start] and elements[limit] are primaries, 457 // and elements[start]<=p<=elements[limit]. 458 int i = (int)(((long)start + (long)limit) / 2); 459 long q = elements[i]; 460 if((q & SEC_TER_DELTA_FLAG) != 0) { 461 // Find the next primary. 462 int j = i + 1; 463 for(;;) { 464 if(j == limit) { break; } 465 q = elements[j]; 466 if((q & SEC_TER_DELTA_FLAG) == 0) { 467 i = j; 468 break; 469 } 470 ++j; 471 } 472 if((q & SEC_TER_DELTA_FLAG) != 0) { 473 // Find the preceding primary. 474 j = i - 1; 475 for(;;) { 476 if(j == start) { break; } 477 q = elements[j]; 478 if((q & SEC_TER_DELTA_FLAG) == 0) { 479 i = j; 480 break; 481 } 482 --j; 483 } 484 if((q & SEC_TER_DELTA_FLAG) != 0) { 485 // No primary between start and limit. 486 break; 487 } 488 } 489 } 490 if(p < (q & 0xffffff00L)) { // Reset the "step" bits of a range end primary. 491 limit = i; 492 } else { 493 start = i; 494 } 495 } 496 return start; 497 } 498 499 private static boolean isEndOfPrimaryRange(long q) { 500 return (q & SEC_TER_DELTA_FLAG) == 0 && (q & PRIMARY_STEP_MASK) != 0; 501 } 502 503 /** 504 * Data structure: See ICU4C source/i18n/collationrootelements.h. 505 */ 506 private long[] elements; 507 } 508