1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ****************************************************************************** 5 * Copyright (C) 1996-2010, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ****************************************************************************** 8 */ 9 10 package com.ibm.icu.impl; 11 12 import java.io.DataOutputStream; 13 import java.io.IOException; 14 import java.io.OutputStream; 15 import java.util.Arrays; 16 17 import com.ibm.icu.lang.UCharacter; 18 import com.ibm.icu.text.UTF16; 19 20 /** 21 * Builder class to manipulate and generate a trie. 22 * This is useful for ICU data in primitive types. 23 * Provides a compact way to store information that is indexed by Unicode 24 * values, such as character properties, types, keyboard values, etc. This is 25 * very useful when you have a block of Unicode data that contains significant 26 * values while the rest of the Unicode data is unused in the application or 27 * when you have a lot of redundance, such as where all 21,000 Han ideographs 28 * have the same value. However, lookup is much faster than a hash table. 29 * A trie of any primitive data type serves two purposes: 30 * <UL type = round> 31 * <LI>Fast access of the indexed values. 32 * <LI>Smaller memory footprint. 33 * </UL> 34 * This is a direct port from the ICU4C version 35 * @author Syn Wee Quek 36 */ 37 public class IntTrieBuilder extends TrieBuilder 38 { 39 // public constructor ---------------------------------------------- 40 41 /** 42 * Copy constructor 43 */ 44 public IntTrieBuilder(IntTrieBuilder table) 45 { 46 super(table); 47 m_data_ = new int[m_dataCapacity_]; 48 System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_); 49 m_initialValue_ = table.m_initialValue_; 50 m_leadUnitValue_ = table.m_leadUnitValue_; 51 } 52 53 /** 54 * Constructs a build table 55 * @param aliasdata data to be filled into table 56 * @param maxdatalength maximum data length allowed in table 57 * @param initialvalue inital data value 58 * @param latin1linear is latin 1 to be linear 59 */ 60 public IntTrieBuilder(int aliasdata[], int maxdatalength, 61 int initialvalue, int leadunitvalue, 62 boolean latin1linear) 63 { 64 super(); 65 if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear 66 && maxdatalength < 1024)) { 67 throw new IllegalArgumentException( 68 "Argument maxdatalength is too small"); 69 } 70 71 if (aliasdata != null) { 72 m_data_ = aliasdata; 73 } 74 else { 75 m_data_ = new int[maxdatalength]; 76 } 77 78 // preallocate and reset the first data block (block index 0) 79 int j = DATA_BLOCK_LENGTH; 80 81 if (latin1linear) { 82 // preallocate and reset the first block (number 0) and Latin-1 83 // (U+0000..U+00ff) after that made sure above that 84 // maxDataLength >= 1024 85 // set indexes to point to consecutive data blocks 86 int i = 0; 87 do { 88 // do this at least for trie->index[0] even if that block is 89 // only partly used for Latin-1 90 m_index_[i ++] = j; 91 j += DATA_BLOCK_LENGTH; 92 } while (i < (256 >> SHIFT_)); 93 } 94 95 m_dataLength_ = j; 96 // reset the initially allocated blocks to the initial value 97 Arrays.fill(m_data_, 0, m_dataLength_, initialvalue); 98 m_initialValue_ = initialvalue; 99 m_leadUnitValue_ = leadunitvalue; 100 m_dataCapacity_ = maxdatalength; 101 m_isLatin1Linear_ = latin1linear; 102 m_isCompacted_ = false; 103 } 104 105 // public methods ------------------------------------------------------- 106 107 /*public final void print() 108 { 109 int i = 0; 110 int oldvalue = m_index_[i]; 111 int count = 0; 112 System.out.println("index length " + m_indexLength_ 113 + " --------------------------"); 114 while (i < m_indexLength_) { 115 if (m_index_[i] != oldvalue) { 116 System.out.println("index has " + count + " counts of " 117 + Integer.toHexString(oldvalue)); 118 count = 0; 119 oldvalue = m_index_[i]; 120 } 121 count ++; 122 i ++; 123 } 124 System.out.println("index has " + count + " counts of " 125 + Integer.toHexString(oldvalue)); 126 i = 0; 127 oldvalue = m_data_[i]; 128 count = 0; 129 System.out.println("data length " + m_dataLength_ 130 + " --------------------------"); 131 while (i < m_dataLength_) { 132 if (m_data_[i] != oldvalue) { 133 if ((oldvalue & 0xf1000000) == 0xf1000000) { 134 int temp = oldvalue & 0xffffff; 135 temp += 0x320; 136 oldvalue = 0xf1000000 | temp; 137 } 138 if ((oldvalue & 0xf2000000) == 0xf2000000) { 139 int temp = oldvalue & 0xffffff; 140 temp += 0x14a; 141 oldvalue = 0xf2000000 | temp; 142 } 143 System.out.println("data has " + count + " counts of " 144 + Integer.toHexString(oldvalue)); 145 count = 0; 146 oldvalue = m_data_[i]; 147 } 148 count ++; 149 i ++; 150 } 151 if ((oldvalue & 0xf1000000) == 0xf1000000) { 152 int temp = oldvalue & 0xffffff; 153 temp += 0x320; 154 oldvalue = 0xf1000000 | temp; 155 } 156 if ((oldvalue & 0xf2000000) == 0xf2000000) { 157 int temp = oldvalue & 0xffffff; 158 temp += 0x14a; 159 oldvalue = 0xf2000000 | temp; 160 } 161 System.out.println("data has " + count + " counts of " 162 + Integer.toHexString(oldvalue)); 163 } 164 */ 165 /** 166 * Gets a 32 bit data from the table data 167 * @param ch codepoint which data is to be retrieved 168 * @return the 32 bit data 169 */ 170 public int getValue(int ch) 171 { 172 // valid, uncompacted trie and valid c? 173 if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) { 174 return 0; 175 } 176 177 int block = m_index_[ch >> SHIFT_]; 178 return m_data_[Math.abs(block) + (ch & MASK_)]; 179 } 180 181 /** 182 * Get a 32 bit data from the table data 183 * @param ch code point for which data is to be retrieved. 184 * @param inBlockZero Output parameter, inBlockZero[0] returns true if the 185 * char maps into block zero, otherwise false. 186 * @return the 32 bit data value. 187 */ 188 public int getValue(int ch, boolean [] inBlockZero) 189 { 190 // valid, uncompacted trie and valid c? 191 if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) { 192 if (inBlockZero != null) { 193 inBlockZero[0] = true; 194 } 195 return 0; 196 } 197 198 int block = m_index_[ch >> SHIFT_]; 199 if (inBlockZero != null) { 200 inBlockZero[0] = (block == 0); 201 } 202 return m_data_[Math.abs(block) + (ch & MASK_)]; 203 } 204 205 206 /** 207 * Sets a 32 bit data in the table data 208 * @param ch codepoint which data is to be set 209 * @param value to set 210 * @return true if the set is successful, otherwise 211 * if the table has been compacted return false 212 */ 213 public boolean setValue(int ch, int value) 214 { 215 // valid, uncompacted trie and valid c? 216 if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) { 217 return false; 218 } 219 220 int block = getDataBlock(ch); 221 if (block < 0) { 222 return false; 223 } 224 225 m_data_[block + (ch & MASK_)] = value; 226 return true; 227 } 228 229 /** 230 * Serializes the build table with 32 bit data 231 * @param datamanipulate builder raw fold method implementation 232 * @param triedatamanipulate result trie fold method 233 * @return a new trie 234 */ 235 public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate, 236 Trie.DataManipulate triedatamanipulate) 237 { 238 if (datamanipulate == null) { 239 throw new IllegalArgumentException("Parameters can not be null"); 240 } 241 // fold and compact if necessary, also checks that indexLength is 242 // within limits 243 if (!m_isCompacted_) { 244 // compact once without overlap to improve folding 245 compact(false); 246 // fold the supplementary part of the index array 247 fold(datamanipulate); 248 // compact again with overlap for minimum data array length 249 compact(true); 250 m_isCompacted_ = true; 251 } 252 // is dataLength within limits? 253 if (m_dataLength_ >= MAX_DATA_LENGTH_) { 254 throw new ArrayIndexOutOfBoundsException("Data length too small"); 255 } 256 257 char index[] = new char[m_indexLength_]; 258 int data[] = new int[m_dataLength_]; 259 // write the index (stage 1) array and the 32-bit data (stage 2) array 260 // write 16-bit index values shifted right by INDEX_SHIFT_ 261 for (int i = 0; i < m_indexLength_; i ++) { 262 index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_); 263 } 264 // write 32-bit data values 265 System.arraycopy(m_data_, 0, data, 0, m_dataLength_); 266 267 int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_); 268 options |= OPTIONS_DATA_IS_32_BIT_; 269 if (m_isLatin1Linear_) { 270 options |= OPTIONS_LATIN1_IS_LINEAR_; 271 } 272 return new IntTrie(index, data, m_initialValue_, options, 273 triedatamanipulate); 274 } 275 276 277 /** 278 * Serializes the build table to an output stream. 279 * 280 * Compacts the build-time trie after all values are set, and then 281 * writes the serialized form onto an output stream. 282 * 283 * After this, this build-time Trie can only be serialized again and/or closed; 284 * no further values can be added. 285 * 286 * This function is the rough equivalent of utrie_seriaize() in ICU4C. 287 * 288 * @param os the output stream to which the seriaized trie will be written. 289 * If nul, the function still returns the size of the serialized Trie. 290 * @param reduceTo16Bits If true, reduce the data size to 16 bits. The resulting 291 * serialized form can then be used to create a CharTrie. 292 * @param datamanipulate builder raw fold method implementation 293 * @return the number of bytes written to the output stream. 294 */ 295 public int serialize(OutputStream os, boolean reduceTo16Bits, 296 TrieBuilder.DataManipulate datamanipulate) throws IOException { 297 if (datamanipulate == null) { 298 throw new IllegalArgumentException("Parameters can not be null"); 299 } 300 301 // fold and compact if necessary, also checks that indexLength is 302 // within limits 303 if (!m_isCompacted_) { 304 // compact once without overlap to improve folding 305 compact(false); 306 // fold the supplementary part of the index array 307 fold(datamanipulate); 308 // compact again with overlap for minimum data array length 309 compact(true); 310 m_isCompacted_ = true; 311 } 312 313 // is dataLength within limits? 314 int length; 315 if (reduceTo16Bits) { 316 length = m_dataLength_ + m_indexLength_; 317 } else { 318 length = m_dataLength_; 319 } 320 if (length >= MAX_DATA_LENGTH_) { 321 throw new ArrayIndexOutOfBoundsException("Data length too small"); 322 } 323 324 // struct UTrieHeader { 325 // int32_t signature; 326 // int32_t options (a bit field) 327 // int32_t indexLength 328 // int32_t dataLength 329 length = Trie.HEADER_LENGTH_ + 2*m_indexLength_; 330 if(reduceTo16Bits) { 331 length+=2*m_dataLength_; 332 } else { 333 length+=4*m_dataLength_; 334 } 335 336 if (os == null) { 337 // No output stream. Just return the length of the serialized Trie, in bytes. 338 return length; 339 } 340 341 DataOutputStream dos = new DataOutputStream(os); 342 dos.writeInt(Trie.HEADER_SIGNATURE_); 343 344 int options = Trie.INDEX_STAGE_1_SHIFT_ | (Trie.INDEX_STAGE_2_SHIFT_<<Trie.HEADER_OPTIONS_INDEX_SHIFT_); 345 if(!reduceTo16Bits) { 346 options |= Trie.HEADER_OPTIONS_DATA_IS_32_BIT_; 347 } 348 if(m_isLatin1Linear_) { 349 options |= Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_; 350 } 351 dos.writeInt(options); 352 353 dos.writeInt(m_indexLength_); 354 dos.writeInt(m_dataLength_); 355 356 /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */ 357 if(reduceTo16Bits) { 358 /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */ 359 for (int i=0; i<m_indexLength_; i++) { 360 int v = (m_index_[i] + m_indexLength_) >>> Trie.INDEX_STAGE_2_SHIFT_; 361 dos.writeChar(v); 362 } 363 364 /* write 16-bit data values */ 365 for(int i=0; i<m_dataLength_; i++) { 366 int v = m_data_[i] & 0x0000ffff; 367 dos.writeChar(v); 368 } 369 } else { 370 /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */ 371 for (int i=0; i<m_indexLength_; i++) { 372 int v = (m_index_[i]) >>> Trie.INDEX_STAGE_2_SHIFT_; 373 dos.writeChar(v); 374 } 375 376 /* write 32-bit data values */ 377 for(int i=0; i<m_dataLength_; i++) { 378 dos.writeInt(m_data_[i]); 379 } 380 } 381 382 return length; 383 384 } 385 386 387 /** 388 * Set a value in a range of code points [start..limit]. 389 * All code points c with start <= c < limit will get the value if 390 * overwrite is true or if the old value is 0. 391 * @param start the first code point to get the value 392 * @param limit one past the last code point to get the value 393 * @param value the value 394 * @param overwrite flag for whether old non-initial values are to be 395 * overwritten 396 * @return false if a failure occurred (illegal argument or data array 397 * overrun) 398 */ 399 public boolean setRange(int start, int limit, int value, 400 boolean overwrite) 401 { 402 // repeat value in [start..limit[ 403 // mark index values for repeat-data blocks by setting bit 31 of the 404 // index values fill around existing values if any, if(overwrite) 405 406 // valid, uncompacted trie and valid indexes? 407 if (m_isCompacted_ || start < UCharacter.MIN_VALUE 408 || start > UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE 409 || limit > (UCharacter.MAX_VALUE + 1) || start > limit) { 410 return false; 411 } 412 413 if (start == limit) { 414 return true; // nothing to do 415 } 416 417 if ((start & MASK_) != 0) { 418 // set partial block at [start..following block boundary[ 419 int block = getDataBlock(start); 420 if (block < 0) { 421 return false; 422 } 423 424 int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_; 425 if (nextStart <= limit) { 426 fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH, 427 value, overwrite); 428 start = nextStart; 429 } 430 else { 431 fillBlock(block, start & MASK_, limit & MASK_, 432 value, overwrite); 433 return true; 434 } 435 } 436 437 // number of positions in the last, partial block 438 int rest = limit & MASK_; 439 440 // round down limit to a block boundary 441 limit &= ~MASK_; 442 443 // iterate over all-value blocks 444 int repeatBlock = 0; 445 if (value == m_initialValue_) { 446 // repeatBlock = 0; assigned above 447 } 448 else { 449 repeatBlock = -1; 450 } 451 while (start < limit) { 452 // get index value 453 int block = m_index_[start >> SHIFT_]; 454 if (block > 0) { 455 // already allocated, fill in value 456 fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite); 457 } 458 else if (m_data_[-block] != value && (block == 0 || overwrite)) { 459 // set the repeatBlock instead of the current block 0 or range 460 // block 461 if (repeatBlock >= 0) { 462 m_index_[start >> SHIFT_] = -repeatBlock; 463 } 464 else { 465 // create and set and fill the repeatBlock 466 repeatBlock = getDataBlock(start); 467 if (repeatBlock < 0) { 468 return false; 469 } 470 471 // set the negative block number to indicate that it is a 472 // repeat block 473 m_index_[start >> SHIFT_] = -repeatBlock; 474 fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true); 475 } 476 } 477 478 start += DATA_BLOCK_LENGTH; 479 } 480 481 if (rest > 0) { 482 // set partial block at [last block boundary..limit[ 483 int block = getDataBlock(start); 484 if (block < 0) { 485 return false; 486 } 487 fillBlock(block, 0, rest, value, overwrite); 488 } 489 490 return true; 491 } 492 493 // protected data member ------------------------------------------------ 494 495 protected int m_data_[]; 496 protected int m_initialValue_; 497 498 // private data member ------------------------------------------------ 499 500 private int m_leadUnitValue_; 501 502 // private methods ------------------------------------------------------ 503 504 private int allocDataBlock() 505 { 506 int newBlock = m_dataLength_; 507 int newTop = newBlock + DATA_BLOCK_LENGTH; 508 if (newTop > m_dataCapacity_) { 509 // out of memory in the data array 510 return -1; 511 } 512 m_dataLength_ = newTop; 513 return newBlock; 514 } 515 516 /** 517 * No error checking for illegal arguments. 518 * @param ch codepoint to look for 519 * @return -1 if no new data block available (out of memory in data array) 520 */ 521 private int getDataBlock(int ch) 522 { 523 ch >>= SHIFT_; 524 int indexValue = m_index_[ch]; 525 if (indexValue > 0) { 526 return indexValue; 527 } 528 529 // allocate a new data block 530 int newBlock = allocDataBlock(); 531 if (newBlock < 0) { 532 // out of memory in the data array 533 return -1; 534 } 535 m_index_[ch] = newBlock; 536 537 // copy-on-write for a block from a setRange() 538 System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock, 539 DATA_BLOCK_LENGTH << 2); 540 return newBlock; 541 } 542 543 /** 544 * Compact a folded build-time trie. 545 * The compaction 546 * - removes blocks that are identical with earlier ones 547 * - overlaps adjacent blocks as much as possible (if overlap == true) 548 * - moves blocks in steps of the data granularity 549 * - moves and overlaps blocks that overlap with multiple values in the overlap region 550 * 551 * It does not 552 * - try to move and overlap blocks that are not already adjacent 553 * @param overlap flag 554 */ 555 private void compact(boolean overlap) 556 { 557 if (m_isCompacted_) { 558 return; // nothing left to do 559 } 560 561 // compaction 562 // initialize the index map with "block is used/unused" flags 563 findUnusedBlocks(); 564 565 // if Latin-1 is preallocated and linear, then do not compact Latin-1 566 // data 567 int overlapStart = DATA_BLOCK_LENGTH; 568 if (m_isLatin1Linear_ && SHIFT_ <= 8) { 569 overlapStart += 256; 570 } 571 572 int newStart = DATA_BLOCK_LENGTH; 573 int i; 574 for (int start = newStart; start < m_dataLength_;) { 575 // start: index of first entry of current block 576 // newStart: index where the current block is to be moved 577 // (right after current end of already-compacted data) 578 // skip blocks that are not used 579 if (m_map_[start >>> SHIFT_] < 0) { 580 // advance start to the next block 581 start += DATA_BLOCK_LENGTH; 582 // leave newStart with the previous block! 583 continue; 584 } 585 // search for an identical block 586 if (start >= overlapStart) { 587 i = findSameDataBlock(m_data_, newStart, start, 588 overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH); 589 if (i >= 0) { 590 // found an identical block, set the other block's index 591 // value for the current block 592 m_map_[start >>> SHIFT_] = i; 593 // advance start to the next block 594 start += DATA_BLOCK_LENGTH; 595 // leave newStart with the previous block! 596 continue; 597 } 598 } 599 // see if the beginning of this block can be overlapped with the 600 // end of the previous block 601 if(overlap && start>=overlapStart) { 602 /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ 603 for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_; 604 i>0 && !equal_int(m_data_, newStart-i, start, i); 605 i-=DATA_GRANULARITY_) {} 606 } else { 607 i=0; 608 } 609 if (i > 0) { 610 // some overlap 611 m_map_[start >>> SHIFT_] = newStart - i; 612 // move the non-overlapping indexes to their new positions 613 start += i; 614 for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) { 615 m_data_[newStart ++] = m_data_[start ++]; 616 } 617 } 618 else if (newStart < start) { 619 // no overlap, just move the indexes to their new positions 620 m_map_[start >>> SHIFT_] = newStart; 621 for (i = DATA_BLOCK_LENGTH; i > 0; -- i) { 622 m_data_[newStart ++] = m_data_[start ++]; 623 } 624 } 625 else { // no overlap && newStart==start 626 m_map_[start >>> SHIFT_] = start; 627 newStart += DATA_BLOCK_LENGTH; 628 start = newStart; 629 } 630 } 631 // now adjust the index (stage 1) table 632 for (i = 0; i < m_indexLength_; ++ i) { 633 m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_]; 634 } 635 m_dataLength_ = newStart; 636 } 637 638 /** 639 * Find the same data block 640 * @param data array 641 * @param dataLength 642 * @param otherBlock 643 * @param step 644 */ 645 private static final int findSameDataBlock(int data[], int dataLength, 646 int otherBlock, int step) 647 { 648 // ensure that we do not even partially get past dataLength 649 dataLength -= DATA_BLOCK_LENGTH; 650 651 for (int block = 0; block <= dataLength; block += step) { 652 if(equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) { 653 return block; 654 } 655 } 656 return -1; 657 } 658 659 /** 660 * Fold the normalization data for supplementary code points into 661 * a compact area on top of the BMP-part of the trie index, 662 * with the lead surrogates indexing this compact area. 663 * 664 * Duplicate the index values for lead surrogates: 665 * From inside the BMP area, where some may be overridden with folded values, 666 * to just after the BMP area, where they can be retrieved for 667 * code point lookups. 668 * @param manipulate fold implementation 669 */ 670 private final void fold(DataManipulate manipulate) 671 { 672 int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_]; 673 int index[] = m_index_; 674 // copy the lead surrogate indexes into a temporary array 675 System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0, 676 SURROGATE_BLOCK_COUNT_); 677 678 // set all values for lead surrogate code *units* to leadUnitValue 679 // so that by default runtime lookups will find no data for associated 680 // supplementary code points, unless there is data for such code points 681 // which will result in a non-zero folding value below that is set for 682 // the respective lead units 683 // the above saved the indexes for surrogate code *points* 684 // fill the indexes with simplified code from utrie_setRange32() 685 int block = 0; 686 if (m_leadUnitValue_ == m_initialValue_) { 687 // leadUnitValue == initialValue, use all-initial-value block 688 // block = 0; if block here left empty 689 } 690 else { 691 // create and fill the repeatBlock 692 block = allocDataBlock(); 693 if (block < 0) { 694 // data table overflow 695 throw new IllegalStateException("Internal error: Out of memory space"); 696 } 697 fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true); 698 // negative block number to indicate that it is a repeat block 699 block = -block; 700 } 701 for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) { 702 m_index_[c] = block; 703 } 704 705 // Fold significant index values into the area just after the BMP 706 // indexes. 707 // In case the first lead surrogate has significant data, 708 // its index block must be used first (in which case the folding is a 709 // no-op). 710 // Later all folded index blocks are moved up one to insert the copied 711 // lead surrogate indexes. 712 int indexLength = BMP_INDEX_LENGTH_; 713 // search for any index (stage 1) entries for supplementary code points 714 for (int c = 0x10000; c < 0x110000;) { 715 if (index[c >> SHIFT_] != 0) { 716 // there is data, treat the full block for a lead surrogate 717 c &= ~0x3ff; 718 // is there an identical index block? 719 block = findSameIndexBlock(index, indexLength, c >> SHIFT_); 720 721 // get a folded value for [c..c+0x400[ and, 722 // if different from the value for the lead surrogate code 723 // point, set it for the lead surrogate code unit 724 725 int value = manipulate.getFoldedValue(c, 726 block + SURROGATE_BLOCK_COUNT_); 727 if (value != getValue(UTF16.getLeadSurrogate(c))) { 728 if (!setValue(UTF16.getLeadSurrogate(c), value)) { 729 // data table overflow 730 throw new ArrayIndexOutOfBoundsException( 731 "Data table overflow"); 732 } 733 // if we did not find an identical index block... 734 if (block == indexLength) { 735 // move the actual index (stage 1) entries from the 736 // supplementary position to the new one 737 System.arraycopy(index, c >> SHIFT_, index, indexLength, 738 SURROGATE_BLOCK_COUNT_); 739 indexLength += SURROGATE_BLOCK_COUNT_; 740 } 741 } 742 c += 0x400; 743 } 744 else { 745 c += DATA_BLOCK_LENGTH; 746 } 747 } 748 749 // index array overflow? 750 // This is to guarantee that a folding offset is of the form 751 // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023. 752 // If the index is too large, then n>=1024 and more than 10 bits are 753 // necessary. 754 // In fact, it can only ever become n==1024 with completely unfoldable 755 // data and the additional block of duplicated values for lead 756 // surrogates. 757 if (indexLength >= MAX_INDEX_LENGTH_) { 758 throw new ArrayIndexOutOfBoundsException("Index table overflow"); 759 } 760 // make space for the lead surrogate index block and insert it between 761 // the BMP indexes and the folded ones 762 System.arraycopy(index, BMP_INDEX_LENGTH_, index, 763 BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_, 764 indexLength - BMP_INDEX_LENGTH_); 765 System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_, 766 SURROGATE_BLOCK_COUNT_); 767 indexLength += SURROGATE_BLOCK_COUNT_; 768 m_indexLength_ = indexLength; 769 } 770 771 /** 772 * @internal 773 */ 774 private void fillBlock(int block, int start, int limit, int value, 775 boolean overwrite) 776 { 777 limit += block; 778 block += start; 779 if (overwrite) { 780 while (block < limit) { 781 m_data_[block ++] = value; 782 } 783 } 784 else { 785 while (block < limit) { 786 if (m_data_[block] == m_initialValue_) { 787 m_data_[block] = value; 788 } 789 ++ block; 790 } 791 } 792 } 793 } 794 795