Home | History | Annotate | Download | only in impl
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5 ******************************************************************************
      6 * Copyright (C) 1996-2010, International Business Machines Corporation and   *
      7 * others. All Rights Reserved.                                               *
      8 ******************************************************************************
      9 */
     10 
     11 package android.icu.impl;
     12 
     13 import java.io.DataOutputStream;
     14 import java.io.IOException;
     15 import java.io.OutputStream;
     16 import java.util.Arrays;
     17 
     18 import android.icu.lang.UCharacter;
     19 import android.icu.text.UTF16;
     20 
     21 /**
     22  * Builder class to manipulate and generate a trie.
     23  * This is useful for ICU data in primitive types.
     24  * Provides a compact way to store information that is indexed by Unicode
     25  * values, such as character properties, types, keyboard values, etc. This is
     26  * very useful when you have a block of Unicode data that contains significant
     27  * values while the rest of the Unicode data is unused in the application or
     28  * when you have a lot of redundance, such as where all 21,000 Han ideographs
     29  * have the same value.  However, lookup is much faster than a hash table.
     30  * A trie of any primitive data type serves two purposes:
     31  * <UL type = round>
     32  *     <LI>Fast access of the indexed values.
     33  *     <LI>Smaller memory footprint.
     34  * </UL>
     35  * This is a direct port from the ICU4C version
     36  * @author             Syn Wee Quek
     37  * @hide Only a subset of ICU is exposed in Android
     38  */
     39 public class IntTrieBuilder extends TrieBuilder
     40 {
     41     // public constructor ----------------------------------------------
     42 
     43     /**
     44      * Copy constructor
     45      */
     46     public IntTrieBuilder(IntTrieBuilder table)
     47     {
     48         super(table);
     49         m_data_ = new int[m_dataCapacity_];
     50         System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
     51         m_initialValue_ = table.m_initialValue_;
     52         m_leadUnitValue_ = table.m_leadUnitValue_;
     53     }
     54 
     55     /**
     56      * Constructs a build table
     57      * @param aliasdata data to be filled into table
     58      * @param maxdatalength maximum data length allowed in table
     59      * @param initialvalue inital data value
     60      * @param latin1linear is latin 1 to be linear
     61      */
     62     public IntTrieBuilder(int aliasdata[], int maxdatalength,
     63                           int initialvalue, int leadunitvalue,
     64                           boolean latin1linear)
     65     {
     66         super();
     67         if (maxdatalength < DATA_BLOCK_LENGTH || (latin1linear
     68                                                   && maxdatalength < 1024)) {
     69             throw new IllegalArgumentException(
     70                                                "Argument maxdatalength is too small");
     71         }
     72 
     73         if (aliasdata != null) {
     74             m_data_ = aliasdata;
     75         }
     76         else {
     77             m_data_ = new int[maxdatalength];
     78         }
     79 
     80         // preallocate and reset the first data block (block index 0)
     81         int j = DATA_BLOCK_LENGTH;
     82 
     83         if (latin1linear) {
     84             // preallocate and reset the first block (number 0) and Latin-1
     85             // (U+0000..U+00ff) after that made sure above that
     86             // maxDataLength >= 1024
     87             // set indexes to point to consecutive data blocks
     88             int i = 0;
     89             do {
     90                 // do this at least for trie->index[0] even if that block is
     91                 // only partly used for Latin-1
     92                 m_index_[i ++] = j;
     93                 j += DATA_BLOCK_LENGTH;
     94             } while (i < (256 >> SHIFT_));
     95         }
     96 
     97         m_dataLength_ = j;
     98         // reset the initially allocated blocks to the initial value
     99         Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
    100         m_initialValue_ = initialvalue;
    101         m_leadUnitValue_ = leadunitvalue;
    102         m_dataCapacity_ = maxdatalength;
    103         m_isLatin1Linear_ = latin1linear;
    104         m_isCompacted_ = false;
    105     }
    106 
    107     // public methods -------------------------------------------------------
    108 
    109     /*public final void print()
    110       {
    111       int i = 0;
    112       int oldvalue = m_index_[i];
    113       int count = 0;
    114       System.out.println("index length " + m_indexLength_
    115       + " --------------------------");
    116       while (i < m_indexLength_) {
    117       if (m_index_[i] != oldvalue) {
    118       System.out.println("index has " + count + " counts of "
    119       + Integer.toHexString(oldvalue));
    120       count = 0;
    121       oldvalue = m_index_[i];
    122       }
    123       count ++;
    124       i ++;
    125       }
    126       System.out.println("index has " + count + " counts of "
    127       + Integer.toHexString(oldvalue));
    128       i = 0;
    129       oldvalue = m_data_[i];
    130       count = 0;
    131       System.out.println("data length " + m_dataLength_
    132       + " --------------------------");
    133       while (i < m_dataLength_) {
    134       if (m_data_[i] != oldvalue) {
    135       if ((oldvalue & 0xf1000000) == 0xf1000000) {
    136       int temp = oldvalue & 0xffffff;
    137       temp += 0x320;
    138       oldvalue = 0xf1000000 | temp;
    139       }
    140       if ((oldvalue & 0xf2000000) == 0xf2000000) {
    141       int temp = oldvalue & 0xffffff;
    142       temp += 0x14a;
    143       oldvalue = 0xf2000000 | temp;
    144       }
    145       System.out.println("data has " + count + " counts of "
    146       + Integer.toHexString(oldvalue));
    147       count = 0;
    148       oldvalue = m_data_[i];
    149       }
    150       count ++;
    151       i ++;
    152       }
    153       if ((oldvalue & 0xf1000000) == 0xf1000000) {
    154       int temp = oldvalue & 0xffffff;
    155       temp += 0x320;
    156       oldvalue = 0xf1000000 | temp;
    157       }
    158       if ((oldvalue & 0xf2000000) == 0xf2000000) {
    159       int temp = oldvalue & 0xffffff;
    160       temp += 0x14a;
    161       oldvalue = 0xf2000000 | temp;
    162       }
    163       System.out.println("data has " + count + " counts of "
    164       + Integer.toHexString(oldvalue));
    165       }
    166     */
    167     /**
    168      * Gets a 32 bit data from the table data
    169      * @param ch codepoint which data is to be retrieved
    170      * @return the 32 bit data
    171      */
    172     public int getValue(int ch)
    173     {
    174         // valid, uncompacted trie and valid c?
    175         if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
    176             return 0;
    177         }
    178 
    179         int block = m_index_[ch >> SHIFT_];
    180         return m_data_[Math.abs(block) + (ch & MASK_)];
    181     }
    182 
    183     /**
    184      * Get a 32 bit data from the table data
    185      * @param ch  code point for which data is to be retrieved.
    186      * @param inBlockZero  Output parameter, inBlockZero[0] returns true if the
    187      *                      char maps into block zero, otherwise false.
    188      * @return the 32 bit data value.
    189      */
    190     public int getValue(int ch, boolean [] inBlockZero)
    191     {
    192         // valid, uncompacted trie and valid c?
    193         if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
    194             if (inBlockZero != null) {
    195                 inBlockZero[0] = true;
    196             }
    197             return 0;
    198         }
    199 
    200         int block = m_index_[ch >> SHIFT_];
    201         if (inBlockZero != null) {
    202             inBlockZero[0] = (block == 0);
    203         }
    204         return m_data_[Math.abs(block) + (ch & MASK_)];
    205     }
    206 
    207 
    208     /**
    209      * Sets a 32 bit data in the table data
    210      * @param ch codepoint which data is to be set
    211      * @param value to set
    212      * @return true if the set is successful, otherwise
    213      *              if the table has been compacted return false
    214      */
    215     public boolean setValue(int ch, int value)
    216     {
    217         // valid, uncompacted trie and valid c?
    218         if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
    219             return false;
    220         }
    221 
    222         int block = getDataBlock(ch);
    223         if (block < 0) {
    224             return false;
    225         }
    226 
    227         m_data_[block + (ch & MASK_)] = value;
    228         return true;
    229     }
    230 
    231     /**
    232      * Serializes the build table with 32 bit data
    233      * @param datamanipulate builder raw fold method implementation
    234      * @param triedatamanipulate result trie fold method
    235      * @return a new trie
    236      */
    237     public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate,
    238                              Trie.DataManipulate triedatamanipulate)
    239     {
    240         if (datamanipulate == null) {
    241             throw new IllegalArgumentException("Parameters can not be null");
    242         }
    243         // fold and compact if necessary, also checks that indexLength is
    244         // within limits
    245         if (!m_isCompacted_) {
    246             // compact once without overlap to improve folding
    247             compact(false);
    248             // fold the supplementary part of the index array
    249             fold(datamanipulate);
    250             // compact again with overlap for minimum data array length
    251             compact(true);
    252             m_isCompacted_ = true;
    253         }
    254         // is dataLength within limits?
    255         if (m_dataLength_ >= MAX_DATA_LENGTH_) {
    256             throw new ArrayIndexOutOfBoundsException("Data length too small");
    257         }
    258 
    259         char index[] = new char[m_indexLength_];
    260         int data[] = new int[m_dataLength_];
    261         // write the index (stage 1) array and the 32-bit data (stage 2) array
    262         // write 16-bit index values shifted right by INDEX_SHIFT_
    263         for (int i = 0; i < m_indexLength_; i ++) {
    264             index[i] = (char)(m_index_[i] >>> INDEX_SHIFT_);
    265         }
    266         // write 32-bit data values
    267         System.arraycopy(m_data_, 0, data, 0, m_dataLength_);
    268 
    269         int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_);
    270         options |= OPTIONS_DATA_IS_32_BIT_;
    271         if (m_isLatin1Linear_) {
    272             options |= OPTIONS_LATIN1_IS_LINEAR_;
    273         }
    274         return new IntTrie(index, data, m_initialValue_, options,
    275                            triedatamanipulate);
    276     }
    277 
    278 
    279     /**
    280      * Serializes the build table to an output stream.
    281      *
    282      * Compacts the build-time trie after all values are set, and then
    283      * writes the serialized form onto an output stream.
    284      *
    285      * After this, this build-time Trie can only be serialized again and/or closed;
    286      * no further values can be added.
    287      *
    288      * This function is the rough equivalent of utrie_seriaize() in ICU4C.
    289      *
    290      * @param os the output stream to which the seriaized trie will be written.
    291      *         If nul, the function still returns the size of the serialized Trie.
    292      * @param reduceTo16Bits If true, reduce the data size to 16 bits.  The resulting
    293      *         serialized form can then be used to create a CharTrie.
    294      * @param datamanipulate builder raw fold method implementation
    295      * @return the number of bytes written to the output stream.
    296      */
    297      public int serialize(OutputStream os, boolean reduceTo16Bits,
    298             TrieBuilder.DataManipulate datamanipulate)  throws IOException {
    299          if (datamanipulate == null) {
    300              throw new IllegalArgumentException("Parameters can not be null");
    301          }
    302 
    303          // fold and compact if necessary, also checks that indexLength is
    304          // within limits
    305          if (!m_isCompacted_) {
    306              // compact once without overlap to improve folding
    307              compact(false);
    308              // fold the supplementary part of the index array
    309              fold(datamanipulate);
    310              // compact again with overlap for minimum data array length
    311              compact(true);
    312              m_isCompacted_ = true;
    313          }
    314 
    315          // is dataLength within limits?
    316          int length;
    317          if (reduceTo16Bits) {
    318              length = m_dataLength_ + m_indexLength_;
    319          } else {
    320              length = m_dataLength_;
    321          }
    322          if (length >= MAX_DATA_LENGTH_) {
    323              throw new ArrayIndexOutOfBoundsException("Data length too small");
    324          }
    325 
    326          //  struct UTrieHeader {
    327          //      int32_t   signature;
    328          //      int32_t   options  (a bit field)
    329          //      int32_t   indexLength
    330          //      int32_t   dataLength
    331          length = Trie.HEADER_LENGTH_ + 2*m_indexLength_;
    332          if(reduceTo16Bits) {
    333              length+=2*m_dataLength_;
    334          } else {
    335              length+=4*m_dataLength_;
    336          }
    337 
    338          if (os == null) {
    339              // No output stream.  Just return the length of the serialized Trie, in bytes.
    340              return length;
    341          }
    342 
    343          DataOutputStream dos = new DataOutputStream(os);
    344          dos.writeInt(Trie.HEADER_SIGNATURE_);
    345 
    346          int options = Trie.INDEX_STAGE_1_SHIFT_ | (Trie.INDEX_STAGE_2_SHIFT_<<Trie.HEADER_OPTIONS_INDEX_SHIFT_);
    347          if(!reduceTo16Bits) {
    348              options |= Trie.HEADER_OPTIONS_DATA_IS_32_BIT_;
    349          }
    350          if(m_isLatin1Linear_) {
    351              options |= Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
    352          }
    353          dos.writeInt(options);
    354 
    355          dos.writeInt(m_indexLength_);
    356          dos.writeInt(m_dataLength_);
    357 
    358          /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
    359          if(reduceTo16Bits) {
    360              /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
    361              for (int i=0; i<m_indexLength_; i++) {
    362                  int v = (m_index_[i] + m_indexLength_) >>> Trie.INDEX_STAGE_2_SHIFT_;
    363                  dos.writeChar(v);
    364              }
    365 
    366              /* write 16-bit data values */
    367              for(int i=0; i<m_dataLength_; i++) {
    368                  int v = m_data_[i] & 0x0000ffff;
    369                  dos.writeChar(v);
    370              }
    371          } else {
    372              /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
    373              for (int i=0; i<m_indexLength_; i++) {
    374                  int v = (m_index_[i]) >>> Trie.INDEX_STAGE_2_SHIFT_;
    375                  dos.writeChar(v);
    376              }
    377 
    378              /* write 32-bit data values */
    379              for(int i=0; i<m_dataLength_; i++) {
    380                  dos.writeInt(m_data_[i]);
    381              }
    382         }
    383 
    384          return length;
    385 
    386     }
    387 
    388 
    389     /**
    390      * Set a value in a range of code points [start..limit].
    391      * All code points c with start &lt;= c &lt; limit will get the value if
    392      * overwrite is true or if the old value is 0.
    393      * @param start the first code point to get the value
    394      * @param limit one past the last code point to get the value
    395      * @param value the value
    396      * @param overwrite flag for whether old non-initial values are to be
    397      *        overwritten
    398      * @return false if a failure occurred (illegal argument or data array
    399      *               overrun)
    400      */
    401     public boolean setRange(int start, int limit, int value,
    402                             boolean overwrite)
    403     {
    404         // repeat value in [start..limit[
    405         // mark index values for repeat-data blocks by setting bit 31 of the
    406         // index values fill around existing values if any, if(overwrite)
    407 
    408         // valid, uncompacted trie and valid indexes?
    409         if (m_isCompacted_ || start < UCharacter.MIN_VALUE
    410             || start > UCharacter.MAX_VALUE || limit < UCharacter.MIN_VALUE
    411             || limit > (UCharacter.MAX_VALUE + 1) || start > limit) {
    412             return false;
    413         }
    414 
    415         if (start == limit) {
    416             return true; // nothing to do
    417         }
    418 
    419         if ((start & MASK_) != 0) {
    420             // set partial block at [start..following block boundary[
    421             int block = getDataBlock(start);
    422             if (block < 0) {
    423                 return false;
    424             }
    425 
    426             int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_;
    427             if (nextStart <= limit) {
    428                 fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH,
    429                           value, overwrite);
    430                 start = nextStart;
    431             }
    432             else {
    433                 fillBlock(block, start & MASK_, limit & MASK_,
    434                           value, overwrite);
    435                 return true;
    436             }
    437         }
    438 
    439         // number of positions in the last, partial block
    440         int rest = limit & MASK_;
    441 
    442         // round down limit to a block boundary
    443         limit &= ~MASK_;
    444 
    445         // iterate over all-value blocks
    446         int repeatBlock = 0;
    447         if (value == m_initialValue_) {
    448             // repeatBlock = 0; assigned above
    449         }
    450         else {
    451             repeatBlock = -1;
    452         }
    453         while (start < limit) {
    454             // get index value
    455             int block = m_index_[start >> SHIFT_];
    456             if (block > 0) {
    457                 // already allocated, fill in value
    458                 fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite);
    459             }
    460             else if (m_data_[-block] != value && (block == 0 || overwrite)) {
    461                 // set the repeatBlock instead of the current block 0 or range
    462                 // block
    463                 if (repeatBlock >= 0) {
    464                     m_index_[start >> SHIFT_] = -repeatBlock;
    465                 }
    466                 else {
    467                     // create and set and fill the repeatBlock
    468                     repeatBlock = getDataBlock(start);
    469                     if (repeatBlock < 0) {
    470                         return false;
    471                     }
    472 
    473                     // set the negative block number to indicate that it is a
    474                     // repeat block
    475                     m_index_[start >> SHIFT_] = -repeatBlock;
    476                     fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value, true);
    477                 }
    478             }
    479 
    480             start += DATA_BLOCK_LENGTH;
    481         }
    482 
    483         if (rest > 0) {
    484             // set partial block at [last block boundary..limit[
    485             int block = getDataBlock(start);
    486             if (block < 0) {
    487                 return false;
    488             }
    489             fillBlock(block, 0, rest, value, overwrite);
    490         }
    491 
    492         return true;
    493     }
    494 
    495     // protected data member ------------------------------------------------
    496 
    497     protected int m_data_[];
    498     protected int m_initialValue_;
    499 
    500     //  private data member ------------------------------------------------
    501 
    502     private int m_leadUnitValue_;
    503 
    504     // private methods ------------------------------------------------------
    505 
    506     private int allocDataBlock()
    507     {
    508         int newBlock = m_dataLength_;
    509         int newTop = newBlock + DATA_BLOCK_LENGTH;
    510         if (newTop > m_dataCapacity_) {
    511             // out of memory in the data array
    512             return -1;
    513         }
    514         m_dataLength_ = newTop;
    515         return newBlock;
    516     }
    517 
    518     /**
    519      * No error checking for illegal arguments.
    520      * @param ch codepoint to look for
    521      * @return -1 if no new data block available (out of memory in data array)
    522      */
    523     private int getDataBlock(int ch)
    524     {
    525         ch >>= SHIFT_;
    526         int indexValue = m_index_[ch];
    527         if (indexValue > 0) {
    528             return indexValue;
    529         }
    530 
    531         // allocate a new data block
    532         int newBlock = allocDataBlock();
    533         if (newBlock < 0) {
    534             // out of memory in the data array
    535             return -1;
    536         }
    537         m_index_[ch] = newBlock;
    538 
    539         // copy-on-write for a block from a setRange()
    540         System.arraycopy(m_data_, Math.abs(indexValue), m_data_, newBlock,
    541                          DATA_BLOCK_LENGTH << 2);
    542         return newBlock;
    543     }
    544 
    545     /**
    546      * Compact a folded build-time trie.
    547      * The compaction
    548      * - removes blocks that are identical with earlier ones
    549      * - overlaps adjacent blocks as much as possible (if overlap == true)
    550      * - moves blocks in steps of the data granularity
    551      * - moves and overlaps blocks that overlap with multiple values in the overlap region
    552      *
    553      * It does not
    554      * - try to move and overlap blocks that are not already adjacent
    555      * @param overlap flag
    556      */
    557     private void compact(boolean overlap)
    558     {
    559         if (m_isCompacted_) {
    560             return; // nothing left to do
    561         }
    562 
    563         // compaction
    564         // initialize the index map with "block is used/unused" flags
    565         findUnusedBlocks();
    566 
    567         // if Latin-1 is preallocated and linear, then do not compact Latin-1
    568         // data
    569         int overlapStart = DATA_BLOCK_LENGTH;
    570         if (m_isLatin1Linear_ && SHIFT_ <= 8) {
    571             overlapStart += 256;
    572         }
    573 
    574         int newStart = DATA_BLOCK_LENGTH;
    575         int i;
    576         for (int start = newStart; start < m_dataLength_;) {
    577             // start: index of first entry of current block
    578             // newStart: index where the current block is to be moved
    579             //           (right after current end of already-compacted data)
    580             // skip blocks that are not used
    581             if (m_map_[start >>> SHIFT_] < 0) {
    582                 // advance start to the next block
    583                 start += DATA_BLOCK_LENGTH;
    584                 // leave newStart with the previous block!
    585                 continue;
    586             }
    587             // search for an identical block
    588             if (start >= overlapStart) {
    589                 i = findSameDataBlock(m_data_, newStart, start,
    590                                           overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH);
    591                 if (i >= 0) {
    592                     // found an identical block, set the other block's index
    593                     // value for the current block
    594                     m_map_[start >>> SHIFT_] = i;
    595                     // advance start to the next block
    596                     start += DATA_BLOCK_LENGTH;
    597                     // leave newStart with the previous block!
    598                     continue;
    599                 }
    600             }
    601             // see if the beginning of this block can be overlapped with the
    602             // end of the previous block
    603             if(overlap && start>=overlapStart) {
    604                 /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
    605                 for(i=DATA_BLOCK_LENGTH-DATA_GRANULARITY_;
    606                     i>0 && !equal_int(m_data_, newStart-i, start, i);
    607                     i-=DATA_GRANULARITY_) {}
    608             } else {
    609                 i=0;
    610             }
    611             if (i > 0) {
    612                 // some overlap
    613                 m_map_[start >>> SHIFT_] = newStart - i;
    614                 // move the non-overlapping indexes to their new positions
    615                 start += i;
    616                 for (i = DATA_BLOCK_LENGTH - i; i > 0; -- i) {
    617                     m_data_[newStart ++] = m_data_[start ++];
    618                 }
    619             }
    620             else if (newStart < start) {
    621                 // no overlap, just move the indexes to their new positions
    622                 m_map_[start >>> SHIFT_] = newStart;
    623                 for (i = DATA_BLOCK_LENGTH; i > 0; -- i) {
    624                     m_data_[newStart ++] = m_data_[start ++];
    625                 }
    626             }
    627             else { // no overlap && newStart==start
    628                 m_map_[start >>> SHIFT_] = start;
    629                 newStart += DATA_BLOCK_LENGTH;
    630                 start = newStart;
    631             }
    632         }
    633         // now adjust the index (stage 1) table
    634         for (i = 0; i < m_indexLength_; ++ i) {
    635             m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_];
    636         }
    637         m_dataLength_ = newStart;
    638     }
    639 
    640     /**
    641      * Find the same data block
    642      * @param data array
    643      * @param dataLength
    644      * @param otherBlock
    645      * @param step
    646      */
    647     private static final int findSameDataBlock(int data[], int dataLength,
    648                                                int otherBlock, int step)
    649     {
    650         // ensure that we do not even partially get past dataLength
    651         dataLength -= DATA_BLOCK_LENGTH;
    652 
    653         for (int block = 0; block <= dataLength; block += step) {
    654             if(equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) {
    655                 return block;
    656             }
    657         }
    658         return -1;
    659     }
    660 
    661     /**
    662      * Fold the normalization data for supplementary code points into
    663      * a compact area on top of the BMP-part of the trie index,
    664      * with the lead surrogates indexing this compact area.
    665      *
    666      * Duplicate the index values for lead surrogates:
    667      * From inside the BMP area, where some may be overridden with folded values,
    668      * to just after the BMP area, where they can be retrieved for
    669      * code point lookups.
    670      * @param manipulate fold implementation
    671      */
    672     private final void fold(DataManipulate manipulate)
    673     {
    674         int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_];
    675         int index[] = m_index_;
    676         // copy the lead surrogate indexes into a temporary array
    677         System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
    678                          SURROGATE_BLOCK_COUNT_);
    679 
    680         // set all values for lead surrogate code *units* to leadUnitValue
    681         // so that by default runtime lookups will find no data for associated
    682         // supplementary code points, unless there is data for such code points
    683         // which will result in a non-zero folding value below that is set for
    684         // the respective lead units
    685         // the above saved the indexes for surrogate code *points*
    686         // fill the indexes with simplified code from utrie_setRange32()
    687         int block = 0;
    688         if (m_leadUnitValue_ == m_initialValue_) {
    689             // leadUnitValue == initialValue, use all-initial-value block
    690             // block = 0; if block here left empty
    691         }
    692         else {
    693             // create and fill the repeatBlock
    694             block = allocDataBlock();
    695             if (block < 0) {
    696                 // data table overflow
    697                 throw new IllegalStateException("Internal error: Out of memory space");
    698             }
    699             fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_, true);
    700             // negative block number to indicate that it is a repeat block
    701             block = -block;
    702         }
    703         for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++ c) {
    704             m_index_[c] = block;
    705         }
    706 
    707         // Fold significant index values into the area just after the BMP
    708         // indexes.
    709         // In case the first lead surrogate has significant data,
    710         // its index block must be used first (in which case the folding is a
    711         // no-op).
    712         // Later all folded index blocks are moved up one to insert the copied
    713         // lead surrogate indexes.
    714         int indexLength = BMP_INDEX_LENGTH_;
    715         // search for any index (stage 1) entries for supplementary code points
    716         for (int c = 0x10000; c < 0x110000;) {
    717             if (index[c >> SHIFT_] != 0) {
    718                 // there is data, treat the full block for a lead surrogate
    719                 c &= ~0x3ff;
    720                 // is there an identical index block?
    721                 block = findSameIndexBlock(index, indexLength, c >> SHIFT_);
    722 
    723                 // get a folded value for [c..c+0x400[ and,
    724                 // if different from the value for the lead surrogate code
    725                 // point, set it for the lead surrogate code unit
    726 
    727                 int value = manipulate.getFoldedValue(c,
    728                                                       block + SURROGATE_BLOCK_COUNT_);
    729                 if (value != getValue(UTF16.getLeadSurrogate(c))) {
    730                     if (!setValue(UTF16.getLeadSurrogate(c), value)) {
    731                         // data table overflow
    732                         throw new ArrayIndexOutOfBoundsException(
    733                                                                  "Data table overflow");
    734                     }
    735                     // if we did not find an identical index block...
    736                     if (block == indexLength) {
    737                         // move the actual index (stage 1) entries from the
    738                         // supplementary position to the new one
    739                         System.arraycopy(index, c >> SHIFT_, index, indexLength,
    740                                          SURROGATE_BLOCK_COUNT_);
    741                         indexLength += SURROGATE_BLOCK_COUNT_;
    742                     }
    743                 }
    744                 c += 0x400;
    745             }
    746             else {
    747                 c += DATA_BLOCK_LENGTH;
    748             }
    749         }
    750 
    751         // index array overflow?
    752         // This is to guarantee that a folding offset is of the form
    753         // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
    754         // If the index is too large, then n>=1024 and more than 10 bits are
    755         // necessary.
    756         // In fact, it can only ever become n==1024 with completely unfoldable
    757         // data and the additional block of duplicated values for lead
    758         // surrogates.
    759         if (indexLength >= MAX_INDEX_LENGTH_) {
    760             throw new ArrayIndexOutOfBoundsException("Index table overflow");
    761         }
    762         // make space for the lead surrogate index block and insert it between
    763         // the BMP indexes and the folded ones
    764         System.arraycopy(index, BMP_INDEX_LENGTH_, index,
    765                          BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_,
    766                          indexLength - BMP_INDEX_LENGTH_);
    767         System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
    768                          SURROGATE_BLOCK_COUNT_);
    769         indexLength += SURROGATE_BLOCK_COUNT_;
    770         m_indexLength_ = indexLength;
    771     }
    772 
    773     /**
    774      * @hide draft / provisional / internal are hidden on Android
    775      */
    776     private void fillBlock(int block, int start, int limit, int value,
    777                            boolean overwrite)
    778     {
    779         limit += block;
    780         block += start;
    781         if (overwrite) {
    782             while (block < limit) {
    783                 m_data_[block ++] = value;
    784             }
    785         }
    786         else {
    787             while (block < limit) {
    788                 if (m_data_[block] == m_initialValue_) {
    789                     m_data_[block] = value;
    790                 }
    791                 ++ block;
    792             }
    793         }
    794     }
    795 }
    796 
    797