Home | History | Annotate | Download | only in coll
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 2012-2015, International Business Machines
      6  * Corporation and others.  All Rights Reserved.
      7  *******************************************************************************
      8  * CollationKeys.java, ported from collationkeys.h/.cpp
      9  *
     10  * C++ version created on: 2012sep02
     11  * created by: Markus W. Scherer
     12  */
     13 
     14 package com.ibm.icu.impl.coll;
     15 
     16 import com.ibm.icu.text.Collator;
     17 
     18 public final class CollationKeys /* all methods are static */ {
     19 
     20     // Java porting note: C++ SortKeyByteSink class extends a common class ByteSink,
     21     // which is not available in Java. We don't need a super class created for implementing
     22     // collation features.
     23     public static abstract class SortKeyByteSink {
     24         protected byte[] buffer_;
     25         // protected int capacity_; == buffer_.length
     26         private int appended_ = 0;
     27         // not used in Java -- private int ignore_ = 0;
     28 
     29         public SortKeyByteSink(byte[] dest) {
     30             buffer_ = dest;
     31         }
     32 
     33         /**
     34          * Needed in Java for when we write to the buffer directly.
     35          * In C++, the SortKeyByteSink is a subclass of ByteSink and lower-level code can write to that.
     36          * TODO: Can we make Java SortKeyByteSink have-a ByteArrayWrapper and write through to it?
     37          * Or maybe create interface ByteSink, have SortKeyByteSink implement it, and have BOCSU write to that??
     38          */
     39         public void setBufferAndAppended(byte[] dest, int app) {
     40             buffer_ = dest;
     41             appended_ = app;
     42         }
     43 
     44         /* not used in Java -- public void IgnoreBytes(int numIgnore) {
     45             ignore_ = numIgnore;
     46         } */
     47 
     48         /**
     49          * @param bytes
     50          *            the array of byte
     51          * @param n
     52          *            the length of bytes to be appended
     53          */
     54         public void Append(byte[] bytes, int n) {
     55             if (n <= 0 || bytes == null) {
     56                 return;
     57             }
     58 
     59             /* not used in Java -- if (ignore_ > 0) {
     60                 int ignoreRest = ignore_ - n;
     61                 if (ignoreRest >= 0) {
     62                     ignore_ = ignoreRest;
     63                     return;
     64                 } else {
     65                     start = ignore_;
     66                     n = -ignoreRest;
     67                     ignore_ = 0;
     68                 }
     69             } */
     70 
     71             int length = appended_;
     72             appended_ += n;
     73 
     74             int available = buffer_.length - length;
     75             if (n <= available) {
     76                 System.arraycopy(bytes, 0, buffer_, length, n);
     77             } else {
     78                 AppendBeyondCapacity(bytes, 0, n, length);
     79             }
     80         }
     81 
     82         public void Append(int b) {
     83             /* not used in Java -- if (ignore_ > 0) {
     84                 --ignore_;
     85             } else */ {
     86                 if (appended_ < buffer_.length || Resize(1, appended_)) {
     87                     buffer_[appended_] = (byte) b;
     88                 }
     89                 ++appended_;
     90             }
     91         }
     92 
     93         // Java porting note: This method is not used by collator implementation.
     94         //
     95         // virtual char *GetAppendBuffer(int min_capacity,
     96         // int desired_capacity_hint,
     97         // char *scratch, int scratch_capacity,
     98         // int *result_capacity);
     99 
    100         public int NumberOfBytesAppended() {
    101             return appended_;
    102         }
    103 
    104         public int GetRemainingCapacity() {
    105             return /* not used in Java -- ignore_ + */ buffer_.length - appended_;
    106         }
    107 
    108         public boolean Overflowed() {
    109             return appended_ > buffer_.length;
    110         }
    111 
    112         /* not used in Java -- public boolean IsOk() {
    113             return true;
    114         } */
    115 
    116         /**
    117          * @param bytes
    118          *            the array of byte
    119          * @param start
    120          *            the start index within the array to be appended
    121          * @param n
    122          *            the length of bytes to be appended
    123          * @param length
    124          *            the length of buffer required to store the entire data (i.e. already appended
    125          *            bytes + bytes to be appended by this method)
    126          */
    127         protected abstract void AppendBeyondCapacity(byte[] bytes, int start, int n, int length);
    128 
    129         protected abstract boolean Resize(int appendCapacity, int length);
    130     }
    131 
    132     public static class LevelCallback {
    133         /**
    134          * @param level
    135          *            The next level about to be written to the ByteSink.
    136          * @return true if the level is to be written (the base class implementation always returns
    137          *         true)
    138          */
    139         boolean needToWrite(int level) {
    140             return true;
    141         }
    142     }
    143     public static final LevelCallback SIMPLE_LEVEL_FALLBACK = new LevelCallback();
    144 
    145     private static final class SortKeyLevel {
    146         private static final int INITIAL_CAPACITY = 40;
    147 
    148         byte[] buffer = new byte[INITIAL_CAPACITY];
    149         int len = 0;
    150         // not used in Java -- private static final boolean ok = true;  // In C++ "ok" is reset when memory allocations fail.
    151 
    152         SortKeyLevel() {
    153         }
    154 
    155         /* not used in Java -- boolean isOk() {
    156             return ok;
    157         } */
    158 
    159         boolean isEmpty() {
    160             return len == 0;
    161         }
    162 
    163         int length() {
    164             return len;
    165         }
    166 
    167         // Java porting note: Java uses this instead of C++ operator [] overload
    168         // uint8_t operator[](int index)
    169         byte getAt(int index) {
    170             return buffer[index];
    171         }
    172 
    173         byte[] data() {
    174             return buffer;
    175         }
    176 
    177         void appendByte(int b) {
    178             if (len < buffer.length || ensureCapacity(1)) {
    179                 buffer[len++] = (byte) b;
    180             }
    181         }
    182 
    183         void appendWeight16(int w) {
    184             assert ((w & 0xffff) != 0);
    185             byte b0 = (byte) (w >>> 8);
    186             byte b1 = (byte) w;
    187             int appendLength = (b1 == 0) ? 1 : 2;
    188             if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
    189                 buffer[len++] = b0;
    190                 if (b1 != 0) {
    191                     buffer[len++] = b1;
    192                 }
    193             }
    194         }
    195 
    196         void appendWeight32(long w) {
    197             assert (w != 0);
    198             byte[] bytes = new byte[] { (byte) (w >>> 24), (byte) (w >>> 16), (byte) (w >>> 8),
    199                     (byte) w };
    200             int appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;
    201             if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
    202                 buffer[len++] = bytes[0];
    203                 if (bytes[1] != 0) {
    204                     buffer[len++] = bytes[1];
    205                     if (bytes[2] != 0) {
    206                         buffer[len++] = bytes[2];
    207                         if (bytes[3] != 0) {
    208                             buffer[len++] = bytes[3];
    209                         }
    210                     }
    211                 }
    212             }
    213         }
    214 
    215         void appendReverseWeight16(int w) {
    216             assert ((w & 0xffff) != 0);
    217             byte b0 = (byte) (w >>> 8);
    218             byte b1 = (byte) w;
    219             int appendLength = (b1 == 0) ? 1 : 2;
    220             if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
    221                 if (b1 == 0) {
    222                     buffer[len++] = b0;
    223                 } else {
    224                     buffer[len] = b1;
    225                     buffer[len + 1] = b0;
    226                     len += 2;
    227                 }
    228             }
    229         }
    230 
    231         // Appends all but the last byte to the sink. The last byte should be the 01 terminator.
    232         void appendTo(SortKeyByteSink sink) {
    233             assert (len > 0 && buffer[len - 1] == 1);
    234             sink.Append(buffer, len - 1);
    235         }
    236 
    237         private boolean ensureCapacity(int appendCapacity) {
    238             /* not used in Java -- if (!ok) {
    239                 return false;
    240             } */
    241             int newCapacity = 2 * buffer.length;
    242             int altCapacity = len + 2 * appendCapacity;
    243             if (newCapacity < altCapacity) {
    244                 newCapacity = altCapacity;
    245             }
    246             if (newCapacity < 200) {
    247                 newCapacity = 200;
    248             }
    249             byte[] newbuf = new byte[newCapacity];
    250             System.arraycopy(buffer, 0, newbuf, 0, len);
    251             buffer = newbuf;
    252 
    253             return true;
    254         }
    255     }
    256 
    257     private static SortKeyLevel getSortKeyLevel(int levels, int level) {
    258         return (levels & level) != 0 ? new SortKeyLevel() : null;
    259     }
    260 
    261     private CollationKeys() {
    262     } // no instantiation
    263 
    264     // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
    265     private static final int SEC_COMMON_LOW = Collation.COMMON_BYTE;
    266     private static final int SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
    267     static final int SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; // read by CollationDataReader
    268     private static final int SEC_COMMON_MAX_COUNT = 0x21;
    269 
    270     // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
    271     private static final int CASE_LOWER_FIRST_COMMON_LOW = 1;
    272     private static final int CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
    273     private static final int CASE_LOWER_FIRST_COMMON_HIGH = 13;
    274     private static final int CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
    275 
    276     // Case level, upperFirst: Compress up to 13 common weights as 3..15.
    277     private static final int CASE_UPPER_FIRST_COMMON_LOW = 3;
    278     @SuppressWarnings("unused")
    279     private static final int CASE_UPPER_FIRST_COMMON_HIGH = 15;
    280     private static final int CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
    281 
    282     // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
    283     private static final int TER_ONLY_COMMON_LOW = Collation.COMMON_BYTE;
    284     private static final int TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
    285     private static final int TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
    286     private static final int TER_ONLY_COMMON_MAX_COUNT = 0x61;
    287 
    288     // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
    289     private static final int TER_LOWER_FIRST_COMMON_LOW = Collation.COMMON_BYTE;
    290     private static final int TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
    291     private static final int TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
    292     private static final int TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
    293 
    294     // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
    295     private static final int TER_UPPER_FIRST_COMMON_LOW = Collation.COMMON_BYTE + 0x80;
    296     private static final int TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
    297     private static final int TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
    298     private static final int TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
    299 
    300     // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
    301     private static final int QUAT_COMMON_LOW = 0x1c;
    302     private static final int QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
    303     private static final int QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
    304     private static final int QUAT_COMMON_MAX_COUNT = 0x71;
    305     // Primary weights shifted to quaternary level must be encoded with
    306     // a lead byte below the common-weight compression range.
    307     private static final int QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b
    308 
    309     /**
    310      * Map from collation strength (UColAttributeValue) to a mask of Collation.Level bits up to that
    311      * strength, excluding the CASE_LEVEL which is independent of the strength, and excluding
    312      * IDENTICAL_LEVEL which this function does not write.
    313      */
    314     private static final int levelMasks[] = new int[] {
    315         2,          // UCOL_PRIMARY -> PRIMARY_LEVEL
    316         6,          // UCOL_SECONDARY -> up to SECONDARY_LEVEL
    317         0x16,       // UCOL_TERTIARY -> up to TERTIARY_LEVEL
    318         0x36,       // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL
    319         0, 0, 0, 0,
    320         0, 0, 0, 0,
    321         0, 0, 0,
    322         0x36        // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL
    323     };
    324 
    325     /**
    326      * Writes the sort key bytes for minLevel up to the iterator data's strength. Optionally writes
    327      * the case level. Stops writing levels when callback.needToWrite(level) returns false.
    328      * Separates levels with the LEVEL_SEPARATOR_BYTE but does not write a TERMINATOR_BYTE.
    329      */
    330     public static void writeSortKeyUpToQuaternary(CollationIterator iter, boolean[] compressibleBytes,
    331             CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback,
    332             boolean preflight) {
    333 
    334         int options = settings.options;
    335         // Set of levels to process and write.
    336         int levels = levelMasks[CollationSettings.getStrength(options)];
    337         if ((options & CollationSettings.CASE_LEVEL) != 0) {
    338             levels |= Collation.CASE_LEVEL_FLAG;
    339         }
    340         // Minus the levels below minLevel.
    341         levels &= ~((1 << minLevel) - 1);
    342         if (levels == 0) {
    343             return;
    344         }
    345 
    346         long variableTop;
    347         if ((options & CollationSettings.ALTERNATE_MASK) == 0) {
    348             variableTop = 0;
    349         } else {
    350             // +1 so that we can use "<" and primary ignorables test out early.
    351             variableTop = settings.variableTop + 1;
    352         }
    353 
    354         int tertiaryMask = CollationSettings.getTertiaryMask(options);
    355 
    356         byte[] p234 = new byte[3];
    357         SortKeyLevel cases = getSortKeyLevel(levels, Collation.CASE_LEVEL_FLAG);
    358         SortKeyLevel secondaries = getSortKeyLevel(levels, Collation.SECONDARY_LEVEL_FLAG);
    359         SortKeyLevel tertiaries = getSortKeyLevel(levels, Collation.TERTIARY_LEVEL_FLAG);
    360         SortKeyLevel quaternaries = getSortKeyLevel(levels, Collation.QUATERNARY_LEVEL_FLAG);
    361 
    362         long prevReorderedPrimary = 0;  // 0==no compression
    363         int commonCases = 0;
    364         int commonSecondaries = 0;
    365         int commonTertiaries = 0;
    366         int commonQuaternaries = 0;
    367 
    368         int prevSecondary = 0;
    369         int secSegmentStart = 0;
    370 
    371         for (;;) {
    372             // No need to keep all CEs in the buffer when we write a sort key.
    373             iter.clearCEsIfNoneRemaining();
    374             long ce = iter.nextCE();
    375             long p = ce >>> 32;
    376             if (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY) {
    377                 // Variable CE, shift it to quaternary level.
    378                 // Ignore all following primary ignorables, and shift further variable CEs.
    379                 if (commonQuaternaries != 0) {
    380                     --commonQuaternaries;
    381                     while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
    382                         quaternaries.appendByte(QUAT_COMMON_MIDDLE);
    383                         commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
    384                     }
    385                     // Shifted primary weights are lower than the common weight.
    386                     quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);
    387                     commonQuaternaries = 0;
    388                 }
    389                 do {
    390                     if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
    391                         if (settings.hasReordering()) {
    392                             p = settings.reorder(p);
    393                         }
    394                         if (((int) p >>> 24) >= QUAT_SHIFTED_LIMIT_BYTE) {
    395                             // Prevent shifted primary lead bytes from
    396                             // overlapping with the common compression range.
    397                             quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
    398                         }
    399                         quaternaries.appendWeight32(p);
    400                     }
    401                     do {
    402                         ce = iter.nextCE();
    403                         p = ce >>> 32;
    404                     } while (p == 0);
    405                 } while (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY);
    406             }
    407             // ce could be primary ignorable, or NO_CE, or the merge separator,
    408             // or a regular primary CE, but it is not variable.
    409             // If ce==NO_CE, then write nothing for the primary level but
    410             // terminate compression on all levels and then exit the loop.
    411             if (p > Collation.NO_CE_PRIMARY && (levels & Collation.PRIMARY_LEVEL_FLAG) != 0) {
    412                 // Test the un-reordered primary for compressibility.
    413                 boolean isCompressible = compressibleBytes[(int) p >>> 24];
    414                 if(settings.hasReordering()) {
    415                     p = settings.reorder(p);
    416                 }
    417                 int p1 = (int) p >>> 24;
    418                 if (!isCompressible || p1 != ((int) prevReorderedPrimary >>> 24)) {
    419                     if (prevReorderedPrimary != 0) {
    420                         if (p < prevReorderedPrimary) {
    421                             // No primary compression terminator
    422                             // at the end of the level or merged segment.
    423                             if (p1 > Collation.MERGE_SEPARATOR_BYTE) {
    424                                 sink.Append(Collation.PRIMARY_COMPRESSION_LOW_BYTE);
    425                             }
    426                         } else {
    427                             sink.Append(Collation.PRIMARY_COMPRESSION_HIGH_BYTE);
    428                         }
    429                     }
    430                     sink.Append(p1);
    431                     if(isCompressible) {
    432                         prevReorderedPrimary = p;
    433                     } else {
    434                         prevReorderedPrimary = 0;
    435                     }
    436                 }
    437                 byte p2 = (byte) (p >>> 16);
    438                 if (p2 != 0) {
    439                     p234[0] = p2;
    440                     p234[1] = (byte) (p >>> 8);
    441                     p234[2] = (byte) p;
    442                     sink.Append(p234, (p234[1] == 0) ? 1 : (p234[2] == 0) ? 2 : 3);
    443                 }
    444                 // Optimization for internalNextSortKeyPart():
    445                 // When the primary level overflows we can stop because we need not
    446                 // calculate (preflight) the whole sort key length.
    447                 if (!preflight && sink.Overflowed()) {
    448                     // not used in Java -- if (!sink.IsOk()) {
    449                     // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in
    450                     // C implementation. IsOk() in Java always returns true, so this
    451                     // is a dead code.
    452                     return;
    453                 }
    454             }
    455 
    456             int lower32 = (int) ce;
    457             if (lower32 == 0) {
    458                 continue;
    459             } // completely ignorable, no secondary/case/tertiary/quaternary
    460 
    461             if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) {
    462                 int s = lower32 >>> 16;  // 16 bits
    463                 if (s == 0) {
    464                     // secondary ignorable
    465                 } else if (s == Collation.COMMON_WEIGHT16 &&
    466                         ((options & CollationSettings.BACKWARD_SECONDARY) == 0 ||
    467                             p != Collation.MERGE_SEPARATOR_PRIMARY)) {
    468                     // s is a common secondary weight, and
    469                     // backwards-secondary is off or the ce is not the merge separator.
    470                     ++commonSecondaries;
    471                 } else if ((options & CollationSettings.BACKWARD_SECONDARY) == 0) {
    472                     if (commonSecondaries != 0) {
    473                         --commonSecondaries;
    474                         while (commonSecondaries >= SEC_COMMON_MAX_COUNT) {
    475                             secondaries.appendByte(SEC_COMMON_MIDDLE);
    476                             commonSecondaries -= SEC_COMMON_MAX_COUNT;
    477                         }
    478                         int b;
    479                         if (s < Collation.COMMON_WEIGHT16) {
    480                             b = SEC_COMMON_LOW + commonSecondaries;
    481                         } else {
    482                             b = SEC_COMMON_HIGH - commonSecondaries;
    483                         }
    484                         secondaries.appendByte(b);
    485                         commonSecondaries = 0;
    486                     }
    487                     secondaries.appendWeight16(s);
    488                 } else {
    489                     if (commonSecondaries != 0) {
    490                         --commonSecondaries;
    491                         // Append reverse weights. The level will be re-reversed later.
    492                         int remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
    493                         int b;
    494                         if (prevSecondary < Collation.COMMON_WEIGHT16) {
    495                             b = SEC_COMMON_LOW + remainder;
    496                         } else {
    497                             b = SEC_COMMON_HIGH - remainder;
    498                         }
    499                         secondaries.appendByte(b);
    500                         commonSecondaries -= remainder;
    501                         // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
    502                         while (commonSecondaries > 0) { // same as >= SEC_COMMON_MAX_COUNT
    503                             secondaries.appendByte(SEC_COMMON_MIDDLE);
    504                             commonSecondaries -= SEC_COMMON_MAX_COUNT;
    505                         }
    506                         // commonSecondaries == 0
    507                     }
    508                     if (0 < p && p <= Collation.MERGE_SEPARATOR_PRIMARY) {
    509                         // The backwards secondary level compares secondary weights backwards
    510                         // within segments separated by the merge separator (U+FFFE).
    511                         byte[] secs = secondaries.data();
    512                         int last = secondaries.length() - 1;
    513                         while (secSegmentStart < last) {
    514                             byte b = secs[secSegmentStart];
    515                             secs[secSegmentStart++] = secs[last];
    516                             secs[last--] = b;
    517                         }
    518                         secondaries.appendByte(p == Collation.NO_CE_PRIMARY ?
    519                             Collation.LEVEL_SEPARATOR_BYTE : Collation.MERGE_SEPARATOR_BYTE);
    520                         prevSecondary = 0;
    521                         secSegmentStart = secondaries.length();
    522                     } else {
    523                         secondaries.appendReverseWeight16(s);
    524                         prevSecondary = s;
    525                     }
    526                 }
    527             }
    528 
    529             if ((levels & Collation.CASE_LEVEL_FLAG) != 0) {
    530                 if ((CollationSettings.getStrength(options) == Collator.PRIMARY) ? p == 0
    531                         : (lower32 >>> 16) == 0) {
    532                     // Primary+caseLevel: Ignore case level weights of primary ignorables.
    533                     // Otherwise: Ignore case level weights of secondary ignorables.
    534                     // For details see the comments in the CollationCompare class.
    535                 } else {
    536                     int c = (lower32 >>> 8) & 0xff; // case bits & tertiary lead byte
    537                     assert ((c & 0xc0) != 0xc0);
    538                     if ((c & 0xc0) == 0 && c > Collation.LEVEL_SEPARATOR_BYTE) {
    539                         ++commonCases;
    540                     } else {
    541                         if ((options & CollationSettings.UPPER_FIRST) == 0) {
    542                             // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14,
    543                             // upper=15.
    544                             // If there are only common (=lowest) weights in the whole level,
    545                             // then we need not write anything.
    546                             // Level length differences are handled already on the next-higher level.
    547                             if (commonCases != 0 &&
    548                                     (c > Collation.LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) {
    549                                 --commonCases;
    550                                 while (commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
    551                                     cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
    552                                     commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
    553                                 }
    554                                 int b;
    555                                 if (c <= Collation.LEVEL_SEPARATOR_BYTE) {
    556                                     b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
    557                                 } else {
    558                                     b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
    559                                 }
    560                                 cases.appendByte(b << 4);
    561                                 commonCases = 0;
    562                             }
    563                             if (c > Collation.LEVEL_SEPARATOR_BYTE) {
    564                                 c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >>> 6)) << 4; // 14 or 15
    565                             }
    566                         } else {
    567                             // upperFirst: Compress common weights to nibbles 3..15, mixed=2,
    568                             // upper=1.
    569                             // The compressed common case weights only go up from the "low" value
    570                             // because with upperFirst the common weight is the highest one.
    571                             if (commonCases != 0) {
    572                                 --commonCases;
    573                                 while (commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) {
    574                                     cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);
    575                                     commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;
    576                                 }
    577                                 cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
    578                                 commonCases = 0;
    579                             }
    580                             if (c > Collation.LEVEL_SEPARATOR_BYTE) {
    581                                 c = (CASE_UPPER_FIRST_COMMON_LOW - (c >>> 6)) << 4; // 2 or 1
    582                             }
    583                         }
    584                         // c is a separator byte 01,
    585                         // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
    586                         cases.appendByte(c);
    587                     }
    588                 }
    589             }
    590 
    591             if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) {
    592                 int t = lower32 & tertiaryMask;
    593                 assert ((lower32 & 0xc000) != 0xc000);
    594                 if (t == Collation.COMMON_WEIGHT16) {
    595                     ++commonTertiaries;
    596                 } else if ((tertiaryMask & 0x8000) == 0) {
    597                     // Tertiary weights without case bits.
    598                     // Move lead bytes 06..3F to C6..FF for a large common-weight range.
    599                     if (commonTertiaries != 0) {
    600                         --commonTertiaries;
    601                         while (commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) {
    602                             tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);
    603                             commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;
    604                         }
    605                         int b;
    606                         if (t < Collation.COMMON_WEIGHT16) {
    607                             b = TER_ONLY_COMMON_LOW + commonTertiaries;
    608                         } else {
    609                             b = TER_ONLY_COMMON_HIGH - commonTertiaries;
    610                         }
    611                         tertiaries.appendByte(b);
    612                         commonTertiaries = 0;
    613                     }
    614                     if (t > Collation.COMMON_WEIGHT16) {
    615                         t += 0xc000;
    616                     }
    617                     tertiaries.appendWeight16(t);
    618                 } else if ((options & CollationSettings.UPPER_FIRST) == 0) {
    619                     // Tertiary weights with caseFirst=lowerFirst.
    620                     // Move lead bytes 06..BF to 46..FF for the common-weight range.
    621                     if (commonTertiaries != 0) {
    622                         --commonTertiaries;
    623                         while (commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) {
    624                             tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);
    625                             commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;
    626                         }
    627                         int b;
    628                         if (t < Collation.COMMON_WEIGHT16) {
    629                             b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;
    630                         } else {
    631                             b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;
    632                         }
    633                         tertiaries.appendByte(b);
    634                         commonTertiaries = 0;
    635                     }
    636                     if (t > Collation.COMMON_WEIGHT16) {
    637                         t += 0x4000;
    638                     }
    639                     tertiaries.appendWeight16(t);
    640                 } else {
    641                     // Tertiary weights with caseFirst=upperFirst.
    642                     // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
    643                     // to keep tertiary CEs well-formed.
    644                     // Their case+tertiary weights must be greater than those of
    645                     // primary and secondary CEs.
    646                     //
    647                     // Separator         01 -> 01      (unchanged)
    648                     // Lowercase     02..04 -> 82..84  (includes uncased)
    649                     // Common weight     05 -> 85..C5  (common-weight compression range)
    650                     // Lowercase     06..3F -> C6..FF
    651                     // Mixed case    42..7F -> 42..7F
    652                     // Uppercase     82..BF -> 02..3F
    653                     // Tertiary CE   86..BF -> C6..FF
    654                     if (t <= Collation.NO_CE_WEIGHT16) {
    655                         // Keep separators unchanged.
    656                     } else if ((lower32 >>> 16) != 0) {
    657                         // Invert case bits of primary & secondary CEs.
    658                         t ^= 0xc000;
    659                         if (t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) {
    660                             t -= 0x4000;
    661                         }
    662                     } else {
    663                         // Keep uppercase bits of tertiary CEs.
    664                         assert (0x8600 <= t && t <= 0xbfff);
    665                         t += 0x4000;
    666                     }
    667                     if (commonTertiaries != 0) {
    668                         --commonTertiaries;
    669                         while (commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) {
    670                             tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);
    671                             commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;
    672                         }
    673                         int b;
    674                         if (t < (TER_UPPER_FIRST_COMMON_LOW << 8)) {
    675                             b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;
    676                         } else {
    677                             b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;
    678                         }
    679                         tertiaries.appendByte(b);
    680                         commonTertiaries = 0;
    681                     }
    682                     tertiaries.appendWeight16(t);
    683                 }
    684             }
    685 
    686             if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
    687                 int q = lower32 & 0xffff;
    688                 if ((q & 0xc0) == 0 && q > Collation.NO_CE_WEIGHT16) {
    689                     ++commonQuaternaries;
    690                 } else if (q == Collation.NO_CE_WEIGHT16
    691                         && (options & CollationSettings.ALTERNATE_MASK) == 0
    692                         && quaternaries.isEmpty()) {
    693                     // If alternate=non-ignorable and there are only common quaternary weights,
    694                     // then we need not write anything.
    695                     // The only weights greater than the merge separator and less than the common
    696                     // weight
    697                     // are shifted primary weights, which are not generated for
    698                     // alternate=non-ignorable.
    699                     // There are also exactly as many quaternary weights as tertiary weights,
    700                     // so level length differences are handled already on tertiary level.
    701                     // Any above-common quaternary weight will compare greater regardless.
    702                     quaternaries.appendByte(Collation.LEVEL_SEPARATOR_BYTE);
    703                 } else {
    704                     if (q == Collation.NO_CE_WEIGHT16) {
    705                         q = Collation.LEVEL_SEPARATOR_BYTE;
    706                     } else {
    707                         q = 0xfc + ((q >>> 6) & 3);
    708                     }
    709                     if (commonQuaternaries != 0) {
    710                         --commonQuaternaries;
    711                         while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
    712                             quaternaries.appendByte(QUAT_COMMON_MIDDLE);
    713                             commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
    714                         }
    715                         int b;
    716                         if (q < QUAT_COMMON_LOW) {
    717                             b = QUAT_COMMON_LOW + commonQuaternaries;
    718                         } else {
    719                             b = QUAT_COMMON_HIGH - commonQuaternaries;
    720                         }
    721                         quaternaries.appendByte(b);
    722                         commonQuaternaries = 0;
    723                     }
    724                     quaternaries.appendByte(q);
    725                 }
    726             }
    727 
    728             if ((lower32 >>> 24) == Collation.LEVEL_SEPARATOR_BYTE) {
    729                 break;
    730             } // ce == NO_CE
    731         }
    732 
    733         // Append the beyond-primary levels.
    734         // not used in Java -- boolean ok = true;
    735         if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) {
    736             if (!callback.needToWrite(Collation.SECONDARY_LEVEL)) {
    737                 return;
    738             }
    739             // not used in Java -- ok &= secondaries.isOk();
    740             sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
    741             secondaries.appendTo(sink);
    742         }
    743 
    744         if ((levels & Collation.CASE_LEVEL_FLAG) != 0) {
    745             if (!callback.needToWrite(Collation.CASE_LEVEL)) {
    746                 return;
    747             }
    748             // not used in Java -- ok &= cases.isOk();
    749             sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
    750             // Write pairs of nibbles as bytes, except separator bytes as themselves.
    751             int length = cases.length() - 1; // Ignore the trailing NO_CE.
    752             byte b = 0;
    753             for (int i = 0; i < length; ++i) {
    754                 byte c = cases.getAt(i);
    755                 assert ((c & 0xf) == 0 && c != 0);
    756                 if (b == 0) {
    757                     b = c;
    758                 } else {
    759                     sink.Append(b | ((c >> 4) & 0xf));
    760                     b = 0;
    761                 }
    762             }
    763             if (b != 0) {
    764                 sink.Append(b);
    765             }
    766         }
    767 
    768         if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) {
    769             if (!callback.needToWrite(Collation.TERTIARY_LEVEL)) {
    770                 return;
    771             }
    772             // not used in Java -- ok &= tertiaries.isOk();
    773             sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
    774             tertiaries.appendTo(sink);
    775         }
    776 
    777         if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
    778             if (!callback.needToWrite(Collation.QUATERNARY_LEVEL)) {
    779                 return;
    780             }
    781             // not used in Java -- ok &= quaternaries.isOk();
    782             sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
    783             quaternaries.appendTo(sink);
    784         }
    785 
    786         // not used in Java -- if (!ok || !sink.IsOk()) {
    787         // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in
    788         // C implementation. IsOk() in Java always returns true, so this
    789         // is a dead code.
    790     }
    791 }
    792