Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2012-2015, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * collationkeys.cpp
      7 *
      8 * created on: 2012sep02
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_COLLATION
     15 
     16 #include "unicode/bytestream.h"
     17 #include "collation.h"
     18 #include "collationiterator.h"
     19 #include "collationkeys.h"
     20 #include "collationsettings.h"
     21 #include "uassert.h"
     22 
     23 U_NAMESPACE_BEGIN
     24 
     25 SortKeyByteSink::~SortKeyByteSink() {}
     26 
     27 void
     28 SortKeyByteSink::Append(const char *bytes, int32_t n) {
     29     if (n <= 0 || bytes == NULL) {
     30         return;
     31     }
     32     if (ignore_ > 0) {
     33         int32_t ignoreRest = ignore_ - n;
     34         if (ignoreRest >= 0) {
     35             ignore_ = ignoreRest;
     36             return;
     37         } else {
     38             bytes += ignore_;
     39             n = -ignoreRest;
     40             ignore_ = 0;
     41         }
     42     }
     43     int32_t length = appended_;
     44     appended_ += n;
     45     if ((buffer_ + length) == bytes) {
     46         return;  // the caller used GetAppendBuffer() and wrote the bytes already
     47     }
     48     int32_t available = capacity_ - length;
     49     if (n <= available) {
     50         uprv_memcpy(buffer_ + length, bytes, n);
     51     } else {
     52         AppendBeyondCapacity(bytes, n, length);
     53     }
     54 }
     55 
     56 char *
     57 SortKeyByteSink::GetAppendBuffer(int32_t min_capacity,
     58                                  int32_t desired_capacity_hint,
     59                                  char *scratch,
     60                                  int32_t scratch_capacity,
     61                                  int32_t *result_capacity) {
     62     if (min_capacity < 1 || scratch_capacity < min_capacity) {
     63         *result_capacity = 0;
     64         return NULL;
     65     }
     66     if (ignore_ > 0) {
     67         // Do not write ignored bytes right at the end of the buffer.
     68         *result_capacity = scratch_capacity;
     69         return scratch;
     70     }
     71     int32_t available = capacity_ - appended_;
     72     if (available >= min_capacity) {
     73         *result_capacity = available;
     74         return buffer_ + appended_;
     75     } else if (Resize(desired_capacity_hint, appended_)) {
     76         *result_capacity = capacity_ - appended_;
     77         return buffer_ + appended_;
     78     } else {
     79         *result_capacity = scratch_capacity;
     80         return scratch;
     81     }
     82 }
     83 
     84 namespace {
     85 
     86 /**
     87  * uint8_t byte buffer, similar to CharString but simpler.
     88  */
     89 class SortKeyLevel : public UMemory {
     90 public:
     91     SortKeyLevel() : len(0), ok(TRUE) {}
     92     ~SortKeyLevel() {}
     93 
     94     /** @return FALSE if memory allocation failed */
     95     UBool isOk() const { return ok; }
     96     UBool isEmpty() const { return len == 0; }
     97     int32_t length() const { return len; }
     98     const uint8_t *data() const { return buffer.getAlias(); }
     99     uint8_t operator[](int32_t index) const { return buffer[index]; }
    100 
    101     uint8_t *data() { return buffer.getAlias(); }
    102 
    103     void appendByte(uint32_t b);
    104     void appendWeight16(uint32_t w);
    105     void appendWeight32(uint32_t w);
    106     void appendReverseWeight16(uint32_t w);
    107 
    108     /** Appends all but the last byte to the sink. The last byte should be the 01 terminator. */
    109     void appendTo(ByteSink &sink) const {
    110         U_ASSERT(len > 0 && buffer[len - 1] == 1);
    111         sink.Append(reinterpret_cast<const char *>(buffer.getAlias()), len - 1);
    112     }
    113 
    114 private:
    115     MaybeStackArray<uint8_t, 40> buffer;
    116     int32_t len;
    117     UBool ok;
    118 
    119     UBool ensureCapacity(int32_t appendCapacity);
    120 
    121     SortKeyLevel(const SortKeyLevel &other); // forbid copying of this class
    122     SortKeyLevel &operator=(const SortKeyLevel &other); // forbid copying of this class
    123 };
    124 
    125 void SortKeyLevel::appendByte(uint32_t b) {
    126     if(len < buffer.getCapacity() || ensureCapacity(1)) {
    127         buffer[len++] = (uint8_t)b;
    128     }
    129 }
    130 
    131 void
    132 SortKeyLevel::appendWeight16(uint32_t w) {
    133     U_ASSERT((w & 0xffff) != 0);
    134     uint8_t b0 = (uint8_t)(w >> 8);
    135     uint8_t b1 = (uint8_t)w;
    136     int32_t appendLength = (b1 == 0) ? 1 : 2;
    137     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
    138         buffer[len++] = b0;
    139         if(b1 != 0) {
    140             buffer[len++] = b1;
    141         }
    142     }
    143 }
    144 
    145 void
    146 SortKeyLevel::appendWeight32(uint32_t w) {
    147     U_ASSERT(w != 0);
    148     uint8_t bytes[4] = { (uint8_t)(w >> 24), (uint8_t)(w >> 16), (uint8_t)(w >> 8), (uint8_t)w };
    149     int32_t appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;
    150     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
    151         buffer[len++] = bytes[0];
    152         if(bytes[1] != 0) {
    153             buffer[len++] = bytes[1];
    154             if(bytes[2] != 0) {
    155                 buffer[len++] = bytes[2];
    156                 if(bytes[3] != 0) {
    157                     buffer[len++] = bytes[3];
    158                 }
    159             }
    160         }
    161     }
    162 }
    163 
    164 void
    165 SortKeyLevel::appendReverseWeight16(uint32_t w) {
    166     U_ASSERT((w & 0xffff) != 0);
    167     uint8_t b0 = (uint8_t)(w >> 8);
    168     uint8_t b1 = (uint8_t)w;
    169     int32_t appendLength = (b1 == 0) ? 1 : 2;
    170     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
    171         if(b1 == 0) {
    172             buffer[len++] = b0;
    173         } else {
    174             buffer[len] = b1;
    175             buffer[len + 1] = b0;
    176             len += 2;
    177         }
    178     }
    179 }
    180 
    181 UBool SortKeyLevel::ensureCapacity(int32_t appendCapacity) {
    182     if(!ok) {
    183         return FALSE;
    184     }
    185     int32_t newCapacity = 2 * buffer.getCapacity();
    186     int32_t altCapacity = len + 2 * appendCapacity;
    187     if (newCapacity < altCapacity) {
    188         newCapacity = altCapacity;
    189     }
    190     if (newCapacity < 200) {
    191         newCapacity = 200;
    192     }
    193     if(buffer.resize(newCapacity, len)==NULL) {
    194         return ok = FALSE;
    195     }
    196     return TRUE;
    197 }
    198 
    199 }  // namespace
    200 
    201 CollationKeys::LevelCallback::~LevelCallback() {}
    202 
    203 UBool
    204 CollationKeys::LevelCallback::needToWrite(Collation::Level /*level*/) { return TRUE; }
    205 
    206 /**
    207  * Map from collation strength (UColAttributeValue)
    208  * to a mask of Collation::Level bits up to that strength,
    209  * excluding the CASE_LEVEL which is independent of the strength,
    210  * and excluding IDENTICAL_LEVEL which this function does not write.
    211  */
    212 static const uint32_t levelMasks[UCOL_STRENGTH_LIMIT] = {
    213     2,          // UCOL_PRIMARY -> PRIMARY_LEVEL
    214     6,          // UCOL_SECONDARY -> up to SECONDARY_LEVEL
    215     0x16,       // UCOL_TERTIARY -> up to TERTIARY_LEVEL
    216     0x36,       // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL
    217     0, 0, 0, 0,
    218     0, 0, 0, 0,
    219     0, 0, 0,
    220     0x36        // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL
    221 };
    222 
    223 void
    224 CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
    225                                           const UBool *compressibleBytes,
    226                                           const CollationSettings &settings,
    227                                           SortKeyByteSink &sink,
    228                                           Collation::Level minLevel, LevelCallback &callback,
    229                                           UBool preflight, UErrorCode &errorCode) {
    230     if(U_FAILURE(errorCode)) { return; }
    231 
    232     int32_t options = settings.options;
    233     // Set of levels to process and write.
    234     uint32_t levels = levelMasks[CollationSettings::getStrength(options)];
    235     if((options & CollationSettings::CASE_LEVEL) != 0) {
    236         levels |= Collation::CASE_LEVEL_FLAG;
    237     }
    238     // Minus the levels below minLevel.
    239     levels &= ~(((uint32_t)1 << minLevel) - 1);
    240     if(levels == 0) { return; }
    241 
    242     uint32_t variableTop;
    243     if((options & CollationSettings::ALTERNATE_MASK) == 0) {
    244         variableTop = 0;
    245     } else {
    246         // +1 so that we can use "<" and primary ignorables test out early.
    247         variableTop = settings.variableTop + 1;
    248     }
    249 
    250     uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);
    251 
    252     SortKeyLevel cases;
    253     SortKeyLevel secondaries;
    254     SortKeyLevel tertiaries;
    255     SortKeyLevel quaternaries;
    256 
    257     uint32_t prevReorderedPrimary = 0;  // 0==no compression
    258     int32_t commonCases = 0;
    259     int32_t commonSecondaries = 0;
    260     int32_t commonTertiaries = 0;
    261     int32_t commonQuaternaries = 0;
    262 
    263     uint32_t prevSecondary = 0;
    264     int32_t secSegmentStart = 0;
    265 
    266     for(;;) {
    267         // No need to keep all CEs in the buffer when we write a sort key.
    268         iter.clearCEsIfNoneRemaining();
    269         int64_t ce = iter.nextCE(errorCode);
    270         uint32_t p = (uint32_t)(ce >> 32);
    271         if(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY) {
    272             // Variable CE, shift it to quaternary level.
    273             // Ignore all following primary ignorables, and shift further variable CEs.
    274             if(commonQuaternaries != 0) {
    275                 --commonQuaternaries;
    276                 while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
    277                     quaternaries.appendByte(QUAT_COMMON_MIDDLE);
    278                     commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
    279                 }
    280                 // Shifted primary weights are lower than the common weight.
    281                 quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);
    282                 commonQuaternaries = 0;
    283             }
    284             do {
    285                 if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
    286                     if(settings.hasReordering()) {
    287                         p = settings.reorder(p);
    288                     }
    289                     if((p >> 24) >= QUAT_SHIFTED_LIMIT_BYTE) {
    290                         // Prevent shifted primary lead bytes from
    291                         // overlapping with the common compression range.
    292                         quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
    293                     }
    294                     quaternaries.appendWeight32(p);
    295                 }
    296                 do {
    297                     ce = iter.nextCE(errorCode);
    298                     p = (uint32_t)(ce >> 32);
    299                 } while(p == 0);
    300             } while(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY);
    301         }
    302         // ce could be primary ignorable, or NO_CE, or the merge separator,
    303         // or a regular primary CE, but it is not variable.
    304         // If ce==NO_CE, then write nothing for the primary level but
    305         // terminate compression on all levels and then exit the loop.
    306         if(p > Collation::NO_CE_PRIMARY && (levels & Collation::PRIMARY_LEVEL_FLAG) != 0) {
    307             // Test the un-reordered primary for compressibility.
    308             UBool isCompressible = compressibleBytes[p >> 24];
    309             if(settings.hasReordering()) {
    310                 p = settings.reorder(p);
    311             }
    312             uint32_t p1 = p >> 24;
    313             if(!isCompressible || p1 != (prevReorderedPrimary >> 24)) {
    314                 if(prevReorderedPrimary != 0) {
    315                     if(p < prevReorderedPrimary) {
    316                         // No primary compression terminator
    317                         // at the end of the level or merged segment.
    318                         if(p1 > Collation::MERGE_SEPARATOR_BYTE) {
    319                             sink.Append(Collation::PRIMARY_COMPRESSION_LOW_BYTE);
    320                         }
    321                     } else {
    322                         sink.Append(Collation::PRIMARY_COMPRESSION_HIGH_BYTE);
    323                     }
    324                 }
    325                 sink.Append(p1);
    326                 if(isCompressible) {
    327                     prevReorderedPrimary = p;
    328                 } else {
    329                     prevReorderedPrimary = 0;
    330                 }
    331             }
    332             char p2 = (char)(p >> 16);
    333             if(p2 != 0) {
    334                 char buffer[3] = { p2, (char)(p >> 8), (char)p };
    335                 sink.Append(buffer, (buffer[1] == 0) ? 1 : (buffer[2] == 0) ? 2 : 3);
    336             }
    337             // Optimization for internalNextSortKeyPart():
    338             // When the primary level overflows we can stop because we need not
    339             // calculate (preflight) the whole sort key length.
    340             if(!preflight && sink.Overflowed()) {
    341                 if(U_SUCCESS(errorCode) && !sink.IsOk()) {
    342                     errorCode = U_MEMORY_ALLOCATION_ERROR;
    343                 }
    344                 return;
    345             }
    346         }
    347 
    348         uint32_t lower32 = (uint32_t)ce;
    349         if(lower32 == 0) { continue; }  // completely ignorable, no secondary/case/tertiary/quaternary
    350 
    351         if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
    352             uint32_t s = lower32 >> 16;
    353             if(s == 0) {
    354                 // secondary ignorable
    355             } else if(s == Collation::COMMON_WEIGHT16 &&
    356                     ((options & CollationSettings::BACKWARD_SECONDARY) == 0 ||
    357                         p != Collation::MERGE_SEPARATOR_PRIMARY)) {
    358                 // s is a common secondary weight, and
    359                 // backwards-secondary is off or the ce is not the merge separator.
    360                 ++commonSecondaries;
    361             } else if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
    362                 if(commonSecondaries != 0) {
    363                     --commonSecondaries;
    364                     while(commonSecondaries >= SEC_COMMON_MAX_COUNT) {
    365                         secondaries.appendByte(SEC_COMMON_MIDDLE);
    366                         commonSecondaries -= SEC_COMMON_MAX_COUNT;
    367                     }
    368                     uint32_t b;
    369                     if(s < Collation::COMMON_WEIGHT16) {
    370                         b = SEC_COMMON_LOW + commonSecondaries;
    371                     } else {
    372                         b = SEC_COMMON_HIGH - commonSecondaries;
    373                     }
    374                     secondaries.appendByte(b);
    375                     commonSecondaries = 0;
    376                 }
    377                 secondaries.appendWeight16(s);
    378             } else {
    379                 if(commonSecondaries != 0) {
    380                     --commonSecondaries;
    381                     // Append reverse weights. The level will be re-reversed later.
    382                     int32_t remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
    383                     uint32_t b;
    384                     if(prevSecondary < Collation::COMMON_WEIGHT16) {
    385                         b = SEC_COMMON_LOW + remainder;
    386                     } else {
    387                         b = SEC_COMMON_HIGH - remainder;
    388                     }
    389                     secondaries.appendByte(b);
    390                     commonSecondaries -= remainder;
    391                     // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
    392                     while(commonSecondaries > 0) {  // same as >= SEC_COMMON_MAX_COUNT
    393                         secondaries.appendByte(SEC_COMMON_MIDDLE);
    394                         commonSecondaries -= SEC_COMMON_MAX_COUNT;
    395                     }
    396                     // commonSecondaries == 0
    397                 }
    398                 if(0 < p && p <= Collation::MERGE_SEPARATOR_PRIMARY) {
    399                     // The backwards secondary level compares secondary weights backwards
    400                     // within segments separated by the merge separator (U+FFFE).
    401                     uint8_t *secs = secondaries.data();
    402                     int32_t last = secondaries.length() - 1;
    403                     if(secSegmentStart < last) {
    404                         uint8_t *p = secs + secSegmentStart;
    405                         uint8_t *q = secs + last;
    406                         do {
    407                             uint8_t b = *p;
    408                             *p++ = *q;
    409                             *q-- = b;
    410                         } while(p < q);
    411                     }
    412                     secondaries.appendByte(p == Collation::NO_CE_PRIMARY ?
    413                         Collation::LEVEL_SEPARATOR_BYTE : Collation::MERGE_SEPARATOR_BYTE);
    414                     prevSecondary = 0;
    415                     secSegmentStart = secondaries.length();
    416                 } else {
    417                     secondaries.appendReverseWeight16(s);
    418                     prevSecondary = s;
    419                 }
    420             }
    421         }
    422 
    423         if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
    424             if((CollationSettings::getStrength(options) == UCOL_PRIMARY) ?
    425                     p == 0 : lower32 <= 0xffff) {
    426                 // Primary+caseLevel: Ignore case level weights of primary ignorables.
    427                 // Otherwise: Ignore case level weights of secondary ignorables.
    428                 // For details see the comments in the CollationCompare class.
    429             } else {
    430                 uint32_t c = (lower32 >> 8) & 0xff;  // case bits & tertiary lead byte
    431                 U_ASSERT((c & 0xc0) != 0xc0);
    432                 if((c & 0xc0) == 0 && c > Collation::LEVEL_SEPARATOR_BYTE) {
    433                     ++commonCases;
    434                 } else {
    435                     if((options & CollationSettings::UPPER_FIRST) == 0) {
    436                         // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, upper=15.
    437                         // If there are only common (=lowest) weights in the whole level,
    438                         // then we need not write anything.
    439                         // Level length differences are handled already on the next-higher level.
    440                         if(commonCases != 0 &&
    441                                 (c > Collation::LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) {
    442                             --commonCases;
    443                             while(commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
    444                                 cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
    445                                 commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
    446                             }
    447                             uint32_t b;
    448                             if(c <= Collation::LEVEL_SEPARATOR_BYTE) {
    449                                 b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
    450                             } else {
    451                                 b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
    452                             }
    453                             cases.appendByte(b << 4);
    454                             commonCases = 0;
    455                         }
    456                         if(c > Collation::LEVEL_SEPARATOR_BYTE) {
    457                             c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >> 6)) << 4;  // 14 or 15
    458                         }
    459                     } else {
    460                         // upperFirst: Compress common weights to nibbles 3..15, mixed=2, upper=1.
    461                         // The compressed common case weights only go up from the "low" value
    462                         // because with upperFirst the common weight is the highest one.
    463                         if(commonCases != 0) {
    464                             --commonCases;
    465                             while(commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) {
    466                                 cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);
    467                                 commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;
    468                             }
    469                             cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
    470                             commonCases = 0;
    471                         }
    472                         if(c > Collation::LEVEL_SEPARATOR_BYTE) {
    473                             c = (CASE_UPPER_FIRST_COMMON_LOW - (c >> 6)) << 4;  // 2 or 1
    474                         }
    475                     }
    476                     // c is a separator byte 01,
    477                     // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
    478                     cases.appendByte(c);
    479                 }
    480             }
    481         }
    482 
    483         if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
    484             uint32_t t = lower32 & tertiaryMask;
    485             U_ASSERT((lower32 & 0xc000) != 0xc000);
    486             if(t == Collation::COMMON_WEIGHT16) {
    487                 ++commonTertiaries;
    488             } else if((tertiaryMask & 0x8000) == 0) {
    489                 // Tertiary weights without case bits.
    490                 // Move lead bytes 06..3F to C6..FF for a large common-weight range.
    491                 if(commonTertiaries != 0) {
    492                     --commonTertiaries;
    493                     while(commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) {
    494                         tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);
    495                         commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;
    496                     }
    497                     uint32_t b;
    498                     if(t < Collation::COMMON_WEIGHT16) {
    499                         b = TER_ONLY_COMMON_LOW + commonTertiaries;
    500                     } else {
    501                         b = TER_ONLY_COMMON_HIGH - commonTertiaries;
    502                     }
    503                     tertiaries.appendByte(b);
    504                     commonTertiaries = 0;
    505                 }
    506                 if(t > Collation::COMMON_WEIGHT16) { t += 0xc000; }
    507                 tertiaries.appendWeight16(t);
    508             } else if((options & CollationSettings::UPPER_FIRST) == 0) {
    509                 // Tertiary weights with caseFirst=lowerFirst.
    510                 // Move lead bytes 06..BF to 46..FF for the common-weight range.
    511                 if(commonTertiaries != 0) {
    512                     --commonTertiaries;
    513                     while(commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) {
    514                         tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);
    515                         commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;
    516                     }
    517                     uint32_t b;
    518                     if(t < Collation::COMMON_WEIGHT16) {
    519                         b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;
    520                     } else {
    521                         b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;
    522                     }
    523                     tertiaries.appendByte(b);
    524                     commonTertiaries = 0;
    525                 }
    526                 if(t > Collation::COMMON_WEIGHT16) { t += 0x4000; }
    527                 tertiaries.appendWeight16(t);
    528             } else {
    529                 // Tertiary weights with caseFirst=upperFirst.
    530                 // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
    531                 // to keep tertiary CEs well-formed.
    532                 // Their case+tertiary weights must be greater than those of
    533                 // primary and secondary CEs.
    534                 //
    535                 // Separator         01 -> 01      (unchanged)
    536                 // Lowercase     02..04 -> 82..84  (includes uncased)
    537                 // Common weight     05 -> 85..C5  (common-weight compression range)
    538                 // Lowercase     06..3F -> C6..FF
    539                 // Mixed case    42..7F -> 42..7F
    540                 // Uppercase     82..BF -> 02..3F
    541                 // Tertiary CE   86..BF -> C6..FF
    542                 if(t <= Collation::NO_CE_WEIGHT16) {
    543                     // Keep separators unchanged.
    544                 } else if(lower32 > 0xffff) {
    545                     // Invert case bits of primary & secondary CEs.
    546                     t ^= 0xc000;
    547                     if(t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) {
    548                         t -= 0x4000;
    549                     }
    550                 } else {
    551                     // Keep uppercase bits of tertiary CEs.
    552                     U_ASSERT(0x8600 <= t && t <= 0xbfff);
    553                     t += 0x4000;
    554                 }
    555                 if(commonTertiaries != 0) {
    556                     --commonTertiaries;
    557                     while(commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) {
    558                         tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);
    559                         commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;
    560                     }
    561                     uint32_t b;
    562                     if(t < (TER_UPPER_FIRST_COMMON_LOW << 8)) {
    563                         b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;
    564                     } else {
    565                         b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;
    566                     }
    567                     tertiaries.appendByte(b);
    568                     commonTertiaries = 0;
    569                 }
    570                 tertiaries.appendWeight16(t);
    571             }
    572         }
    573 
    574         if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
    575             uint32_t q = lower32 & 0xffff;
    576             if((q & 0xc0) == 0 && q > Collation::NO_CE_WEIGHT16) {
    577                 ++commonQuaternaries;
    578             } else if(q == Collation::NO_CE_WEIGHT16 &&
    579                     (options & CollationSettings::ALTERNATE_MASK) == 0 &&
    580                     quaternaries.isEmpty()) {
    581                 // If alternate=non-ignorable and there are only common quaternary weights,
    582                 // then we need not write anything.
    583                 // The only weights greater than the merge separator and less than the common weight
    584                 // are shifted primary weights, which are not generated for alternate=non-ignorable.
    585                 // There are also exactly as many quaternary weights as tertiary weights,
    586                 // so level length differences are handled already on tertiary level.
    587                 // Any above-common quaternary weight will compare greater regardless.
    588                 quaternaries.appendByte(Collation::LEVEL_SEPARATOR_BYTE);
    589             } else {
    590                 if(q == Collation::NO_CE_WEIGHT16) {
    591                     q = Collation::LEVEL_SEPARATOR_BYTE;
    592                 } else {
    593                     q = 0xfc + ((q >> 6) & 3);
    594                 }
    595                 if(commonQuaternaries != 0) {
    596                     --commonQuaternaries;
    597                     while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
    598                         quaternaries.appendByte(QUAT_COMMON_MIDDLE);
    599                         commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
    600                     }
    601                     uint32_t b;
    602                     if(q < QUAT_COMMON_LOW) {
    603                         b = QUAT_COMMON_LOW + commonQuaternaries;
    604                     } else {
    605                         b = QUAT_COMMON_HIGH - commonQuaternaries;
    606                     }
    607                     quaternaries.appendByte(b);
    608                     commonQuaternaries = 0;
    609                 }
    610                 quaternaries.appendByte(q);
    611             }
    612         }
    613 
    614         if((lower32 >> 24) == Collation::LEVEL_SEPARATOR_BYTE) { break; }  // ce == NO_CE
    615     }
    616 
    617     if(U_FAILURE(errorCode)) { return; }
    618 
    619     // Append the beyond-primary levels.
    620     UBool ok = TRUE;
    621     if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
    622         if(!callback.needToWrite(Collation::SECONDARY_LEVEL)) { return; }
    623         ok &= secondaries.isOk();
    624         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
    625         secondaries.appendTo(sink);
    626     }
    627 
    628     if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
    629         if(!callback.needToWrite(Collation::CASE_LEVEL)) { return; }
    630         ok &= cases.isOk();
    631         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
    632         // Write pairs of nibbles as bytes, except separator bytes as themselves.
    633         int32_t length = cases.length() - 1;  // Ignore the trailing NO_CE.
    634         uint8_t b = 0;
    635         for(int32_t i = 0; i < length; ++i) {
    636             uint8_t c = (uint8_t)cases[i];
    637             U_ASSERT((c & 0xf) == 0 && c != 0);
    638             if(b == 0) {
    639                 b = c;
    640             } else {
    641                 sink.Append(b | (c >> 4));
    642                 b = 0;
    643             }
    644         }
    645         if(b != 0) {
    646             sink.Append(b);
    647         }
    648     }
    649 
    650     if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
    651         if(!callback.needToWrite(Collation::TERTIARY_LEVEL)) { return; }
    652         ok &= tertiaries.isOk();
    653         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
    654         tertiaries.appendTo(sink);
    655     }
    656 
    657     if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
    658         if(!callback.needToWrite(Collation::QUATERNARY_LEVEL)) { return; }
    659         ok &= quaternaries.isOk();
    660         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
    661         quaternaries.appendTo(sink);
    662     }
    663 
    664     if(!ok || !sink.IsOk()) {
    665         errorCode = U_MEMORY_ALLOCATION_ERROR;
    666     }
    667 }
    668 
    669 U_NAMESPACE_END
    670 
    671 #endif  // !UCONFIG_NO_COLLATION
    672