Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2012-2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * collationkeys.cpp
      7 *
      8 * created on: 2012sep02
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_COLLATION
     15 
     16 #include "unicode/bytestream.h"
     17 #include "collation.h"
     18 #include "collationiterator.h"
     19 #include "collationkeys.h"
     20 #include "collationsettings.h"
     21 #include "uassert.h"
     22 
     23 U_NAMESPACE_BEGIN
     24 
        // Destructor, defined out of line here; its body is empty.
     25 SortKeyByteSink::~SortKeyByteSink() {}
     26 
     27 void
     28 SortKeyByteSink::Append(const char *bytes, int32_t n) {
     29     if (n <= 0 || bytes == NULL) {
     30         return;
     31     }
     32     if (ignore_ > 0) {
     33         int32_t ignoreRest = ignore_ - n;
     34         if (ignoreRest >= 0) {
     35             ignore_ = ignoreRest;
     36             return;
     37         } else {
     38             bytes += ignore_;
     39             n = -ignoreRest;
     40             ignore_ = 0;
     41         }
     42     }
     43     int32_t length = appended_;
     44     appended_ += n;
     45     if ((buffer_ + length) == bytes) {
     46         return;  // the caller used GetAppendBuffer() and wrote the bytes already
     47     }
     48     int32_t available = capacity_ - length;
     49     if (n <= available) {
     50         uprv_memcpy(buffer_ + length, bytes, n);
     51     } else {
     52         AppendBeyondCapacity(bytes, n, length);
     53     }
     54 }
     55 
     56 char *
     57 SortKeyByteSink::GetAppendBuffer(int32_t min_capacity,
     58                                  int32_t desired_capacity_hint,
     59                                  char *scratch,
     60                                  int32_t scratch_capacity,
     61                                  int32_t *result_capacity) {
     62     if (min_capacity < 1 || scratch_capacity < min_capacity) {
     63         *result_capacity = 0;
     64         return NULL;
     65     }
     66     if (ignore_ > 0) {
     67         // Do not write ignored bytes right at the end of the buffer.
     68         *result_capacity = scratch_capacity;
     69         return scratch;
     70     }
     71     int32_t available = capacity_ - appended_;
     72     if (available >= min_capacity) {
     73         *result_capacity = available;
     74         return buffer_ + appended_;
     75     } else if (Resize(desired_capacity_hint, appended_)) {
     76         *result_capacity = capacity_ - appended_;
     77         return buffer_ + appended_;
     78     } else {
     79         *result_capacity = scratch_capacity;
     80         return scratch;
     81     }
     82 }
     83 
     84 namespace {
     85 
     86 /**
     87  * uint8_t byte buffer, similar to CharString but simpler.
     88  */
     89 class SortKeyLevel : public UMemory {
     90 public:
     91     SortKeyLevel() : len(0), ok(TRUE) {}
     92     ~SortKeyLevel() {}
     93 
     94     /** @return FALSE if memory allocation failed */
     95     UBool isOk() const { return ok; }
     96     UBool isEmpty() const { return len == 0; }
     97     int32_t length() const { return len; }
     98     const uint8_t *data() const { return buffer.getAlias(); }
     99     uint8_t operator[](int32_t index) const { return buffer[index]; }
    100 
    101     uint8_t *data() { return buffer.getAlias(); }
    102 
    103     void appendByte(uint32_t b);
    104     void appendWeight16(uint32_t w);
    105     void appendWeight32(uint32_t w);
    106     void appendReverseWeight16(uint32_t w);
    107 
    108     /** Appends all but the last byte to the sink. The last byte should be the 01 terminator. */
    109     void appendTo(ByteSink &sink) const {
    110         U_ASSERT(len > 0 && buffer[len - 1] == 1);
    111         sink.Append(reinterpret_cast<const char *>(buffer.getAlias()), len - 1);
    112     }
    113 
    114 private:
    115     MaybeStackArray<uint8_t, 40> buffer;
    116     int32_t len;
    117     UBool ok;
    118 
    119     UBool ensureCapacity(int32_t appendCapacity);
    120 
    121     SortKeyLevel(const SortKeyLevel &other); // forbid copying of this class
    122     SortKeyLevel &operator=(const SortKeyLevel &other); // forbid copying of this class
    123 };
    124 
    125 void SortKeyLevel::appendByte(uint32_t b) {
    126     if(len < buffer.getCapacity() || ensureCapacity(1)) {
    127         buffer[len++] = (uint8_t)b;
    128     }
    129 }
    130 
    131 void
    132 SortKeyLevel::appendWeight16(uint32_t w) {
    133     U_ASSERT((w & 0xffff) != 0);
    134     uint8_t b0 = (uint8_t)(w >> 8);
    135     uint8_t b1 = (uint8_t)w;
    136     int32_t appendLength = (b1 == 0) ? 1 : 2;
    137     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
    138         buffer[len++] = b0;
    139         if(b1 != 0) {
    140             buffer[len++] = b1;
    141         }
    142     }
    143 }
    144 
    145 void
    146 SortKeyLevel::appendWeight32(uint32_t w) {
    147     U_ASSERT(w != 0);
    148     uint8_t bytes[4] = { (uint8_t)(w >> 24), (uint8_t)(w >> 16), (uint8_t)(w >> 8), (uint8_t)w };
    149     int32_t appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;
    150     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
    151         buffer[len++] = bytes[0];
    152         if(bytes[1] != 0) {
    153             buffer[len++] = bytes[1];
    154             if(bytes[2] != 0) {
    155                 buffer[len++] = bytes[2];
    156                 if(bytes[3] != 0) {
    157                     buffer[len++] = bytes[3];
    158                 }
    159             }
    160         }
    161     }
    162 }
    163 
    164 void
    165 SortKeyLevel::appendReverseWeight16(uint32_t w) {
    166     U_ASSERT((w & 0xffff) != 0);
    167     uint8_t b0 = (uint8_t)(w >> 8);
    168     uint8_t b1 = (uint8_t)w;
    169     int32_t appendLength = (b1 == 0) ? 1 : 2;
    170     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
    171         if(b1 == 0) {
    172             buffer[len++] = b0;
    173         } else {
    174             buffer[len] = b1;
    175             buffer[len + 1] = b0;
    176             len += 2;
    177         }
    178     }
    179 }
    180 
    181 UBool SortKeyLevel::ensureCapacity(int32_t appendCapacity) {
    182     if(!ok) {
    183         return FALSE;
    184     }
    185     int32_t newCapacity = 2 * buffer.getCapacity();
    186     int32_t altCapacity = len + 2 * appendCapacity;
    187     if (newCapacity < altCapacity) {
    188         newCapacity = altCapacity;
    189     }
    190     if (newCapacity < 200) {
    191         newCapacity = 200;
    192     }
    193     if(buffer.resize(newCapacity, len)==NULL) {
    194         return ok = FALSE;
    195     }
    196     return TRUE;
    197 }
    198 
    199 }  // namespace
    200 
        // Destructor, defined out of line here; its body is empty.
    201 CollationKeys::LevelCallback::~LevelCallback() {}
    202 
        /**
         * This implementation ignores its level argument and always returns TRUE.
         * writeSortKeyUpToQuaternary() calls needToWrite() before appending each level
         * after the primary one and returns early when the answer is FALSE.
         */
    203 UBool
    204 CollationKeys::LevelCallback::needToWrite(Collation::Level /*level*/) { return TRUE; }
    205 
    206 /**
    207  * Map from collation strength (UColAttributeValue)
    208  * to a mask of Collation::Level bits up to that strength,
    209  * excluding the CASE_LEVEL which is independent of the strength,
    210  * and excluding IDENTICAL_LEVEL which this function does not write.
         * ("This function" is writeSortKeyUpToQuaternary(), which indexes this table
         * with CollationSettings::getStrength(options).)
         *
         * The zero entries sit between UCOL_QUATERNARY and UCOL_IDENTICAL.
         * NOTE(review): presumably those strength values are never used -- confirm.
         * A zero mask makes writeSortKeyUpToQuaternary() return without writing
         * anything unless the case level is turned on.
    211  */
    212 static const uint32_t levelMasks[UCOL_STRENGTH_LIMIT] = {
    213     2,          // UCOL_PRIMARY -> PRIMARY_LEVEL
    214     6,          // UCOL_SECONDARY -> up to SECONDARY_LEVEL
    215     0x16,       // UCOL_TERTIARY -> up to TERTIARY_LEVEL
    216     0x36,       // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL
    217     0, 0, 0, 0,
    218     0, 0, 0, 0,
    219     0, 0, 0,
    220     0x36        // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL
    221 };
    222 
        /**
         * Writes sort key bytes for the collation elements (CEs) delivered by iter,
         * for the levels from minLevel up to the quaternary level.
         *
         * The set of levels comes from levelMasks[] for the strength in settings.options,
         * plus the case level if CollationSettings::CASE_LEVEL is set, minus all levels
         * below minLevel. If no level remains, nothing is written.
         *
         * Primary-level bytes are appended to the sink while iterating.
         * Secondary, case, tertiary and quaternary weights are collected in separate
         * SortKeyLevel buffers and are appended after the loop, in that order,
         * each level preceded by Collation::LEVEL_SEPARATOR_BYTE.
         * Runs of common weights on these levels are counted and written in compressed form.
         *
         * @param iter supplies the CEs via nextCE(); the loop ends after NO_CE
         * @param compressibleBytes indexed by the un-reordered primary lead byte;
         *        non-zero if primaries with that lead byte take part in primary compression
         * @param settings supplies options, variableTop and reorderTable
         * @param sink receives the sort key bytes
         * @param minLevel levels below this one are not written
         * @param callback needToWrite() is asked before each level after the primary one
         *        is appended; if it returns FALSE this function returns immediately
         * @param preflight if FALSE, this function returns as soon as the sink reports
         *        Overflowed() after primary bytes have been written
         * @param errorCode in/out error code; nothing is done if it is already a failure.
         *        Set to U_MEMORY_ALLOCATION_ERROR if a level buffer's isOk() or
         *        the sink's IsOk() is false.
         */
    223 void
    224 CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
    225                                           const UBool *compressibleBytes,
    226                                           const CollationSettings &settings,
    227                                           SortKeyByteSink &sink,
    228                                           Collation::Level minLevel, LevelCallback &callback,
    229                                           UBool preflight, UErrorCode &errorCode) {
    230     if(U_FAILURE(errorCode)) { return; }
    231 
    232     int32_t options = settings.options;
    233     // Set of levels to process and write.
    234     uint32_t levels = levelMasks[CollationSettings::getStrength(options)];
    235     if((options & CollationSettings::CASE_LEVEL) != 0) {
    236         levels |= Collation::CASE_LEVEL_FLAG;
    237     }
    238     // Minus the levels below minLevel.
    239     levels &= ~(((uint32_t)1 << minLevel) - 1);
    240     if(levels == 0) { return; }
    241 
            // variableTop==0 means that no CE is treated as variable:
            // the test p < variableTop below can then never succeed.
    242     uint32_t variableTop;
    243     if((options & CollationSettings::ALTERNATE_MASK) == 0) {
    244         variableTop = 0;
    245     } else {
    246         // +1 so that we can use "<" and primary ignorables test out early.
    247         variableTop = settings.variableTop + 1;
    248     }
    249     const uint8_t *reorderTable = settings.reorderTable;
    250 
    251     uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);
    252 
            // One buffer per non-primary level; their contents are appended to the sink after the loop.
    253     SortKeyLevel cases;
    254     SortKeyLevel secondaries;
    255     SortKeyLevel tertiaries;
    256     SortKeyLevel quaternaries;
    257 
    258     uint32_t compressedP1 = 0;  // 0==no compression; otherwise reordered compressible lead byte
            // Per-level counters of consecutive common weights that have not been written yet.
    259     int32_t commonCases = 0;
    260     int32_t commonSecondaries = 0;
    261     int32_t commonTertiaries = 0;
    262     int32_t commonQuaternaries = 0;
    263 
    264     uint32_t prevSecondary = 0;
    265     UBool anyMergeSeparators = FALSE;
    266 
    267     for(;;) {
    268         // No need to keep all CEs in the buffer when we write a sort key.
    269         iter.clearCEsIfNoneRemaining();
    270         int64_t ce = iter.nextCE(errorCode);
    271         uint32_t p = (uint32_t)(ce >> 32);
    272         if(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY) {
    273             // Variable CE, shift it to quaternary level.
    274             // Ignore all following primary ignorables, and shift further variable CEs.
    275             if(commonQuaternaries != 0) {
                        // Flush the pending run of common quaternary weights first.
    276                 --commonQuaternaries;
    277                 while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
    278                     quaternaries.appendByte(QUAT_COMMON_MIDDLE);
    279                     commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
    280                 }
    281                 // Shifted primary weights are lower than the common weight.
    282                 quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);
    283                 commonQuaternaries = 0;
    284             }
    285             do {
    286                 if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
    287                     uint32_t p1 = p >> 24;
    288                     if(reorderTable != NULL) { p1 = reorderTable[p1]; }
    289                     if(p1 >= QUAT_SHIFTED_LIMIT_BYTE) {
    290                         // Prevent shifted primary lead bytes from
    291                         // overlapping with the common compression range.
    292                         quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
    293                     }
    294                     quaternaries.appendWeight32((p1 << 24) | (p & 0xffffff));
    295                 }
    296                 do {
    297                     ce = iter.nextCE(errorCode);
    298                     p = (uint32_t)(ce >> 32);
    299                 } while(p == 0);
    300             } while(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY);
    301         }
    302         // ce could be primary ignorable, or NO_CE, or the merge separator,
    303         // or a regular primary CE, but it is not variable.
    304         // If ce==NO_CE, then write nothing for the primary level but
    305         // terminate compression on all levels and then exit the loop.
    306         if(p > Collation::NO_CE_PRIMARY && (levels & Collation::PRIMARY_LEVEL_FLAG) != 0) {
    307             uint32_t p1 = p >> 24;
    308             if(reorderTable != NULL) { p1 = reorderTable[p1]; }
                    // The lead byte is written only when it differs from the remembered
                    // compressible lead byte; otherwise only the following bytes are written.
    309             if(p1 != compressedP1) {
    310                 if(compressedP1 != 0) {
    311                     if(p1 < compressedP1) {
    312                         // No primary compression terminator
    313                         // at the end of the level or merged segment.
    314                         if(p1 > Collation::MERGE_SEPARATOR_BYTE) {
    315                             sink.Append(Collation::PRIMARY_COMPRESSION_LOW_BYTE);
    316                         }
    317                     } else {
    318                         sink.Append(Collation::PRIMARY_COMPRESSION_HIGH_BYTE);
    319                     }
    320                 }
    321                 sink.Append(p1);
    322                 // Test the un-reordered lead byte for compressibility but
    323                 // remember the reordered lead byte.
    324                 if(compressibleBytes[p >> 24]) {
    325                     compressedP1 = p1;
    326                 } else {
    327                     compressedP1 = 0;
    328                 }
    329             }
                    // Write the remaining primary bytes, stopping before the first zero byte.
    330             char p2 = (char)(p >> 16);
    331             if(p2 != 0) {
    332                 char buffer[3] = { p2, (char)(p >> 8), (char)p };
    333                 sink.Append(buffer, (buffer[1] == 0) ? 1 : (buffer[2] == 0) ? 2 : 3);
    334             }
    335             // Optimization for internalNextSortKeyPart():
    336             // When the primary level overflows we can stop because we need not
    337             // calculate (preflight) the whole sort key length.
    338             if(!preflight && sink.Overflowed()) {
    339                 if(U_SUCCESS(errorCode) && !sink.IsOk()) {
    340                     errorCode = U_MEMORY_ALLOCATION_ERROR;
    341                 }
    342                 return;
    343             }
    344         }
    345 
    346         uint32_t lower32 = (uint32_t)ce;
    347         if(lower32 == 0) { continue; }  // completely ignorable, no secondary/case/tertiary/quaternary
    348 
    349         if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
    350             uint32_t s = lower32 >> 16;
    351             if(s == 0) {
    352                 // secondary ignorable
    353             } else if(s == Collation::COMMON_WEIGHT16) {
                        // Only count common weights; the run is written when another weight follows.
    354                 ++commonSecondaries;
    355             } else if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
    356                 if(commonSecondaries != 0) {
    357                     --commonSecondaries;
    358                     while(commonSecondaries >= SEC_COMMON_MAX_COUNT) {
    359                         secondaries.appendByte(SEC_COMMON_MIDDLE);
    360                         commonSecondaries -= SEC_COMMON_MAX_COUNT;
    361                     }
    362                     uint32_t b;
    363                     if(s < Collation::COMMON_WEIGHT16) {
    364                         b = SEC_COMMON_LOW + commonSecondaries;
    365                     } else {
    366                         b = SEC_COMMON_HIGH - commonSecondaries;
    367                     }
    368                     secondaries.appendByte(b);
    369                     commonSecondaries = 0;
    370                 }
    371                 secondaries.appendWeight16(s);
    372             } else {
    373                 if(commonSecondaries != 0) {
    374                     --commonSecondaries;
    375                     // Append reverse weights. The level will be re-reversed later.
    376                     int32_t remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
    377                     uint32_t b;
    378                     if(prevSecondary < Collation::COMMON_WEIGHT16) {
    379                         b = SEC_COMMON_LOW + remainder;
    380                     } else {
    381                         b = SEC_COMMON_HIGH - remainder;
    382                     }
    383                     secondaries.appendByte(b);
    384                     commonSecondaries -= remainder;
    385                     // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
    386                     while(commonSecondaries > 0) {  // same as >= SEC_COMMON_MAX_COUNT
    387                         secondaries.appendByte(SEC_COMMON_MIDDLE);
    388                         commonSecondaries -= SEC_COMMON_MAX_COUNT;
    389                     }
    390                     // commonSecondaries == 0
    391                 }
    392                 // Reduce separators so that we can look for byte<=1 later.
    393                 if(s <= Collation::MERGE_SEPARATOR_WEIGHT16) {
    394                     if(s == Collation::MERGE_SEPARATOR_WEIGHT16) {
    395                         anyMergeSeparators = TRUE;
    396                     }
    397                     secondaries.appendByte((s >> 8) - 1);
    398                 } else {
    399                     secondaries.appendReverseWeight16(s);
    400                 }
    401                 prevSecondary = s;
    402             }
    403         }
    404 
    405         if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
    406             if((CollationSettings::getStrength(options) == UCOL_PRIMARY) ?
    407                     p == 0 : lower32 <= 0xffff) {
    408                 // Primary+caseLevel: Ignore case level weights of primary ignorables.
    409                 // Otherwise: Ignore case level weights of secondary ignorables.
    410                 // For details see the comments in the CollationCompare class.
    411             } else {
    412                 uint32_t c = (lower32 >> 8) & 0xff;  // case bits & tertiary lead byte
    413                 U_ASSERT((c & 0xc0) != 0xc0);
    414                 if((c & 0xc0) == 0 && c > Collation::MERGE_SEPARATOR_BYTE) {
    415                     ++commonCases;
    416                 } else {
    417                     if((options & CollationSettings::UPPER_FIRST) == 0) {
    418                         // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, upper=15.
    419                         if(commonCases != 0) {
    420                             --commonCases;
    421                             while(commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
    422                                 cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
    423                                 commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
    424                             }
    425                             uint32_t b;
    426                             if(c <= Collation::MERGE_SEPARATOR_BYTE) {
    427                                 b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
    428                             } else {
    429                                 b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
    430                             }
    431                             cases.appendByte(b << 4);
    432                             commonCases = 0;
    433                         }
    434                         if(c > Collation::MERGE_SEPARATOR_BYTE) {
    435                             c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >> 6)) << 4;  // 14 or 15
    436                         }
    437                     } else {
    438                         // upperFirst: Compress common weights to nibbles 3..15, mixed=2, upper=1.
    439                         // The compressed common case weights only go up from the "low" value
    440                         // because with upperFirst the common weight is the highest one.
    441                         if(commonCases != 0) {
    442                             --commonCases;
    443                             while(commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) {
    444                                 cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);
    445                                 commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;
    446                             }
    447                             cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
    448                             commonCases = 0;
    449                         }
    450                         if(c > Collation::MERGE_SEPARATOR_BYTE) {
    451                             c = (CASE_UPPER_FIRST_COMMON_LOW - (c >> 6)) << 4;  // 2 or 1
    452                         }
    453                     }
    454                     // c is a separator byte 01 or 02,
    455                     // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
    456                     cases.appendByte(c);
    457                 }
    458             }
    459         }
    460 
    461         if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
    462             uint32_t t = lower32 & tertiaryMask;
    463             U_ASSERT((lower32 & 0xc000) != 0xc000);
    464             if(t == Collation::COMMON_WEIGHT16) {
    465                 ++commonTertiaries;
    466             } else if((tertiaryMask & 0x8000) == 0) {
    467                 // Tertiary weights without case bits.
    468                 // Move lead bytes 06..3F to C6..FF for a large common-weight range.
    469                 if(commonTertiaries != 0) {
    470                     --commonTertiaries;
    471                     while(commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) {
    472                         tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);
    473                         commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;
    474                     }
    475                     uint32_t b;
    476                     if(t < Collation::COMMON_WEIGHT16) {
    477                         b = TER_ONLY_COMMON_LOW + commonTertiaries;
    478                     } else {
    479                         b = TER_ONLY_COMMON_HIGH - commonTertiaries;
    480                     }
    481                     tertiaries.appendByte(b);
    482                     commonTertiaries = 0;
    483                 }
    484                 if(t > Collation::COMMON_WEIGHT16) { t += 0xc000; }
    485                 tertiaries.appendWeight16(t);
    486             } else if((options & CollationSettings::UPPER_FIRST) == 0) {
    487                 // Tertiary weights with caseFirst=lowerFirst.
    488                 // Move lead bytes 06..BF to 46..FF for the common-weight range.
    489                 if(commonTertiaries != 0) {
    490                     --commonTertiaries;
    491                     while(commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) {
    492                         tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);
    493                         commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;
    494                     }
    495                     uint32_t b;
    496                     if(t < Collation::COMMON_WEIGHT16) {
    497                         b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;
    498                     } else {
    499                         b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;
    500                     }
    501                     tertiaries.appendByte(b);
    502                     commonTertiaries = 0;
    503                 }
    504                 if(t > Collation::COMMON_WEIGHT16) { t += 0x4000; }
    505                 tertiaries.appendWeight16(t);
    506             } else {
    507                 // Tertiary weights with caseFirst=upperFirst.
    508                 // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
    509                 // to keep tertiary CEs well-formed.
    510                 // Their case+tertiary weights must be greater than those of
    511                 // primary and secondary CEs.
    512                 //
    513                 // Separators    01..02 -> 01..02  (unchanged)
    514                 // Lowercase     03..04 -> 83..84  (includes uncased)
    515                 // Common weight     05 -> 85..C5  (common-weight compression range)
    516                 // Lowercase     06..3F -> C6..FF
    517                 // Mixed case    43..7F -> 43..7F
    518                 // Uppercase     83..BF -> 03..3F
    519                 // Tertiary CE   86..BF -> C6..FF
    520                 if(t <= Collation::MERGE_SEPARATOR_WEIGHT16) {
    521                     // Keep separators unchanged.
    522                 } else if(lower32 > 0xffff) {
    523                     // Invert case bits of primary & secondary CEs.
    524                     t ^= 0xc000;
    525                     if(t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) {
    526                         t -= 0x4000;
    527                     }
    528                 } else {
    529                     // Keep uppercase bits of tertiary CEs.
    530                     U_ASSERT(0x8600 <= t && t <= 0xbfff);
    531                     t += 0x4000;
    532                 }
    533                 if(commonTertiaries != 0) {
    534                     --commonTertiaries;
    535                     while(commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) {
    536                         tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);
    537                         commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;
    538                     }
    539                     uint32_t b;
    540                     if(t < (TER_UPPER_FIRST_COMMON_LOW << 8)) {
    541                         b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;
    542                     } else {
    543                         b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;
    544                     }
    545                     tertiaries.appendByte(b);
    546                     commonTertiaries = 0;
    547                 }
    548                 tertiaries.appendWeight16(t);
    549             }
    550         }
    551 
    552         if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
    553             uint32_t q = lower32 & 0xffff;
    554             if((q & 0xc0) == 0 && q > Collation::MERGE_SEPARATOR_WEIGHT16) {
    555                 ++commonQuaternaries;
    556             } else if(q <= Collation::MERGE_SEPARATOR_WEIGHT16 &&
    557                     (options & CollationSettings::ALTERNATE_MASK) == 0 &&
    558                     (quaternaries.isEmpty() ||
    559                         quaternaries[quaternaries.length() - 1] == Collation::MERGE_SEPARATOR_BYTE)) {
    560                 // If alternate=non-ignorable and there are only
    561                 // common quaternary weights between two separators,
    562                 // then we need not write anything between these separators.
    563                 // The only weights greater than the merge separator and less than the common weight
    564                 // are shifted primary weights, which are not generated for alternate=non-ignorable.
    565                 // There are also exactly as many quaternary weights as tertiary weights,
    566                 // so level length differences are handled already on tertiary level.
    567                 // Any above-common quaternary weight will compare greater regardless.
    568                 quaternaries.appendByte(q >> 8);
    569             } else {
    570                 if(q <= Collation::MERGE_SEPARATOR_WEIGHT16) {
    571                     q >>= 8;
    572                 } else {
    573                     q = 0xfc + ((q >> 6) & 3);
    574                 }
    575                 if(commonQuaternaries != 0) {
    576                     --commonQuaternaries;
    577                     while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
    578                         quaternaries.appendByte(QUAT_COMMON_MIDDLE);
    579                         commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
    580                     }
    581                     uint32_t b;
    582                     if(q < QUAT_COMMON_LOW) {
    583                         b = QUAT_COMMON_LOW + commonQuaternaries;
    584                     } else {
    585                         b = QUAT_COMMON_HIGH - commonQuaternaries;
    586                     }
    587                     quaternaries.appendByte(b);
    588                     commonQuaternaries = 0;
    589                 }
    590                 quaternaries.appendByte(q);
    591             }
    592         }
    593 
    594         if((lower32 >> 24) == Collation::LEVEL_SEPARATOR_BYTE) { break; }  // ce == NO_CE
    595     }
    596 
    597     if(U_FAILURE(errorCode)) { return; }
    598 
    599     // Append the beyond-primary levels.
            // ok collects the isOk() state of every level buffer that is written;
            // it is turned into an error code only at the very end.
    600     UBool ok = TRUE;
    601     if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
    602         if(!callback.needToWrite(Collation::SECONDARY_LEVEL)) { return; }
    603         ok &= secondaries.isOk();
    604         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
    605         uint8_t *secs = secondaries.data();
    606         int32_t length = secondaries.length() - 1;  // Ignore the trailing NO_CE.
    607         if((options & CollationSettings::BACKWARD_SECONDARY) != 0) {
    608             // The backwards secondary level compares secondary weights backwards
    609             // within segments separated by the merge separator (U+FFFE, weight 02).
    610             // The separator weights 01 & 02 were reduced to 00 & 01 so that
    611             // we do not accidentally separate at a _second_ weight byte of 02.
    612             int32_t start = 0;
    613             for(;;) {
    614                 // Find the merge separator or the NO_CE terminator.
    615                 int32_t limit;
    616                 if(anyMergeSeparators) {
    617                     limit = start;
    618                     while(secs[limit] > 1) { ++limit; }
    619                 } else {
    620                     limit = length;
    621                 }
    622                 // Reverse this segment.
    623                 if(start < limit) {
    624                     uint8_t *p = secs + start;
    625                     uint8_t *q = secs + limit - 1;
    626                     while(p < q) {
    627                         uint8_t s = *p;
    628                         *p++ = *q;
    629                         *q-- = s;
    630                     }
    631                 }
    632                 // Did we reach the end of the string?
    633                 if(secs[limit] == 0) { break; }
    634                 // Restore the merge separator.
    635                 secs[limit] = 2;
    636                 // Skip the merge separator and continue.
    637                 start = limit + 1;
    638             }
    639         }
    640         sink.Append(reinterpret_cast<char *>(secs), length);
    641     }
    642 
    643     if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
    644         if(!callback.needToWrite(Collation::CASE_LEVEL)) { return; }
    645         ok &= cases.isOk();
    646         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
    647         // Write pairs of nibbles as bytes, except separator bytes as themselves.
    648         int32_t length = cases.length() - 1;  // Ignore the trailing NO_CE.
                // b holds a pending high nibble (already shifted left) or 0 if none is pending.
    649         uint8_t b = 0;
    650         for(int32_t i = 0; i < length; ++i) {
    651             uint8_t c = (uint8_t)cases[i];
    652             if(c <= Collation::MERGE_SEPARATOR_BYTE) {
    653                 U_ASSERT(c != 0);
    654                 if(b != 0) {
    655                     sink.Append(b);
    656                     b = 0;
    657                 }
    658                 sink.Append(c);
    659             } else {
    660                 U_ASSERT((c & 0xf) == 0);
    661                 if(b == 0) {
    662                     b = c;
    663                 } else {
    664                     sink.Append(b | (c >> 4));
    665                     b = 0;
    666                 }
    667             }
    668         }
    669         if(b != 0) {
    670             sink.Append(b);
    671         }
    672     }
    673 
    674     if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
    675         if(!callback.needToWrite(Collation::TERTIARY_LEVEL)) { return; }
    676         ok &= tertiaries.isOk();
    677         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
    678         tertiaries.appendTo(sink);
    679     }
    680 
    681     if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
    682         if(!callback.needToWrite(Collation::QUATERNARY_LEVEL)) { return; }
    683         ok &= quaternaries.isOk();
    684         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
    685         quaternaries.appendTo(sink);
    686     }
    687 
    688     if(!ok || !sink.IsOk()) {
    689         errorCode = U_MEMORY_ALLOCATION_ERROR;
    690     }
    691 }
    692 
    693 U_NAMESPACE_END
    694 
    695 #endif  // !UCONFIG_NO_COLLATION
    696