Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2012-2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * collationkeys.h
      7 *
      8 * created on: 2012sep02
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #ifndef __COLLATIONKEYS_H__
     13 #define __COLLATIONKEYS_H__
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #if !UCONFIG_NO_COLLATION
     18 
     19 #include "unicode/bytestream.h"
     20 #include "unicode/ucol.h"
     21 #include "charstr.h"
     22 #include "collation.h"
     23 
     24 U_NAMESPACE_BEGIN
     25 
     26 class CollationIterator;
     27 struct CollationDataReader;
     28 struct CollationSettings;
     29 
     30 class SortKeyByteSink : public ByteSink {
     31 public:
     32     SortKeyByteSink(char *dest, int32_t destCapacity)
     33             : buffer_(dest), capacity_(destCapacity),
     34               appended_(0), ignore_(0) {}
     35     virtual ~SortKeyByteSink();
     36 
     37     void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; }
     38 
     39     virtual void Append(const char *bytes, int32_t n);
     40     void Append(uint32_t b) {
     41         if (ignore_ > 0) {
     42             --ignore_;
     43         } else {
     44             if (appended_ < capacity_ || Resize(1, appended_)) {
     45                 buffer_[appended_] = (char)b;
     46             }
     47             ++appended_;
     48         }
     49     }
     50     virtual char *GetAppendBuffer(int32_t min_capacity,
     51                                   int32_t desired_capacity_hint,
     52                                   char *scratch, int32_t scratch_capacity,
     53                                   int32_t *result_capacity);
     54     int32_t NumberOfBytesAppended() const { return appended_; }
     55 
     56     /**
     57      * @return how many bytes can be appended (including ignored ones)
     58      *         without reallocation
     59      */
     60     int32_t GetRemainingCapacity() const {
     61         // Either ignore_ or appended_ should be 0.
     62         return ignore_ + capacity_ - appended_;
     63     }
     64 
     65     UBool Overflowed() const { return appended_ > capacity_; }
     66     /** @return FALSE if memory allocation failed */
     67     UBool IsOk() const { return buffer_ != NULL; }
     68 
     69 protected:
     70     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;
     71     virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;
     72 
     73     void SetNotOk() {
     74         buffer_ = NULL;
     75         capacity_ = 0;
     76     }
     77 
     78     char *buffer_;
     79     int32_t capacity_;
     80     int32_t appended_;
     81     int32_t ignore_;
     82 
     83 private:
     84     SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
     85     SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
     86 };
     87 
     88 class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ {
     89 public:
     90     class LevelCallback : public UMemory {
     91     public:
     92         virtual ~LevelCallback();
     93         /**
     94          * @param level The next level about to be written to the ByteSink.
     95          * @return TRUE if the level is to be written
     96          *         (the base class implementation always returns TRUE)
     97          */
     98         virtual UBool needToWrite(Collation::Level level);
     99     };
    100 
    101     /**
    102      * Writes the sort key bytes for minLevel up to the iterator data's strength.
    103      * Optionally writes the case level.
    104      * Stops writing levels when callback.needToWrite(level) returns FALSE.
    105      * Separates levels with the LEVEL_SEPARATOR_BYTE
    106      * but does not write a TERMINATOR_BYTE.
    107      */
    108     static void writeSortKeyUpToQuaternary(CollationIterator &iter,
    109                                            const UBool *compressibleBytes,
    110                                            const CollationSettings &settings,
    111                                            SortKeyByteSink &sink,
    112                                            Collation::Level minLevel, LevelCallback &callback,
    113                                            UBool preflight, UErrorCode &errorCode);
    114 private:
    115     friend struct CollationDataReader;
    116 
    117     CollationKeys();  // no instantiation
    118 
    119     // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
    120     static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;
    121     static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
    122     static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;
    123     static const int32_t SEC_COMMON_MAX_COUNT = 0x21;
    124 
    125     // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
    126     static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;
    127     static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
    128     static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;
    129     static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
    130 
    131     // Case level, upperFirst: Compress up to 13 common weights as 3..15.
    132     static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;
    133     static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;
    134     static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
    135 
    136     // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
    137     static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;
    138     static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
    139     static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
    140     static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;
    141 
    142     // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
    143     static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;
    144     static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
    145     static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
    146     static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
    147 
    148     // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
    149     static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;
    150     static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
    151     static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
    152     static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
    153 
    154     // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
    155     static const uint32_t QUAT_COMMON_LOW = 0x1c;
    156     static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
    157     static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
    158     static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;
    159     // Primary weights shifted to quaternary level must be encoded with
    160     // a lead byte below the common-weight compression range.
    161     static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1;  // 0x1b
    162 };
    163 
    164 U_NAMESPACE_END
    165 
    166 #endif  // !UCONFIG_NO_COLLATION
    167 #endif  // __COLLATIONKEYS_H__
    168